Skip to content
Snippets Groups Projects
Commit ea2a57ca authored by Pedro Folloni Pesserl's avatar Pedro Folloni Pesserl
Browse files

almost works

parent 2223a855
No related branches found
No related tags found
No related merge requests found
...@@ -41,62 +41,45 @@ zcat $XML | \ ...@@ -41,62 +41,45 @@ zcat $XML | \
# o sed acha os campos úteis -- PMID seguido de ArticleTitle seguido # o sed acha os campos úteis -- PMID seguido de ArticleTitle seguido
# de Abstract, seguido ou não de MeshHeadingList -- e converte no # de Abstract, seguido ou não de MeshHeadingList -- e converte no
# formato csv. No caso de não haver MeshHeadingList para um dado artigo, # formato csv. No caso de não haver MeshHeadingList para um dado artigo,
# cria um campo vazio (<). # insere uma linha vazia.
sed -En ' sed -En '
/PMID/{ :x; /<PMID/{
s/<PMID Version="//; s/<PMID Version="//;
s/">//; s/">//;
s/<\/PMID>//; s/<\/PMID>/</;
N; x;
n;
/ArticleTitle/{ /<ArticleTitle>/{
s/<ArticleTitle>/</; s/<ArticleTitle>//;
s/<\/ArticleTitle>//; s/ *<\/ArticleTitle>/</;
N; H;
n;
/Abstract/{ /<Abstract>/{
s/<Abstract> *<AbstractText>/</; s/(<Abstract>|<AbstractText[^>]*>) *//g;
s/<\/AbstractText> *<\/Abstract>//; s/ *<\/AbstractText> *(|<CopyrightInformation>)/, /g;
s/(,|<\/CopyrightInformation>) *<\/Abstract>/</g;
s/(<sup>|<sub>)//g;
s/(<\/sup>|<\/sub>)/, /g;
H;
x;
p; p;
n; n;
/MeshHeadingList/{ /<MeshHeadingList>/{
s/ *<MeshHeading> *<Descriptor[^>]*>//g; s/(<\/DescriptorName>|<\/QualifierName>) *(<Qualifier[^>]*>|<\/MeshHeading> *)/, /g;
s/(<\/DescriptorName>|<\/QualifierName>) *(<Qualifier[^>]*>|<\/MeshHeading>)/, /g; s/(<MeshHeadingList> *|<MeshHeading> *<Descriptor[^>]*>|(, *|)<\/MeshHeadingList>)//g;
s/<MeshHeadingList>/</;
s/(, *|)<\/MeshHeadingList>//;
p; p;
d;
}; };
/PMID/{ i \
i <
}
}
}
}' >> $CSV
# converter para o formato csv
# sed -i '
# /PMID/{
# s/<PMID Version="//;
# s/">//;
# s/<\/PMID>//;
# }
# /ArticleTitle/{
# s/<ArticleTitle>/</;
# s/<\/ArticleTitle>//;
# }
# /Abstract/{
# s/<Abstract> *<AbstractText>/</;
# s/<\/AbstractText> *<\/Abstract>//;
# }
# /MeshHeadingList/{
# s/ *<MeshHeading> *<Descriptor[^>]*>//g;
# s/\(<\/DescriptorName>\|<\/QualifierName>\) *\(<Qualifier[^>]*>\|<\/MeshHeading>\)/, /g;
# s/<MeshHeadingList>/</;
# s/\(, *\|\)<\/MeshHeadingList>//;
# }' $CSV
bx;
};
bx;
};
bx;
}' | \
awk 'BEGIN {RS=""} {gsub(/<\n/, "<", $0); print $0}' >> $CSV
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment