Skip to content
Snippets Groups Projects
Commit ea2a57ca authored by Pedro Folloni Pesserl's avatar Pedro Folloni Pesserl
Browse files

almost works

parent 2223a855
No related branches found
No related tags found
No related merge requests found
......@@ -41,62 +41,45 @@ zcat $XML | \
# o sed acha os campos úteis -- PMID seguido de ArticleTitle seguido
# de Abstract, seguido ou não de MeshHeadingList -- e converte no
# formato csv. No caso de não haver MeshHeadingList para um dado artigo,
# cria um campo vazio (<).
# insere uma linha vazia.
sed -En '
/PMID/{
:x; /<PMID/{
s/<PMID Version="//;
s/">//;
s/<\/PMID>//;
N;
s/<\/PMID>/</;
x;
n;
/ArticleTitle/{
s/<ArticleTitle>/</;
s/<\/ArticleTitle>//;
N;
/<ArticleTitle>/{
s/<ArticleTitle>//;
s/ *<\/ArticleTitle>/</;
H;
n;
/Abstract/{
s/<Abstract> *<AbstractText>/</;
s/<\/AbstractText> *<\/Abstract>//;
/<Abstract>/{
s/(<Abstract>|<AbstractText[^>]*>) *//g;
s/ *<\/AbstractText> *(|<CopyrightInformation>)/, /g;
s/(,|<\/CopyrightInformation>) *<\/Abstract>/</g;
s/(<sup>|<sub>)//g;
s/(<\/sup>|<\/sub>)/, /g;
H;
x;
p;
n;
/MeshHeadingList/{
s/ *<MeshHeading> *<Descriptor[^>]*>//g;
s/(<\/DescriptorName>|<\/QualifierName>) *(<Qualifier[^>]*>|<\/MeshHeading>)/, /g;
s/<MeshHeadingList>/</;
s/(, *|)<\/MeshHeadingList>//;
/<MeshHeadingList>/{
s/(<\/DescriptorName>|<\/QualifierName>) *(<Qualifier[^>]*>|<\/MeshHeading> *)/, /g;
s/(<MeshHeadingList> *|<MeshHeading> *<Descriptor[^>]*>|(, *|)<\/MeshHeadingList>)//g;
p;
d;
};
/PMID/{
i <
}
}
}
}' >> $CSV
# converter para o formato csv
# sed -i '
# /PMID/{
# s/<PMID Version="//;
# s/">//;
# s/<\/PMID>//;
# }
# /ArticleTitle/{
# s/<ArticleTitle>/</;
# s/<\/ArticleTitle>//;
# }
# /Abstract/{
# s/<Abstract> *<AbstractText>/</;
# s/<\/AbstractText> *<\/Abstract>//;
# }
# /MeshHeadingList/{
# s/ *<MeshHeading> *<Descriptor[^>]*>//g;
# s/\(<\/DescriptorName>\|<\/QualifierName>\) *\(<Qualifier[^>]*>\|<\/MeshHeading>\)/, /g;
# s/<MeshHeadingList>/</;
# s/\(, *\|\)<\/MeshHeadingList>//;
# }' $CSV
i \
bx;
};
bx;
};
bx;
}' | \
awk 'BEGIN {RS=""} {gsub(/<\n/, "<", $0); print $0}' >> $CSV
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment