From e2392d917a60d5ef253dc35669a81b80154401ce Mon Sep 17 00:00:00 2001
From: Pedro Folloni Pesserl <fpesserl7@gmail.com>
Date: Fri, 24 Feb 2023 10:32:21 -0300
Subject: [PATCH] tshell: p3: add sed command

---
 .gitignore          |  3 +--
 tshell/tshell_p3.sh | 25 +++++++++++++++++++++----
 2 files changed, 22 insertions(+), 6 deletions(-)

diff --git a/.gitignore b/.gitignore
index 2599fc8..383975b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,2 @@
 ./nomes-de-bebes/
-./PubMed/
-./Sinonimos/
+/tshell/temp/
diff --git a/tshell/tshell_p3.sh b/tshell/tshell_p3.sh
index f1dbb90..8356078 100755
--- a/tshell/tshell_p3.sh
+++ b/tshell/tshell_p3.sh
@@ -35,7 +35,24 @@ CSV=$2
 confere_arq_saida $CSV
 CSV=$(realpath $CSV)
 
-# separar os campos necessários do arquivo xml.gz
-# zcat $XML | xgrep -tx "//PMID|//ArticleTitle|//Abstract|//MeshHeadingList"
-
-
+# Extract the useful fields from the xml.gz file and append them to the CSV.
+zcat "$XML" | \
+	xgrep -tx "//PMID|//ArticleTitle|//Abstract|//MeshHeadingList" | \
+	# sed: on a <PMID line, append the next line; if <ArticleTitle> is found,
+	# append one more; if <Abstract> is found, print all three, then read the
+	# following line and print it only if it has <MeshHeadingList> (else drop it).
+	sed -En '
+	/<PMID/{
+		N;
+		/<ArticleTitle>/{
+			N;
+			/<Abstract>/{
+				p;
+				n;
+				/<MeshHeadingList>/{
+					p;
+				}
+			}
+		}
+	}' >> "$CSV"
+# grep 'PMID' FILE_WITH_SEPARATED_FIELDS | awk '{ print substr($0, 16, length($0)-22) }' | sed 's/">//'
-- 
GitLab