new script : v1

4 years ago · 3c930aa218
parent 30c40ef970
commit 3c930aa218
1 changed files with 30 additions and 41 deletions
--- a/app/cmd/split_div.py
+++ b/app/cmd/split_div.py
@ -11,47 +11,37 @@ def make_soup(file: str):
 def split_div(xml_file: str, prince: str)-> dict:
-	"""1) read xml file with function make_soup
+	"""read xml file with function make_soup
-	2) make a list with all //div if //div/@n == True
+	& make a list for each acte with : id, date
-	3) make actes' ids with param <prince> + date
+	(YYYY-MM-DD), date (string), sourceDesc,
-	manage double dates : each //docDate/date must have
+	analysis, the whole div.
 	a @n with 'a', 'b', etc... the commented script may
 	help you to check whether there are double dates.
 	4) merge actes' ids list with div list"""
 	# 1
 	soup = make_soup(xml_file)
 	# 2
 	actes = [div for div in soup.find_all('div', {'n': True})]
 	# 3
 	actes_id =[]
 	for acte in actes:
 		date = acte.div.docDate.date["when"]
 		date_nb = acte.div.docDate.date["n"]
 		actes_id.append(prince + date.replace("-", "_") + date_nb)
 	"""
 	# script to get all double dates
 	prev_acte = None
 	for id_acte in actes_id:
 		if prev_acte == id_acte:
 			print(prev_acte, id_acte)
 		prev_acte = id_acte
 	"""
-	# 4
+	soup = make_soup(xml_file)
-	if len(actes_id) == len(actes):
+	actes = []
-		dict_actes = {actes_id[item]: actes[item] for item in range(len(actes_id))}
+	for div in soup.find_all('div', {'n': True}):
-	else:
+		when = div.docDate.date["when"]
-		print(f"""** ERROR **\nactes_id == {len(actes_id)}\nactes == {len(actes)}""")
+		nb = div.docDate.date["n"]
-	return dict_actes
+		acte_id = prince + when.replace("-", "_") + nb
-
+		date = div.docDate.date.text
-def create_file(actes, house, folder, tei_canvas):
+		trad_table = div.listWit
-	for acte in actes:
+		analysis = div.argument.p.string
-		filename = os.path.join("..", "static", "xml", house, folder, acte + '.xml')
+		acte = [acte_id, when, date, trad_table, analysis, div]
-		with open(filename, 'w', encoding='utf-8') as writting:
+		actes.append(acte)
-			writting.write(str(actes[acte]))
+	return actes
 		with open(filename, 'r', encoding='utf-8') as xml:
 			soup = BeautifulSoup(xml, "xml")
 		print(soup)
 def tei_maker(lst_acte, house, folder, tei_canvas):
 	"""Write one TEI XML file per acte.

 	For every entry of lst_acte a fresh document is parsed from
 	tei_canvas, filled in with that acte's data and written to
 	../static/xml/<house>/<folder>/<acte id>.xml (path is relative to
 	the current working directory).

 	lst_acte   -- iterable of sequences; index 0 is used as the acte id
 	              (file name and title text), index -3 is appended to
 	              <sourceDesc> and index -1 to <body>. Presumably the
 	              lists built by split_div -- confirm against the caller.
 	house      -- directory name under static/xml
 	folder     -- sub-directory name under house
 	tei_canvas -- XML string used as the template; it is expected to
 	              contain the <title level="a">, <sourceDesc> and <body>
 	              elements accessed below.
 	"""
 	for acte in lst_acte:
 		# Target file is named after the acte id.
 		filename = os.path.join("..", "static", "xml", house, folder, acte[0] + '.xml')
 		# Re-parse the template on every iteration so each acte starts
 		# from an unmodified copy of the canvas.
 		soup = BeautifulSoup(tei_canvas, 'xml')
 		# Fill in the analytic-level title with the acte id.
 		title = soup.find("title", {"level":"a"})
 		title.string = "Acte " + acte[0]
 		# Insert the third-from-last item into the header's <sourceDesc>
 		# and the last item into the text <body>.
 		soup.sourceDesc.append(acte[-3])
 		soup.body.append(acte[-1])
 		# Remove the first <div type="tradition"> found in the assembled
 		# document.
 		# NOTE(review): find() gives None when no such div exists, and the
 		# decompose() call would then fail -- confirm every acte has one.
 		tradition = soup.find("div", {"type": "tradition"})
 		tradition.decompose()
 		# Serialise the whole document; an existing file is overwritten.
 		with open(filename, 'w', encoding="utf-8") as writting:
 			writting.write(str(soup))
 canvas = f"""<?xml version="1.0" encoding="utf-8"?>
@ -94,7 +84,6 @@ canvas = f"""<?xml version="1.0" encoding="utf-8"?>
 </teiHeader>
 <text>
 <body>
 ++ACTE++
 </body>
 </text>
 </TEI>"""
@ -102,4 +91,4 @@ canvas = f"""<?xml version="1.0" encoding="utf-8"?>
 corpus = "../../bourbon-latex/charles-actes-latex.xml"
-create_file(split_div(corpus, "brb_ch_i_"), "Bourbon", "Brb_5_Charles_Ier", canvas)
+tei_maker(split_div(corpus, "brb_ch_i_"), "Bourbon", "Brb_5_Charles_Ier", canvas)