new script : v1

main
jgenero 3 years ago
parent 30c40ef970
commit 3c930aa218

@ -11,47 +11,37 @@ def make_soup(file: str):
def split_div(xml_file: str, prince: str)-> dict:
"""1) read xml file with function make_soup
2) make a list with all //div if //div/@n == True
3) make actes' ids with param <prince> + date
manage double dates : each //docDate/date must have
a @n with 'a', 'b', etc... the commented script may
help you to check if there are double dates.
4) merge actes' ids list with div list"""
# 1
soup = make_soup(xml_file)
# 2
actes = [div for div in soup.find_all('div', {'n': True})]
# 3
actes_id =[]
for acte in actes:
date = acte.div.docDate.date["when"]
date_nb = acte.div.docDate.date["n"]
actes_id.append(prince + date.replace("-", "_") + date_nb)
"""
# script to get all double dates
prev_acte = None
for id_acte in actes_id:
if prev_acte == id_acte:
print(prev_acte, id_acte)
prev_acte = id_acte
"""read xml file with function make_soup
& make a list for each acte with : id, date
(YYYY-MM-DD), date (string), sourceDesc,
analysis, the whole div.
"""
# 4
if len(actes_id) == len(actes):
dict_actes = {actes_id[item]: actes[item] for item in range(len(actes_id))}
else:
print(f"""** ERROR **\nactes_id == {len(actes_id)}\nactes == {len(actes)}""")
return dict_actes
def create_file(actes, house, folder, tei_canvas):
    """Dump every acte to its own XML file, then parse it back and print it.

    For each key of ``actes`` the string form of ``actes[key]`` is written to
    ``../static/xml/<house>/<folder>/<key>.xml`` (UTF-8). The file is then
    reopened, parsed with BeautifulSoup's ``"xml"`` parser and the resulting
    tree is printed to stdout.

    :param actes: container iterated for keys and indexed by them
        (presumably a dict mapping acte id -> acte div; verify against caller).
    :param house: sub-directory name under ``../static/xml``.
    :param folder: sub-directory name under ``house``.
    :param tei_canvas: accepted but not used by this function.
    """
    for acte_id in actes:
        path_parts = ("..", "static", "xml", house, folder, acte_id + '.xml')
        filename = os.path.join(*path_parts)

        # Write the raw acte first ...
        with open(filename, 'w', encoding='utf-8') as out_file:
            out_file.write(str(actes[acte_id]))

        # ... then read it back through the XML parser and show the result.
        with open(filename, 'r', encoding='utf-8') as xml:
            parsed = BeautifulSoup(xml, "xml")
            print(parsed)
soup = make_soup(xml_file)
actes = []
for div in soup.find_all('div', {'n': True}):
when = div.docDate.date["when"]
nb = div.docDate.date["n"]
acte_id = prince + when.replace("-", "_") + nb
date = div.docDate.date.text
trad_table = div.listWit
analysis = div.argument.p.string
acte = [acte_id, when, date, trad_table, analysis, div]
actes.append(acte)
return actes
def tei_maker(lst_acte, house, folder, tei_canvas):
    """Write each acte into its own TEI XML file built from ``tei_canvas``.

    For every entry of ``lst_acte`` the canvas is parsed afresh, then:

    * the ``<title level="a">`` text is set to ``"Acte " + acte[0]``;
    * ``acte[-3]`` is appended to ``<sourceDesc>``;
    * ``acte[-1]`` is appended to ``<body>``;
    * the first ``<div type="tradition">`` found in the tree is removed;
    * the tree is written to
      ``../static/xml/<house>/<folder>/<acte[0]>.xml`` (UTF-8).

    Missing optional pieces (no matching title in the canvas, ``acte[-3]``
    being ``None``, no tradition div) are skipped instead of raising.

    :param lst_acte: iterable of sequences; index 0 is the acte id (str),
        index -3 a tag to move into ``<sourceDesc>`` (or ``None``) and
        index -1 the acte's tag to put into ``<body>``.
    :param house: sub-directory name under ``../static/xml``.
    :param folder: sub-directory name under ``house``.
    :param tei_canvas: XML text of the TEI skeleton; it must contain
        ``<sourceDesc>`` and ``<body>`` elements.
    """
    # The target directory does not depend on the acte: compute it once.
    target_dir = os.path.join("..", "static", "xml", house, folder)

    for acte in lst_acte:
        filename = os.path.join(target_dir, acte[0] + '.xml')

        # Re-parse the canvas for every acte because the tree is mutated below.
        soup = BeautifulSoup(tei_canvas, 'xml')

        title = soup.find("title", {"level": "a"})
        if title is not None:
            title.string = "Acte " + acte[0]

        # find()/attribute lookups yield None when the element is absent;
        # appending None to a tag is an error, so only append real nodes.
        if acte[-3] is not None:
            soup.sourceDesc.append(acte[-3])
        soup.body.append(acte[-1])

        # Must run after the acte has been appended so that a tradition div
        # carried by the acte itself is found as well.
        tradition = soup.find("div", {"type": "tradition"})
        if tradition is not None:
            tradition.decompose()

        with open(filename, 'w', encoding="utf-8") as out_file:
            out_file.write(str(soup))
canvas = f"""<?xml version="1.0" encoding="utf-8"?>
@ -94,7 +84,6 @@ canvas = f"""<?xml version="1.0" encoding="utf-8"?>
</teiHeader>
<text>
<body>
++ACTE++
</body>
</text>
</TEI>"""
@ -102,4 +91,4 @@ canvas = f"""<?xml version="1.0" encoding="utf-8"?>
corpus = "../../bourbon-latex/charles-actes-latex.xml"
create_file(split_div(corpus, "brb_ch_i_"), "Bourbon", "Brb_5_Charles_Ier", canvas)
tei_maker(split_div(corpus, "brb_ch_i_"), "Bourbon", "Brb_5_Charles_Ier", canvas)

Loading…
Cancel
Save