import os
import re

from bs4 import BeautifulSoup


def make_soup(file: str) -> BeautifulSoup:
    """Open an XML file and return it parsed as a BeautifulSoup object.

    :param file: path of the XML file; it is read as UTF-8.
    :return: the tree produced by BeautifulSoup with the 'xml' parser.
    """
    with open(file, 'r', encoding="utf-8") as opening:
        xml = BeautifulSoup(opening, 'xml')
    return xml


def split_div(xml_file: str, prince: str) -> list:
    """Parse *xml_file* and build one record per numbered <div>.

    Only <div> elements carrying an ``n`` attribute are considered. Each
    record is a list laid out as follows (tei_maker relies on this order):

    0. acte id: ``prince`` + the ``when`` value with "-" turned into "_"
       + the ``n`` attribute of docDate/date
    1. the ``when`` attribute of docDate/date
    2. the text of docDate/date
    3. the <listWit> element of the div
    4. the string of argument/p
    5. the <orgName> element of the div
    6. a list of freshly created <idno> tags copying ``n`` and the string
       of every <idno> found in the div
    7. the whole <div>

    :param xml_file: path of the XML corpus to split.
    :param prince: prefix prepended to every acte id.
    :return: list of records, one per matching <div>.
    """
    soup = make_soup(xml_file)
    actes = []
    for div in soup.find_all('div', {'n': True}):
        doc_date = div.docDate.date
        when = doc_date["when"]
        nb = doc_date["n"]
        acte_id = prince + when.replace("-", "_") + nb
        date = doc_date.text
        trad_table = div.listWit
        analysis = div.argument.p.string
        org_name = div.orgName

        # Rebuild each <idno> as a new tag keeping only its "n" attribute
        # and its string content.
        idno_lst = []
        for idno in div.find_all("idno"):
            new_idno = soup.new_tag("idno", n=idno["n"])
            new_idno.string = idno.string
            idno_lst.append(new_idno)

        actes.append([acte_id, when, date, trad_table, analysis,
                      org_name, idno_lst, div])
    return actes


def tei_maker(lst_acte, house, folder, tei_canvas):
    """Write every acte of *lst_acte* to its own XML file.

    For each record, *tei_canvas* is parsed afresh, filled with the values
    of the record and saved as
    ``../static/xml/<house>/<folder>/<acte id>.xml`` (UTF-8).

    :param lst_acte: records laid out as returned by split_div.
    :param house: directory name placed under ``../static/xml``.
    :param folder: sub-directory name placed under *house*.
    :param tei_canvas: XML template (string) parsed once per acte.
    :return: None; the function only writes files.
    """
    for acte in lst_acte:
        # Same positions the record is built with in split_div.
        acte_id, when, date = acte[0], acte[1], acte[2]
        trad_table, analysis, org_name = acte[-5], acte[-4], acte[-3]
        idno_lst, div = acte[-2], acte[-1]

        filename = os.path.join("..", "static", "xml", house, folder,
                                acte_id + '.xml')
        soup = BeautifulSoup(tei_canvas, 'xml')

        # Header fields of the template. The organisation name is read
        # straight from the record instead of being looked up again with
        # lst_acte.index(acte), which rescanned the list for every acte.
        soup.repository.string = org_name.text
        soup.msItem.docDate.string = date
        soup.msItem.docDate["when"] = when
        soup.TEI["xml:id"] = acte_id
        title = soup.find("title", {"level": "a"})
        title.string = "Acte " + acte_id

        # The statement order below is kept as in the original: the
        # witness list goes into sourceDesc and the div into the body
        # before the "tradition" div and the argument are removed.
        soup.sourceDesc.append(trad_table)
        soup.body.append(div)
        tradition = soup.find("div", {"type": "tradition"})
        tradition.decompose()
        soup.abstract.p.string = analysis
        soup.select_one("argument").decompose()

        # Identifiers are copied only when the acte has one or two of them.
        # NOTE(review): records with zero or more than two <idno> get no
        # identifier in msIdentifier - confirm this is intended.
        if len(idno_lst) in (1, 2):
            for item in idno_lst:
                soup.msIdentifier.append(item)

        result = str(soup).replace("\n\n", "")
        with open(filename, 'w', encoding="utf-8") as out_file:
            out_file.write(result)


# NOTE(review): tei_maker addresses elements of this template (TEI,
# repository, msItem/docDate, title, sourceDesc, body, abstract/p,
# msIdentifier), but the text below shows no XML markup - confirm that the
# template is intact in the real file.
canvas = f""" Actes princiers Actes de Charles Ier de Bourbon transcribed by Jean-Damien Généro Acte édité dans le cadre du programme Actes princiers. direction scientifique Olivier Mattéoni direction technique Jean-Damien Généro direction technique Nicolas Perreaux Laboratoire de Médiévistique occidentale de Paris (UMR 8589), Centre de recherches historiques (UMR 8558) Olivier Mattéoni 2022 Distributed under an Open License 2.0

"""

corpus = "../../bourbon-latex/charles-actes-latex.xml"

tei_maker(split_div(corpus, "brb_ch_i_"), "Bourbon", "Brb_5_Charles_Ier", canvas)