import os
import re

from bs4 import BeautifulSoup


def make_soup(file: str) -> BeautifulSoup:
    """Open an XML file and return it parsed as a BeautifulSoup object.

    :param file: path of the XML file; it is read as UTF-8.
    :return: the document parsed with BeautifulSoup's ``xml`` parser.
    """
    with open(file, 'r', encoding="utf-8") as opening:
        return BeautifulSoup(opening, 'xml')


def split_div(xml_file: str, prince: str) -> list:
    """Split a corpus file into one record per acte.

    The file is parsed with :func:`make_soup`; every ``<div>`` carrying an
    ``n`` attribute is treated as one acte.

    :param xml_file: path of the XML corpus.
    :param prince: prefix used to build each acte identifier.
    :return: a list holding one six-item list per acte, in this order:
        id (``prince`` + the ``when`` value with "-" replaced by "_" + the
        ``n`` attribute of ``<date>``), the ``when`` attribute (documented
        as YYYY-MM-DD), the date as text, the ``<listWit>`` element (or
        ``None`` when the div has none), the analysis text taken from the
        first ``<p>`` of ``<argument>``, and the whole ``<div>``.

    Elements are accessed without checks: a div lacking ``<docDate>/<date>``
    (or its ``when``/``n`` attributes) or ``<argument>/<p>`` raises.
    """
    soup = make_soup(xml_file)
    actes = []
    for div in soup.find_all('div', {'n': True}):
        doc_date = div.docDate.date
        when = doc_date["when"]
        nb = doc_date["n"]
        acte_id = prince + when.replace("-", "_") + nb
        date = doc_date.text
        trad_table = div.listWit
        summary = div.argument.p
        # ``.string`` is None as soon as <p> has more than one child (e.g.
        # inline markup); a None analysis cannot be written back by
        # tei_maker, so fall back to the concatenated text in that case.
        analysis = summary.string
        if analysis is None:
            analysis = summary.get_text()
        # Progress trace on stdout, kept from the original script.
        print(when)
        print(date)
        actes.append([acte_id, when, date, trad_table, analysis, div])
    return actes


def tei_maker(lst_acte: list, house: str, folder: str, tei_canvas: str) -> None:
    """Write each acte to its own XML file built from a TEI template.

    For every acte the template is parsed afresh, then filled with the acte
    id (``xml:id`` of ``<TEI>`` and the ``<title level="a">``), the witness
    list (appended to ``<sourceDesc>``), the acte ``<div>`` (appended to
    ``<body>``) and the analysis (text of ``<abstract>/<p>``). The
    ``<argument>`` element and the ``<div type="tradition">``, if present,
    are removed from the result.

    :param lst_acte: records as returned by :func:`split_div`; the first item
        is the id and the last three are witness list, analysis and div.
    :param house: directory name under ``../static/xml``.
    :param folder: sub-directory of ``house`` receiving the files.
    :param tei_canvas: the TEI template, as an XML string.

    The output directory must already exist. Appending an element to the new
    document moves it out of the tree it came from, so the elements held in
    ``lst_acte`` are consumed by this call.
    """
    for acte in lst_acte:
        acte_id = acte[0]
        trad_table, analysis, div = acte[-3:]
        filename = os.path.join("..", "static", "xml", house, folder, acte_id + '.xml')
        soup = BeautifulSoup(tei_canvas, 'xml')
        soup.TEI["xml:id"] = acte_id
        title = soup.find("title", {"level": "a"})
        title.string = "Acte " + acte_id
        soup.sourceDesc.append(trad_table)
        soup.body.append(div)
        # find() returns None when the acte has no tradition div; only
        # remove it when it is actually there.
        tradition = soup.find("div", {"type": "tradition"})
        if tradition is not None:
            tradition.decompose()
        soup.abstract.p.string = analysis
        soup.select_one("argument").decompose()
        # Drop the empty lines from the serialised document.
        result = str(soup).replace("\n\n", "")
        with open(filename, 'w', encoding="utf-8") as out_file:
            out_file.write(result)


# TEI template handed to tei_maker.
# NOTE(review): as seen here the template holds text only, whereas tei_maker
# expects <TEI>, <title level="a">, <sourceDesc>, <body> and <abstract><p>
# elements in it -- confirm the markup is present in the real file.
canvas = """ Actes princiers Actes de Charles Ier de Bourbon transcribed by Jean-Damien Généro Acte édité dans le cadre du programme Actes princiers. direction scientifique Olivier Mattéoni direction technique Jean-Damien Généro direction technique Nicolas Perreaux Laboratoire de Médiévistique occidentale de Paris (UMR 8589), Centre de recherches historiques (UMR 8558) Olivier Mattéoni 2022 Distributed under an Open License 2.0

"""

# Script entry: split the Charles Ier corpus and write one file per acte.
corpus = "../../bourbon-latex/charles-actes-latex.xml"
tei_maker(split_div(corpus, "brb_ch_i_"), "Bourbon", "Brb_5_Charles_Ier", canvas)