|
|
|
|
import os
|
|
|
|
|
import re
|
|
|
|
|
from bs4 import BeautifulSoup
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def make_soup(file: str):
    """Parse the XML file at *file* and return the resulting BeautifulSoup tree.

    The file is read as UTF-8 text and handed to BeautifulSoup with the
    ``'xml'`` parser.
    """
    with open(file, encoding="utf-8") as handle:
        return BeautifulSoup(handle, "xml")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def split_div(xml_file: str, prince: str) -> list:
    """Parse *xml_file* and return one record per acte.

    Every ``<div>`` carrying an ``n`` attribute is treated as one acte.

    :param xml_file: path of the XML corpus, read through ``make_soup``.
    :param prince: prefix prepended to every generated acte id.
    :return: a list of 8-item lists, one per acte, laid out as
        ``[acte_id, when, date, trad_table, analysis, orgName, idno_lst, div]``:

        * ``acte_id``    -- ``prince`` + the ``when`` attribute with ``-``
          replaced by ``_`` + the ``n`` attribute of ``docDate/date``;
        * ``when``       -- the ``when`` attribute of ``docDate/date``;
        * ``date``       -- the text content of ``docDate/date``;
        * ``trad_table`` -- the ``<listWit>`` element of the div;
        * ``analysis``   -- the ``.string`` of ``argument/p``;
        * ``orgName``    -- the ``<orgName>`` element of the div;
        * ``idno_lst``   -- fresh ``<idno n="...">`` tags copying the ``n``
          attribute and string of every ``<idno>`` found in the div;
        * ``div``        -- the whole ``<div>`` element.
    """
    soup = make_soup(xml_file)
    actes = []
    for div in soup.find_all('div', {'n': True}):
        # Look the <date> element up once instead of three times.
        doc_date = div.docDate.date
        when = doc_date["when"]
        acte_id = prince + when.replace("-", "_") + doc_date["n"]

        # Rebuild each <idno> as a standalone tag so it can later be attached
        # elsewhere independently of the div it was found in.
        idno_lst = []
        for idno in div.find_all("idno"):
            new_idno = soup.new_tag("idno", n=idno["n"])
            new_idno.string = idno.string
            idno_lst.append(new_idno)

        actes.append([
            acte_id,
            when,
            doc_date.text,
            div.listWit,
            div.argument.p.string,
            div.orgName,
            idno_lst,
            div,
        ])
    return actes
|
|
|
|
|
|
|
|
|
|
def tei_maker(lst_acte, house, folder, tei_canvas):
    """Write each acte of *lst_acte* to its own TEI XML file.

    :param lst_acte: list of acte records; each record is read as
        ``[acte_id, when, date, ...]`` at the front and
        ``[..., trad_table, analysis, orgName, idno_lst, div]`` at the back
        (the layout produced by ``split_div``).
    :param house: sub-directory name placed under ``../static/xml``.
    :param folder: sub-directory name placed under *house*.
    :param tei_canvas: XML string used as the template of every output file;
        it is re-parsed for each acte.

    For every acte the template is filled in (repository, docDate, xml:id,
    level-"a" title, abstract), the acte's ``trad_table`` and ``div`` are
    appended to ``sourceDesc`` and ``body``, the ``<div type="tradition">``
    and ``<argument>`` elements are removed, and the result is written to
    ``../static/xml/<house>/<folder>/<acte_id>.xml`` (UTF-8).

    The ``<idno>`` tags are attached to ``msIdentifier`` only when the acte
    has exactly one or two of them.
    """
    for acte in lst_acte:
        # Name the record fields once rather than using magic indices below.
        acte_id, when, date = acte[0], acte[1], acte[2]
        trad_table, analysis, org_name, idnos, div = acte[-5:]

        filename = os.path.join("..", "static", "xml", house, folder, acte_id + '.xml')
        soup = BeautifulSoup(tei_canvas, 'xml')

        # Header metadata. The repository name is read straight from this
        # acte's orgName (no parallel list / index lookup needed).
        soup.repository.string = org_name.text
        soup.msItem.docDate.string = date
        soup.msItem.docDate["when"] = when
        soup.TEI["xml:id"] = acte_id
        title = soup.find("title", {"level": "a"})
        title.string = "Acte " + acte_id

        # Order matters here: the tradition table and the div are attached
        # first, and only then are the now-redundant sections looked up in
        # the assembled document and removed.
        soup.sourceDesc.append(trad_table)
        soup.body.append(div)
        soup.find("div", {"type": "tradition"}).decompose()
        soup.abstract.p.string = analysis
        soup.select_one("argument").decompose()

        # Identifiers are only attached when there are one or two of them.
        if len(idnos) in (1, 2):
            for item in idnos:
                soup.msIdentifier.append(item)

        result = str(soup).replace("\n\n", "")
        with open(filename, 'w', encoding="utf-8") as out_file:
            out_file.write(result)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# TEI template used for every generated acte file.
# It is a plain string (no interpolation): the empty elements below are the
# slots that tei_maker fills in after parsing it -- <title level="a">,
# <repository>, <docDate>, <abstract>/<p>, <body> -- together with the
# xml:id attribute it sets on <TEI>.
canvas = """<?xml version="1.0" encoding="utf-8"?>
<TEI xmlns="http://www.tei-c.org/ns/1.0">
<teiHeader>
<fileDesc>
<titleStmt>
<title level="s">Actes princiers</title>
<title level="m">Actes de Charles Ier de Bourbon</title>
<title level="a"></title>
<respStmt>
<resp>transcribed by</resp>
<name>Jean-Damien Généro</name>
</respStmt>
</titleStmt>
<editionStmt>
<edition>Acte édité dans le cadre du programme Actes princiers.</edition>
<respStmt>
<resp>direction scientifique</resp>
<name>Olivier Mattéoni</name>
</respStmt>
<respStmt>
<resp>direction technique</resp>
<name>Jean-Damien Généro</name>
</respStmt>
<respStmt>
<resp>direction technique</resp>
<name>Nicolas Perreaux</name>
</respStmt>
</editionStmt>
<publicationStmt>
<publisher>Laboratoire de Médiévistique occidentale de Paris (UMR 8589), Centre de recherches historiques (UMR 8558)</publisher>
<authority>Olivier Mattéoni</authority>
<date when="2022">2022</date>
<availability><licence source="https://github.com/etalab/licence-ouverte/blob/master/open-licence.md">Distributed under an Open License 2.0</licence></availability>
</publicationStmt>
<sourceDesc>
<msDesc>
<msIdentifier>
<repository></repository>
</msIdentifier>
<msContents>
<msItem>
<docDate></docDate>
</msItem>
</msContents>
</msDesc>
<listPerson>
<listPerson type="prince">
<person>
<name>Charles Ier de Bourbon</name>
</person>
</listPerson>
<listPerson type="signatory">
<person>
<name></name>
</person>
</listPerson>
</listPerson>
</sourceDesc>
</fileDesc>
<profileDesc>
<abstract>
<p></p>
</abstract>
</profileDesc>
</teiHeader>
<text>
<body>
</body>
</text>
</TEI>"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Path of the XML corpus to split, relative to the working directory.
corpus = "../../bourbon-latex/charles-actes-latex.xml"

# Split the corpus into actes and write one TEI file per acte.
tei_maker(
    split_div(corpus, "brb_ch_i_"),
    "Bourbon",
    "Brb_5_Charles_Ier",
    canvas,
)
|