You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

95 lines
2.7 KiB
Python

import os
import re
from bs4 import BeautifulSoup
def make_soup(file: str):
    """Parse an XML file and hand back the resulting BeautifulSoup tree.

    :param file: path of the XML file; it is read as UTF-8 text.
    :return: the BeautifulSoup object built with the 'xml' parser.
    """
    with open(file, 'r', encoding="utf-8") as handle:
        # The tree is fully built before the file is closed on exit.
        return BeautifulSoup(handle, 'xml')
def split_div(xml_file: str, prince: str) -> list:
    """Split a corpus file into one record per acte.

    The file is parsed with make_soup; every <div> that carries an ``n``
    attribute is treated as one acte.

    :param xml_file: path of the XML corpus to read.
    :param prince: prefix prepended to every acte id.
    :return: a list holding one 6-item list per acte, in document order:
        ``[id, when, date, listWit, analysis, div]`` where
        - ``id`` is ``prince`` + @when (dashes turned into underscores) + @n
          of the <date> found under <docDate>,
        - ``when`` is the raw @when value (expected to be YYYY-MM-DD),
        - ``date`` is the text of that <date> element,
        - ``listWit`` is the div's <listWit> element (None if absent),
        - ``analysis`` is the string of the first <p> under <argument>,
        - ``div`` is the whole <div> element.
    :raises AttributeError: if a div has no <docDate>/<date> or no
        <argument>/<p> (the intermediate lookup yields None).
    :raises KeyError: if the <date> lacks a ``when`` or ``n`` attribute.
    """
    soup = make_soup(xml_file)
    actes = []
    for div in soup.find_all('div', {'n': True}):
        # Look the <date> element up once; it feeds the id and both dates.
        doc_date = div.docDate.date
        when = doc_date["when"]
        nb = doc_date["n"]
        acte_id = prince + when.replace("-", "_") + nb
        date = doc_date.text
        trad_table = div.listWit
        # NOTE: .string is None when the <p> holds more than one child node.
        analysis = div.argument.p.string
        actes.append([acte_id, when, date, trad_table, analysis, div])
    return actes
def tei_maker(lst_acte, house, folder, tei_canvas):
    """Write each acte to its own XML file built from a TEI skeleton.

    For every acte the skeleton is parsed afresh, its ``<title level="a">``
    is set to ``"Acte <id>"``, the acte's third-from-last item is appended
    to <sourceDesc> and its last item to <body>. The first
    ``<div type="tradition">`` found in the result, if any, is removed.
    The file is written as UTF-8 to
    ``../static/xml/<house>/<folder>/<id>.xml``.

    :param lst_acte: iterable of acte records; item 0 is the id, item -3
        the element for <sourceDesc>, item -1 the element for <body>.
    :param house: sub-folder name under ``../static/xml``.
    :param folder: sub-folder name under ``house``.
    :param tei_canvas: XML source of the TEI skeleton; it must contain a
        ``<title level="a">``, a <sourceDesc> and a <body>.
    :return: None.
    """
    for acte in lst_acte:
        acte_id, source_elem, acte_div = acte[0], acte[-3], acte[-1]
        filename = os.path.join("..", "static", "xml", house, folder, acte_id + '.xml')
        # A new tree per acte: the skeleton is modified in place below.
        soup = BeautifulSoup(tei_canvas, 'xml')
        title = soup.find("title", {"level": "a"})
        title.string = "Acte " + acte_id
        # Order matters: bs4 moves an element that already sits in another
        # tree, so the source element leaves the acte div before that div is
        # appended to <body> (presumably it sat inside the tradition div
        # removed below; confirm against the corpus).
        soup.sourceDesc.append(source_elem)
        soup.body.append(acte_div)
        tradition = soup.find("div", {"type": "tradition"})
        # find() returns None when the acte has no tradition div; skip the
        # removal then instead of failing with AttributeError.
        if tradition is not None:
            tradition.decompose()
        with open(filename, 'w', encoding="utf-8") as out_file:
            out_file.write(str(soup))
# Skeleton TEI document: a complete <teiHeader> plus an empty <sourceDesc>
# and an empty <body>. It is a plain string (it has no placeholders, so it
# needs no f prefix) and is passed to tei_maker as its tei_canvas argument.
canvas = """<?xml version="1.0" encoding="utf-8"?>
<TEI xmlns="http://www.tei-c.org/ns/1.0">
<teiHeader>
<fileDesc>
<titleStmt>
<title level="s">Actes princiers</title>
<title level="m">Actes de Charles Ier de Bourbon</title>
<title level="a"></title>
<respStmt>
<resp>transcribed by</resp>
<name>Jean-Damien Généro</name>
</respStmt>
</titleStmt>
<editionStmt>
<edition>Acte édité dans le cadre du programme Actes princiers.</edition>
<respStmt>
<resp>direction scientifique</resp>
<name>Olivier Mattéoni</name>
</respStmt>
<respStmt>
<resp>direction technique</resp>
<name>Jean-Damien Généro</name>
</respStmt>
<respStmt>
<resp>direction technique</resp>
<name>Nicolas Perreaux</name>
</respStmt>
</editionStmt>
<publicationStmt>
<publisher>Laboratoire de Médiévistique occidentale de Paris (UMR 8589), Centre de recherches historiques (UMR 8558)</publisher>
<authority>Olivier Mattéoni</authority>
<date when="2022">2022</date>
<availability><licence source="https://github.com/etalab/licence-ouverte/blob/master/open-licence.md">Distributed under an Open License 2.0</licence></availability>
</publicationStmt>
<sourceDesc>
</sourceDesc>
</fileDesc>
</teiHeader>
<text>
<body>
</body>
</text>
</TEI>"""
# Corpus file holding all the actes; the path is relative to the current
# working directory.
corpus = "../../bourbon-latex/charles-actes-latex.xml"
# Runs whenever this module is executed or imported (there is no __main__
# guard): split the corpus into actes whose ids start with "brb_ch_i_" and
# write one XML file per acte under ../static/xml/Bourbon/Brb_5_Charles_Ier/.
tei_maker(split_div(corpus, "brb_ch_i_"), "Bourbon", "Brb_5_Charles_Ier", canvas)