From 96fb3485483748881d6fc36bc31297e8050e86ad Mon Sep 17 00:00:00 2001 From: Jean-Damien Date: Fri, 23 Dec 2022 16:35:38 +0100 Subject: [PATCH] cmd/db.py docstring create acte --- app/cmd/db.py | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/app/cmd/db.py b/app/cmd/db.py index d31f376..22ddf24 100644 --- a/app/cmd/db.py +++ b/app/cmd/db.py @@ -88,7 +88,7 @@ def _create_doc(folder: str)-> None: inst_doc = soup.repository.text # //sourceDesc//msIdentifier/repository # //sourceDesc//msIdentifier/idno[@n='1'] is always the # archive box or manuscript collection id - # (//sourceDesc//msIdentifier/idno[@n='1'] is the doc id inside + # (//sourceDesc//msIdentifier/idno[@n='2'] is the doc id inside # the box or the page number inside a manuscript) nb_doc_1 = soup.msIdentifier.find_all("idno", {"n": "1"})[0].text details_doc.append(inst_doc + " == " + nb_doc_1) @@ -124,25 +124,36 @@ def _create_agent(name_lst: list)-> None: Agent.create(**data) def _create_acte(folder: str)-> None: + """create table acte""" actes = [] counter = 0 for acte in sorted(os.listdir(folder)): if acte.endswith(".xml"): counter += 1 soup = make_soup(os.path.join(folder, acte)) - numb = soup.TEI["xml:id"] - date_time = soup.msItem.docDate["when"] - date = soup.msItem.docDate.text - analyse = soup.abstract.p.text + + # 1.1/ Get all data from XML (9). counter is the id (= numb_acte) + numb = soup.TEI["xml:id"] # /TEI[@xml:id] is always the acte's ID + date_time = soup.msItem.docDate["when"] # YYYY-MM-DD or YYYY-MM date + date = soup.msItem.docDate.text # verbose date + analyse = soup.abstract.p.text # acte's short analysis ref = soup.msIdentifier.find_all("idno", {"n": "2"}) - if len(ref) > 0: + # //sourceDesc//msIdentifier/idno[@n='2'] is the doc id inside the + # archive box or the page number inside a manuscript (see _create_doc) + # warning: the analysis may not have been written yet, + # which would result in List Index Out of Range Error. 
Hence: + if len(ref) > 0: # idno[@n='2'] found (there is an analysis) ref_acte = ref[0].text - else: + else: # no idno[@n='2'] (there is no analysis yet) ref_acte = "NS" prod_place = soup.find_all("placeName", {"type": "production_place"})[0].text + # //sourceDesc//msIdentifier/idno[@n='1'] is always the + # archive box or manuscript collection id doc = soup.msIdentifier.find_all("idno", {"n": "1"})[0] type_diplo = soup.body.div["subtype"] diplo_state = soup.body.div["type"] + + # 1.2/ For some data, we need to make queries to get foreign keys place_query = [t.id_place for t in Production_place.select().where( Production_place.placename == prod_place)] doc_query = [t.id_document for t in Document.select().where( @@ -151,6 +162,8 @@ def _create_acte(folder: str)-> None: Diplo_type.diplo_label == type_diplo)] state_query = [t.id_state for t in State.select().where( State.state_label == diplo_state)] + + # 2/ Make the data list actes.append({ "num_acte": counter, "filename": numb, @@ -163,6 +176,7 @@ def _create_acte(folder: str)-> None: "state_doc": state_query[0], "diplo_type_acte": diplo_query[0] }) + # 3/ Populate the table for data in tqdm(actes, desc="Populating Actes..."): Acte.create(**data)