cmd/db.py docstring create acte

4 years ago · 96fb348548
parent fb8b55a504
commit 96fb348548
1 changed files with 21 additions and 7 deletions
--- a/app/cmd/db.py
+++ b/app/cmd/db.py
@ -88,7 +88,7 @@ def _create_doc(folder: str)-> None:
            inst_doc = soup.repository.text  # //sourceDesc//msIdentifier/repository
            # //sourceDesc//msIdentifier/idno[@n='1'] is always the 
            # archive box or manuscript collection id
-            # (//sourceDesc//msIdentifier/idno[@n='1'] is the doc id inside
+            # (//sourceDesc//msIdentifier/idno[@n='2'] is the doc id inside
            # the box or the page number inside a manuscript)
            nb_doc_1 = soup.msIdentifier.find_all("idno", {"n": "1"})[0].text
            details_doc.append(inst_doc + " == " + nb_doc_1)
@ -124,25 +124,36 @@ def _create_agent(name_lst: list)-> None:
        Agent.create(**data)
 def _create_acte(folder: str)-> None:
    """create table acte"""
    actes = []
    counter = 0
    for acte in sorted(os.listdir(folder)):
        if acte.endswith(".xml"):
            counter += 1
            soup = make_soup(os.path.join(folder, acte))
-            numb = soup.TEI["xml:id"]
+
-            date_time = soup.msItem.docDate["when"]
+            # 1.1/ Get all data from XML (9). counter is the id (= num_acte)
-            date = soup.msItem.docDate.text
+            numb = soup.TEI["xml:id"]  # /TEI[@xml:id] is always the acte's ID
-            analyse = soup.abstract.p.text
+            date_time = soup.msItem.docDate["when"]  # YYYY-MM-DD or YYYY-MM date
            date = soup.msItem.docDate.text  # verbose date
            analyse = soup.abstract.p.text  # acte's short analysis
            ref = soup.msIdentifier.find_all("idno", {"n": "2"})
-            if len(ref) > 0:
+            # //sourceDesc//msIdentifier/idno[@n='2'] is the doc id inside the
            # archive box or the page number inside a manuscript (see _create_doc)
            # warning: this idno may be missing (e.g. when the analysis has not
            # been written yet), and ref[0] on an empty list would then raise
            # an IndexError ("list index out of range"). Hence:
            if len(ref) > 0:  # there is an analysis
                ref_acte = ref[0].text
-            else:
+            else:  # there is no analysis
                ref_acte = "NS"
            prod_place = soup.find_all("placeName", {"type": "production_place"})[0].text
            # //sourceDesc//msIdentifier/idno[@n='1'] is always the 
            # archive box or manuscript collection id
            doc = soup.msIdentifier.find_all("idno", {"n": "1"})[0]
            type_diplo = soup.body.div["subtype"]
            diplo_state = soup.body.div["type"]
            # 1.2/ For some data, we need to make queries to get foreign keys
            place_query = [t.id_place for t in Production_place.select().where(
                Production_place.placename == prod_place)]
            doc_query = [t.id_document for t in Document.select().where(
@ -151,6 +162,8 @@ def _create_acte(folder: str)-> None:
                Diplo_type.diplo_label == type_diplo)]
            state_query = [t.id_state for t in State.select().where(
                State.state_label == diplo_state)]
            # 2/ Make the data list
            actes.append({
                "num_acte": counter,
                "filename": numb,
@ -163,6 +176,7 @@ def _create_acte(folder: str)-> None:
                "state_doc": state_query[0],
                "diplo_type_acte": diplo_query[0]
                })
    # 4/ populate the table: insert one row per collected acte
    for data in tqdm(actes, desc="Populating Actes..."):
        Acte.create(**data)