From 96fb3485483748881d6fc36bc31297e8050e86ad Mon Sep 17 00:00:00 2001
From: Jean-Damien <jeandamien.genero@gmail.com>
Date: Fri, 23 Dec 2022 16:35:38 +0100
Subject: [PATCH] cmd/db.py docstring create acte

---
 app/cmd/db.py | 28 +++++++++++++++++++++-------
 1 file changed, 21 insertions(+), 7 deletions(-)

diff --git a/app/cmd/db.py b/app/cmd/db.py
index d31f376..22ddf24 100644
--- a/app/cmd/db.py
+++ b/app/cmd/db.py
@@ -88,7 +88,7 @@ def _create_doc(folder: str)-> None:
             inst_doc = soup.repository.text  # //sourceDesc//msIdentifier/repository
             # //sourceDesc//msIdentifier/idno[@n='1'] is always the 
             # archive box or manuscript collection id
-            # (//sourceDesc//msIdentifier/idno[@n='1'] is the doc id inside
+            # (//sourceDesc//msIdentifier/idno[@n='2'] is the doc id inside
             # the box or the page number inside a manuscript)
             nb_doc_1 = soup.msIdentifier.find_all("idno", {"n": "1"})[0].text
             details_doc.append(inst_doc + " == " + nb_doc_1)
@@ -124,25 +124,36 @@ def _create_agent(name_lst: list)-> None:
         Agent.create(**data)
 
 def _create_acte(folder: str)-> None:
+    """Populate the Acte table from the XML files found in folder."""
     actes = []
     counter = 0
     for acte in sorted(os.listdir(folder)):
         if acte.endswith(".xml"):
             counter += 1
             soup = make_soup(os.path.join(folder, acte))
-            numb = soup.TEI["xml:id"]
-            date_time = soup.msItem.docDate["when"]
-            date = soup.msItem.docDate.text
-            analyse = soup.abstract.p.text
+
+            # 1.1/ Get all data from XML (9). counter is the id (= num_acte)
+            numb = soup.TEI["xml:id"]  # /TEI[@xml:id] is always the acte's ID
+            date_time = soup.msItem.docDate["when"]  # YYYY-MM-DD or YYYY-MM date
+            date = soup.msItem.docDate.text  # verbose date
+            analyse = soup.abstract.p.text  # acte's short analysis
             ref = soup.msIdentifier.find_all("idno", {"n": "2"})
-            if len(ref) > 0:
+            # //sourceDesc//msIdentifier/idno[@n='2'] is the doc id inside the
+            # archive box or the page number inside a manuscript (see _create_doc)
+            # warning: this idno may be missing (e.g. when the analysis has
+            # not been written yet); ref[0] would then raise IndexError. Hence:
+            if len(ref) > 0:  # idno[@n='2'] is present
                 ref_acte = ref[0].text
-            else:
+            else:  # no idno[@n='2']: fall back to the "NS" placeholder
                 ref_acte = "NS"
             prod_place = soup.find_all("placeName", {"type": "production_place"})[0].text
+            # //sourceDesc//msIdentifier/idno[@n='1'] is always the 
+            # archive box or manuscript collection id
             doc = soup.msIdentifier.find_all("idno", {"n": "1"})[0]
             type_diplo = soup.body.div["subtype"]
             diplo_state = soup.body.div["type"]
+            
+            # 1.2/ For some data, we need to make queries to get foreign keys
             place_query = [t.id_place for t in Production_place.select().where(
                 Production_place.placename == prod_place)]
             doc_query = [t.id_document for t in Document.select().where(
@@ -151,6 +162,8 @@ def _create_acte(folder: str)-> None:
                 Diplo_type.diplo_label == type_diplo)]
             state_query = [t.id_state for t in State.select().where(
                 State.state_label == diplo_state)]
+            
+            # 2/ Make the data list
             actes.append({
                 "num_acte": counter,
                 "filename": numb,
@@ -163,6 +176,7 @@ def _create_acte(folder: str)-> None:
                 "state_doc": state_query[0],
                 "diplo_type_acte": diplo_query[0]
                 })
+    # 3/ Populate the table
     for data in tqdm(actes, desc="Populating Actes..."):
         Acte.create(**data)