From ca604c277c61ca710790e56e69a5e80281dfb475 Mon Sep 17 00:00:00 2001 From: Jean-Damien Date: Fri, 23 Dec 2022 17:10:25 +0100 Subject: [PATCH] docstring for functions related to involved in --- app/cmd/db.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/app/cmd/db.py b/app/cmd/db.py index 7345c14..7afa919 100644 --- a/app/cmd/db.py +++ b/app/cmd/db.py @@ -232,22 +232,36 @@ def __grape_indiv(list_person, role: str): print(person_text, "==", prince_q[0]) def _create_involved_in(folder: str): + """create involved in table + NB : relation table containing 3 foreign keys + """ princes_actes = [] for acte in os.listdir(folder): if acte.endswith(".xml"): + # 1/ Get act's id. + # the file name is the act's id when you remove ".xml" + # so we make a query on Acte to get the primary key. + # the act's id remains the same whether the individual + # is a prince or a signatory, so no need to iterate. acte_q = [t.id_acte for t in Acte.select().where( Acte.filename == acte.replace(".xml", ""))] # print(acte, "==", acte_q[0]) soup = make_soup(os.path.join(folder, acte)) + # 2A/ 1st iteration for prince individuals. 
for persons in soup.sourceDesc.find_all("listPerson", {"type": "prince"}): + # persons == //sourceDesc/listPerson/listPerson[@type="prince"] for person_tag in persons.find_all("person"): + # person_tag = \n\n?prince's name\n?\n person_text = person_tag.text.replace("\n", "") if person_text != "None": + # get the prince's id prince_q = [t.id_indiv for t in Individual.select().where( Individual.name_indiv == person_text)] + # a prince is always an act's producer interv_q = [t.id_intev for t in Intervention_type.select().where( Intervention_type.interv_label == "producer")] # print(person_text, "==", prince_q[0]) + # Exception Handling : when we forget to add the name in the csv try: prince_q[0] except IndexError: @@ -256,6 +270,9 @@ def _create_involved_in(folder: str): princes_actes.append({"involved_in_acte": acte_q[0], "involved_in_prince": prince_q[0], "invol_in_interv": interv_q[0]}) + # 2B/ 2nd iteration for signatory individuals. + # NB : a prince may sign an act + # the process is basically the same as 2A for persons in soup.sourceDesc.find_all("listPerson", {"type": "signatory"}): for person_tag in persons.find_all("person"): person_text = person_tag.text.replace("\n", "") @@ -273,6 +290,7 @@ def _create_involved_in(folder: str): princes_actes.append({"involved_in_acte": acte_q[0], "involved_in_prince": prince_q[0], "invol_in_interv": interv_q[0]}) + # 3/ Insert the rows into the table for data in tqdm(princes_actes, desc="Populating involved_in..."): Involved_in.create(**data)