# Individual-related helpers of app/cmd/db.py, in their post-patch state.


def __find_indiv(folder: str, role: str) -> set:
    """Collect the distinct person names listed under *role* in the acte files.

    Every entry of ``folder`` is parsed with ``make_soup``. Inside its
    ``sourceDesc``, each ``listPerson`` element whose ``type`` equals *role*
    is searched for ``person`` children; the text of each one, with newlines
    removed, is kept.

    ``init()`` calls this with the role ``"signatory"`` and prints a warning
    for every returned name that is absent from ``actors.csv``.

    :param folder: directory whose entries are the acte files to parse
    :param role: ``type`` attribute of the ``listPerson`` elements to read
    :return: set of person names (duplicates across files are collapsed)
    """
    names = set()
    for acte in os.listdir(folder):
        soup = make_soup(os.path.join(folder, acte))
        for person_list in soup.sourceDesc.find_all("listPerson", {"type": role}):
            for person in person_list.find_all("person"):
                names.add(person.text.replace("\n", ""))
    return names


def __csv_indiv_infos(indiv_type: str) -> list:
    """Return the rows of ``static/csv/actors.csv`` whose role is *indiv_type*.

    The file is read as ``;``-separated UTF-8 and its first (header) row is
    skipped. The role is compared against the second column of each row.

    :param indiv_type: role to keep (``init()`` uses ``"secret"`` and
        ``"prince"``)
    :return: list of matching rows, each row being a list of strings
    """
    csv_path = os.path.join(APPPATH, "static", "csv", "actors.csv")
    # newline="" lets the csv module do its own newline handling.
    with open(csv_path, "r", encoding="utf-8", newline="") as opening:
        actors_csv = csv.reader(opening, delimiter=";")
        next(actors_csv, None)
        # Blank lines come back as [] from csv.reader; skip them (and any
        # row too short to carry a role) instead of raising IndexError.
        return [row for row in actors_csv
                if len(row) > 1 and row[1] == indiv_type]


def __compareList(l1, l2):
    """Tell whether two lists hold the same elements, ignoring order.

    The comparison is done on sorted copies, so neither argument is
    reordered.

    :param l1: first list
    :param l2: second list
    :return: ``"Equal"`` when the sorted lists match, ``"Non equal"`` otherwise
    """
    if sorted(l1) == sorted(l2):
        return "Equal"
    return "Non equal"


def _create_indiv(list_csv: list) -> None:
    """Insert one ``Individual`` row per actor row.

    Each row is indexed as: name (0), role (1), house label (2) and three
    dates (3, 4, 5). The house label is resolved to the ``id_house`` of the
    first ``House`` whose ``house_label`` matches it.

    ``init()`` passes the combined ``"secret"`` and ``"prince"`` rows
    returned by ``__csv_indiv_infos``.

    :param list_csv: actor rows, laid out as described above
    :raises IndexError: if a row is too short or its house label matches
        no ``House``
    """
    house_ids = {}

    def house_id(label):
        # Many actors share a house: query each label only once.
        if label not in house_ids:
            matches = [house.id_house for house in
                       House.select().where(House.house_label == label)]
            if not matches:
                raise IndexError(
                    "no House found with label {!r}".format(label))
            house_ids[label] = matches[0]
        return house_ids[label]

    # Resolve every row before inserting, so that a bad row aborts the
    # whole population instead of leaving it half done.
    individuals = [{"name_indiv": actor[0],
                    "role_indiv": actor[1],
                    "house_indiv": house_id(actor[2]),
                    "date1": actor[3],
                    "date2": actor[4],
                    "date3": actor[5]}
                   for actor in list_csv]
    for data in tqdm(individuals, desc="Populating Individual..."):
        Individual.create(**data)