From 4fec76117a4318cf7a81dfe047d3f8f3aede3376 Mon Sep 17 00:00:00 2001 From: jgenero Date: Mon, 24 Oct 2022 11:11:52 +0200 Subject: [PATCH] + tables Agent & Transcribed_by --- app/cmd/db.py | 50 +++++++++++++++++++++++++++++++++++------ app/modeles/__init__.py | 4 ++-- app/modeles/data.py | 19 +++++++++++++++- 3 files changed, 63 insertions(+), 10 deletions(-) diff --git a/app/cmd/db.py b/app/cmd/db.py index 478ddd0..2c2aebb 100644 --- a/app/cmd/db.py +++ b/app/cmd/db.py @@ -10,7 +10,7 @@ from tqdm import tqdm from app.app import APPPATH, db from app.data_actes import diplomatic_type, institution, state, houses, interventions -from app.modeles import Institution, State, House, Intervention_type, Production_place, Diplo_type, Document, Acte, Individual, Involved_in +from app.modeles import Institution, State, House, Intervention_type, Production_place, Diplo_type, Document, Agent, Acte, Transcribed_by, Individual, Involved_in db_cli = AppGroup("db") @@ -99,6 +99,20 @@ def _create_doc(folder: str)-> None: for data in tqdm(infos_doc, desc="Populating Document..."): Document.create(**data) +def __find_transcribers(folder: str)-> None: + transcribers = [] + for acte in sorted(os.listdir(folder)): + if acte.endswith(".xml"): + soup = make_soup(os.path.join(folder, acte)) + transcriber = soup.fileDesc.titleStmt.respStmt + for name in transcriber.find_all("name"): + transcribers.append(name.text) + return set(transcribers) + +def _create_agent(name_lst: list)-> None: + for data in tqdm(name_lst, desc="Populating Agent..."): + Agent.create(**data) + def _create_acte(folder: str)-> None: actes = [] counter = 0 @@ -142,6 +156,23 @@ def _create_acte(folder: str)-> None: for data in tqdm(actes, desc="Populating Actes..."): Acte.create(**data) +def _create_transcribed_by(folder: str)-> None: + transcribed = [] + for acte in os.listdir(folder): + if acte.endswith(".xml"): + soup = make_soup(os.path.join(folder, acte)) + acte_q = [t.id_acte for t in Acte.select().where( + Acte.filename == acte.replace(".xml", ""))] + transcriber = soup.fileDesc.titleStmt.respStmt + for name in transcriber.find_all("name"): + agent_q = [t.id_agent for t in Agent.select().where( + Agent.agent_name == name.text)] + transcribed.append({"transcr_acte": acte_q[0], + "transcr_agent": agent_q[0]}) + for data in tqdm(transcribed, desc="Populating Transcribed_by..."): + Transcribed_by.create(**data) + + def __find_indiv(folder: str, role: str)-> None: indiv_lst = [] for acte in os.listdir(folder): @@ -181,8 +212,6 @@ def __grape_indiv(list_person, role: str): # Duke.indiv_duke == prince_q[0])] print(person_text, "==", prince_q[0]) - - def _create_involved_in(folder: str): princes_actes = [] for acte in os.listdir(folder): @@ -236,22 +265,26 @@ def init() -> None: print("Dropping existing DB...") db.drop_tables([Institution, State, House, Intervention_type, - Production_place, Diplo_type, Document, Acte, Individual, - Involved_in]) + Production_place, Diplo_type, Document, Agent, Acte, Transcribed_by, + Individual, Involved_in]) print("Re-creating schema...") db.create_tables([Institution, State, House, Intervention_type, - Production_place, Diplo_type, Document, Acte, Individual, Involved_in]) + Production_place, Diplo_type, Document, Agent, Acte, + Transcribed_by, Individual, Involved_in]) _create_institution(institution) _create_state(state) _create_house(houses) _create_interv_type(interventions) _create_diplo_type(diplomatic_type) actors = [*__csv_indiv_infos("secret"), *__csv_indiv_infos("prince")] - _create_indiv(actors) + agents_names = [] for prince_house in princes_houses: xml_folder = os.path.join(APPPATH, "static", "xml", prince_house) print("\n\n**** HOUSE ", prince_house, " ****") + for name in __find_transcribers(xml_folder): + agents_names.append(name) + # check which names need to be add to the actors.csv names_in_csv = [actor[0] for actor in actors] names_in_xml = __find_indiv(xml_folder, "signatory") @@ -260,7 +293,10 @@ def init() -> None: _create_produc_place(xml_folder) _create_doc(xml_folder) _create_acte(xml_folder) + agents = [{"agent_name": agent} for agent in set(agents_names)] + _create_agent(agents) for prince_house in princes_houses: print("\n\n**** INVOLVED IN ", prince_house, " ****") xml_folder = os.path.join(APPPATH, "static", "xml", prince_house) + _create_transcribed_by(xml_folder) _create_involved_in(xml_folder) diff --git a/app/modeles/__init__.py b/app/modeles/__init__.py index cfc9f2d..28b8612 100644 --- a/app/modeles/__init__.py +++ b/app/modeles/__init__.py @@ -1,3 +1,3 @@ -from .data import Institution, State, House, Intervention_type, Production_place, Diplo_type, Document, Acte, Individual, Involved_in +from .data import Institution, State, House, Intervention_type, Production_place, Diplo_type, Document, Agent, Acte, Transcribed_by, Individual, Involved_in -__all__ = ["Institution", "State", "House", "Intervention_type", "Production_place", "Diplo_type", "Document", "Acte", "Individual", "involved_in"] +__all__ = ["Institution", "State", "House", "Intervention_type", "Production_place", "Diplo_type", "Document", "Agent", "Acte", "Transcribed_by", "Individual", "involved_in"] diff --git a/app/modeles/data.py b/app/modeles/data.py index f3132cd..97262fb 100644 --- a/app/modeles/data.py +++ b/app/modeles/data.py @@ -89,6 +89,14 @@ class Document(BaseModel): database = db db_table = 'Document' +class Agent(BaseModel): + id_agent = peewee.AutoField() + agent_name = peewee.TextField() + + class Meta: + database = db + db_table = 'Agent' + class Acte(BaseModel): id_acte = peewee.AutoField() @@ -107,6 +115,15 @@ class Acte(BaseModel): database = db db_table = 'Acte' +class Transcribed_by(BaseModel): + id_transcr = peewee.AutoField() + transcr_acte = peewee.ForeignKeyField(Acte, backref='transcribed_bys') + transcr_agent = peewee.ForeignKeyField(Agent, backref='transcribed_bys') + + class Meta: + database = db + db_table = 'Transcribed_by' + class Individual(BaseModel): id_indiv = peewee.AutoField() name_indiv = peewee.TextField() @@ -128,4 +145,4 @@ class Involved_in(BaseModel): class Meta: database = db - db_table = 'involved_in' + db_table = 'Involved_in'