+ tables Agent & Transcribed_by

main
jgenero 3 years ago
parent 3d037d35ea
commit 4fec76117a

@ -10,7 +10,7 @@ from tqdm import tqdm
from app.app import APPPATH, db from app.app import APPPATH, db
from app.data_actes import diplomatic_type, institution, state, houses, interventions from app.data_actes import diplomatic_type, institution, state, houses, interventions
from app.modeles import Institution, State, House, Intervention_type, Production_place, Diplo_type, Document, Acte, Individual, Involved_in from app.modeles import Institution, State, House, Intervention_type, Production_place, Diplo_type, Document, Agent, Acte, Transcribed_by, Individual, Involved_in
db_cli = AppGroup("db") db_cli = AppGroup("db")
@ -99,6 +99,20 @@ def _create_doc(folder: str)-> None:
for data in tqdm(infos_doc, desc="Populating Document..."): for data in tqdm(infos_doc, desc="Populating Document..."):
Document.create(**data) Document.create(**data)
def __find_transcribers(folder: str) -> set:
    """Collect the distinct transcriber names found in a folder of XML files.

    Every file directly inside ``folder`` whose name ends with ``.xml`` is
    parsed with ``make_soup``; the text of each ``name`` element found under
    ``fileDesc > titleStmt > respStmt`` is gathered.

    :param folder: path of the directory holding the XML files.
    :return: the set of name strings found (empty when the folder holds no
        ``.xml`` file).
    """
    transcribers = set()
    # Files are visited in sorted order so parsing (and any parsing error)
    # happens in a reproducible sequence.
    for acte in sorted(os.listdir(folder)):
        if not acte.endswith(".xml"):
            continue
        soup = make_soup(os.path.join(folder, acte))
        # NOTE(review): presumably make_soup returns a BeautifulSoup tree, in
        # which case a missing fileDesc/titleStmt/respStmt element makes this
        # chain raise AttributeError -- confirm against make_soup.
        resp_stmt = soup.fileDesc.titleStmt.respStmt
        transcribers.update(name.text for name in resp_stmt.find_all("name"))
    return transcribers
def _create_agent(name_lst: list) -> None:
    """Insert one Agent row for each entry of ``name_lst``.

    Each entry is unpacked as keyword arguments to ``Agent.create``, so it
    has to be a mapping of Agent field names to values (``init`` passes
    ``{"agent_name": ...}`` dictionaries).

    :param name_lst: iterable of field mappings, one per Agent row.
    """
    progress = tqdm(name_lst, desc="Populating Agent...")
    for agent_fields in progress:
        Agent.create(**agent_fields)
def _create_acte(folder: str)-> None: def _create_acte(folder: str)-> None:
actes = [] actes = []
counter = 0 counter = 0
@ -142,6 +156,23 @@ def _create_acte(folder: str)-> None:
for data in tqdm(actes, desc="Populating Actes..."): for data in tqdm(actes, desc="Populating Actes..."):
Acte.create(**data) Acte.create(**data)
def _create_transcribed_by(folder: str) -> None:
    """Populate the Transcribed_by table from a folder of XML files.

    For every file in ``folder`` whose name ends with ``.xml``, the Acte row
    whose ``filename`` equals the file name without ``.xml`` is looked up.
    One Transcribed_by row is then created per ``name`` element found under
    ``fileDesc > titleStmt > respStmt``, linking that Acte to the Agent whose
    ``agent_name`` equals the element text.

    The Acte and Agent tables must already be populated for the files of
    ``folder`` before this function is called.

    :param folder: path of the directory holding the XML files.
    :raises IndexError: if a transcriber name is found in a file that has no
        matching Acte row, or if no Agent row carries that name.
    """
    transcribed = []
    for acte in os.listdir(folder):
        if not acte.endswith(".xml"):
            continue
        soup = make_soup(os.path.join(folder, acte))
        acte_q = [t.id_acte for t in Acte.select().where(
            Acte.filename == acte.replace(".xml", ""))]
        transcriber = soup.fileDesc.titleStmt.respStmt
        for name in transcriber.find_all("name"):
            # Fail with a message that names the culprit instead of a bare
            # "list index out of range"; the exception type is unchanged.
            if not acte_q:
                raise IndexError(
                    "no Acte row matches file {!r}".format(acte))
            agent_q = [t.id_agent for t in Agent.select().where(
                Agent.agent_name == name.text)]
            if not agent_q:
                raise IndexError(
                    "no Agent row named {!r} (referenced in file {!r})".format(
                        name.text, acte))
            transcribed.append({"transcr_acte": acte_q[0],
                                "transcr_agent": agent_q[0]})
    for data in tqdm(transcribed, desc="Populating Transcribed_by..."):
        Transcribed_by.create(**data)
def __find_indiv(folder: str, role: str)-> None: def __find_indiv(folder: str, role: str)-> None:
indiv_lst = [] indiv_lst = []
for acte in os.listdir(folder): for acte in os.listdir(folder):
@ -181,8 +212,6 @@ def __grape_indiv(list_person, role: str):
# Duke.indiv_duke == prince_q[0])] # Duke.indiv_duke == prince_q[0])]
print(person_text, "==", prince_q[0]) print(person_text, "==", prince_q[0])
def _create_involved_in(folder: str): def _create_involved_in(folder: str):
princes_actes = [] princes_actes = []
for acte in os.listdir(folder): for acte in os.listdir(folder):
@ -236,22 +265,26 @@ def init() -> None:
print("Dropping existing DB...") print("Dropping existing DB...")
db.drop_tables([Institution, State, House, Intervention_type, db.drop_tables([Institution, State, House, Intervention_type,
Production_place, Diplo_type, Document, Acte, Individual, Production_place, Diplo_type, Document, Agent, Acte, Transcribed_by,
Involved_in]) Individual, Involved_in])
print("Re-creating schema...") print("Re-creating schema...")
db.create_tables([Institution, State, House, Intervention_type, db.create_tables([Institution, State, House, Intervention_type,
Production_place, Diplo_type, Document, Acte, Individual, Involved_in]) Production_place, Diplo_type, Document, Agent, Acte,
Transcribed_by, Individual, Involved_in])
_create_institution(institution) _create_institution(institution)
_create_state(state) _create_state(state)
_create_house(houses) _create_house(houses)
_create_interv_type(interventions) _create_interv_type(interventions)
_create_diplo_type(diplomatic_type) _create_diplo_type(diplomatic_type)
actors = [*__csv_indiv_infos("secret"), *__csv_indiv_infos("prince")] actors = [*__csv_indiv_infos("secret"), *__csv_indiv_infos("prince")]
_create_indiv(actors) _create_indiv(actors)
agents_names = []
for prince_house in princes_houses: for prince_house in princes_houses:
xml_folder = os.path.join(APPPATH, "static", "xml", prince_house) xml_folder = os.path.join(APPPATH, "static", "xml", prince_house)
print("\n\n**** HOUSE ", prince_house, " ****") print("\n\n**** HOUSE ", prince_house, " ****")
for name in __find_transcribers(xml_folder):
agents_names.append(name)
# check which names need to be added to the actors.csv # check which names need to be added to the actors.csv
names_in_csv = [actor[0] for actor in actors] names_in_csv = [actor[0] for actor in actors]
names_in_xml = __find_indiv(xml_folder, "signatory") names_in_xml = __find_indiv(xml_folder, "signatory")
@ -260,7 +293,10 @@ def init() -> None:
_create_produc_place(xml_folder) _create_produc_place(xml_folder)
_create_doc(xml_folder) _create_doc(xml_folder)
_create_acte(xml_folder) _create_acte(xml_folder)
agents = [{"agent_name": agent} for agent in set(agents_names)]
_create_agent(agents)
for prince_house in princes_houses: for prince_house in princes_houses:
print("\n\n**** INVOLVED IN ", prince_house, " ****") print("\n\n**** INVOLVED IN ", prince_house, " ****")
xml_folder = os.path.join(APPPATH, "static", "xml", prince_house) xml_folder = os.path.join(APPPATH, "static", "xml", prince_house)
_create_transcribed_by(xml_folder)
_create_involved_in(xml_folder) _create_involved_in(xml_folder)

@ -1,3 +1,3 @@
from .data import Institution, State, House, Intervention_type, Production_place, Diplo_type, Document, Acte, Individual, Involved_in from .data import Institution, State, House, Intervention_type, Production_place, Diplo_type, Document, Agent, Acte, Transcribed_by, Individual, Involved_in
# Public names re-exported by the package. Every entry must match a name
# imported above exactly (case included), otherwise ``from <package> import *``
# raises AttributeError.
__all__ = ["Institution", "State", "House", "Intervention_type", "Production_place", "Diplo_type", "Document", "Agent", "Acte", "Transcribed_by", "Individual", "Involved_in"]

@ -89,6 +89,14 @@ class Document(BaseModel):
database = db database = db
db_table = 'Document' db_table = 'Document'
class Agent(BaseModel):
    """Model for a person credited in an acte's ``respStmt``.

    Rows are created by the database initialisation command from the
    ``name`` elements of the XML files.
    """

    # Auto-incrementing primary key.
    id_agent = peewee.AutoField()
    # Name exactly as it appears in the XML ``name`` element.
    agent_name = peewee.TextField()

    class Meta:
        db_table = 'Agent'
        database = db
class Acte(BaseModel): class Acte(BaseModel):
id_acte = peewee.AutoField() id_acte = peewee.AutoField()
@ -107,6 +115,15 @@ class Acte(BaseModel):
database = db database = db
db_table = 'Acte' db_table = 'Acte'
class Transcribed_by(BaseModel):
    """Link table associating one Acte with one Agent.

    Each row records that the referenced agent is named in the ``respStmt``
    of the referenced acte.
    """

    # Auto-incrementing primary key.
    id_transcr = peewee.AutoField()
    # The acte concerned; reachable from an Acte via ``transcribed_bys``.
    transcr_acte = peewee.ForeignKeyField(Acte, backref='transcribed_bys')
    # The agent concerned; reachable from an Agent via ``transcribed_bys``.
    transcr_agent = peewee.ForeignKeyField(Agent, backref='transcribed_bys')

    class Meta:
        db_table = 'Transcribed_by'
        database = db
class Individual(BaseModel): class Individual(BaseModel):
id_indiv = peewee.AutoField() id_indiv = peewee.AutoField()
name_indiv = peewee.TextField() name_indiv = peewee.TextField()
@ -128,4 +145,4 @@ class Involved_in(BaseModel):
class Meta: class Meta:
database = db database = db
db_table = 'involved_in' db_table = 'Involved_in'

Loading…
Cancel
Save