+ tables Agent & Transcribed_by

main
jgenero 3 years ago
parent 3d037d35ea
commit 4fec76117a

@ -10,7 +10,7 @@ from tqdm import tqdm
from app.app import APPPATH, db
from app.data_actes import diplomatic_type, institution, state, houses, interventions
from app.modeles import Institution, State, House, Intervention_type, Production_place, Diplo_type, Document, Acte, Individual, Involved_in
from app.modeles import Institution, State, House, Intervention_type, Production_place, Diplo_type, Document, Agent, Acte, Transcribed_by, Individual, Involved_in
db_cli = AppGroup("db")
@ -99,6 +99,20 @@ def _create_doc(folder: str)-> None:
for data in tqdm(infos_doc, desc="Populating Document..."):
Document.create(**data)
def __find_transcribers(folder: str) -> set:
    """Collect the distinct transcriber names found in a folder of XML files.

    Every entry of *folder* whose name ends with ``.xml`` is parsed with
    ``make_soup`` and the text of each ``<name>`` element located under
    ``fileDesc/titleStmt/respStmt`` is gathered.

    :param folder: path of the directory holding the XML files.
    :return: set of the ``<name>`` texts found; empty when the folder holds
        no ``.xml`` file.
    """
    transcribers = set()
    for acte in sorted(os.listdir(folder)):
        if not acte.endswith(".xml"):
            continue
        soup = make_soup(os.path.join(folder, acte))
        # NOTE(review): presumably every file carries a <respStmt>; if the
        # lookup yields None the next line fails -- confirm against the corpus.
        transcriber = soup.fileDesc.titleStmt.respStmt
        transcribers.update(name.text for name in transcriber.find_all("name"))
    return transcribers
def _create_agent(name_lst: list) -> None:
    """Insert one ``Agent`` row per entry of *name_lst*.

    :param name_lst: iterable of mappings; each one is expanded into the
        keyword arguments of ``Agent.create``.
    """
    progress = tqdm(name_lst, desc="Populating Agent...")
    for row in progress:
        Agent.create(**row)
def _create_acte(folder: str)-> None:
actes = []
counter = 0
@ -142,6 +156,23 @@ def _create_acte(folder: str)-> None:
for data in tqdm(actes, desc="Populating Actes..."):
Acte.create(**data)
def _create_transcribed_by(folder: str) -> None:
    """Fill the ``Transcribed_by`` table from the XML files of *folder*.

    For each ``.xml`` file, the ``Acte`` whose ``filename`` equals the file
    name without ``.xml`` is paired with every ``Agent`` whose ``agent_name``
    equals the text of a ``<name>`` element under
    ``fileDesc/titleStmt/respStmt``.

    :param folder: path of the directory holding the XML files.
    :raises IndexError: when a file lists at least one name and either the
        ``Acte`` or the ``Agent`` lookup returns no row.
    """
    links = []
    xml_files = (entry for entry in os.listdir(folder)
                 if entry.endswith(".xml"))
    for xml_file in xml_files:
        soup = make_soup(os.path.join(folder, xml_file))
        stem = xml_file.replace(".xml", "")
        acte_ids = [row.id_acte
                    for row in Acte.select().where(Acte.filename == stem)]
        resp_stmt = soup.fileDesc.titleStmt.respStmt
        for name_tag in resp_stmt.find_all("name"):
            agent_ids = [row.id_agent
                         for row in Agent.select().where(
                             Agent.agent_name == name_tag.text)]
            # Only the first match of each lookup is linked.
            links.append({"transcr_acte": acte_ids[0],
                          "transcr_agent": agent_ids[0]})
    for link in tqdm(links, desc="Populating Transcribed_by..."):
        Transcribed_by.create(**link)
def __find_indiv(folder: str, role: str)-> None:
indiv_lst = []
for acte in os.listdir(folder):
@ -181,8 +212,6 @@ def __grape_indiv(list_person, role: str):
# Duke.indiv_duke == prince_q[0])]
print(person_text, "==", prince_q[0])
def _create_involved_in(folder: str):
princes_actes = []
for acte in os.listdir(folder):
@ -236,22 +265,26 @@ def init() -> None:
print("Dropping existing DB...")
db.drop_tables([Institution, State, House, Intervention_type,
Production_place, Diplo_type, Document, Acte, Individual,
Involved_in])
Production_place, Diplo_type, Document, Agent, Acte, Transcribed_by,
Individual, Involved_in])
print("Re-creating schema...")
db.create_tables([Institution, State, House, Intervention_type,
Production_place, Diplo_type, Document, Acte, Individual, Involved_in])
Production_place, Diplo_type, Document, Agent, Acte,
Transcribed_by, Individual, Involved_in])
_create_institution(institution)
_create_state(state)
_create_house(houses)
_create_interv_type(interventions)
_create_diplo_type(diplomatic_type)
actors = [*__csv_indiv_infos("secret"), *__csv_indiv_infos("prince")]
_create_indiv(actors)
agents_names = []
for prince_house in princes_houses:
xml_folder = os.path.join(APPPATH, "static", "xml", prince_house)
print("\n\n**** HOUSE ", prince_house, " ****")
for name in __find_transcribers(xml_folder):
agents_names.append(name)
# check which names need to be added to the actors.csv
names_in_csv = [actor[0] for actor in actors]
names_in_xml = __find_indiv(xml_folder, "signatory")
@ -260,7 +293,10 @@ def init() -> None:
_create_produc_place(xml_folder)
_create_doc(xml_folder)
_create_acte(xml_folder)
agents = [{"agent_name": agent} for agent in set(agents_names)]
_create_agent(agents)
for prince_house in princes_houses:
print("\n\n**** INVOLVED IN ", prince_house, " ****")
xml_folder = os.path.join(APPPATH, "static", "xml", prince_house)
_create_transcribed_by(xml_folder)
_create_involved_in(xml_folder)

@ -1,3 +1,3 @@
from .data import Institution, State, House, Intervention_type, Production_place, Diplo_type, Document, Acte, Individual, Involved_in
from .data import Institution, State, House, Intervention_type, Production_place, Diplo_type, Document, Agent, Acte, Transcribed_by, Individual, Involved_in
__all__ = ["Institution", "State", "House", "Intervention_type", "Production_place", "Diplo_type", "Document", "Acte", "Individual", "involved_in"]
# Public names of the package; each entry must match an imported model class
# exactly, otherwise a star-import of the package fails.
__all__ = [
    "Institution",
    "State",
    "House",
    "Intervention_type",
    "Production_place",
    "Diplo_type",
    "Document",
    "Agent",
    "Acte",
    "Transcribed_by",
    "Individual",
    "Involved_in",
]

@ -89,6 +89,14 @@ class Document(BaseModel):
database = db
db_table = 'Document'
class Agent(BaseModel):
    """Model of the ``Agent`` table: one row per named agent."""

    # Auto-incrementing primary key.
    id_agent = peewee.AutoField()
    # Name of the agent, stored as free text.
    agent_name = peewee.TextField()

    class Meta:
        database = db
        db_table = "Agent"
class Acte(BaseModel):
id_acte = peewee.AutoField()
@ -107,6 +115,15 @@ class Acte(BaseModel):
database = db
db_table = 'Acte'
class Transcribed_by(BaseModel):
    """Model of the ``Transcribed_by`` table linking an ``Acte`` to an ``Agent``."""

    # Auto-incrementing primary key.
    id_transcr = peewee.AutoField()
    # Linked Acte; reachable from the Acte side through ``transcribed_bys``.
    transcr_acte = peewee.ForeignKeyField(Acte, backref="transcribed_bys")
    # Linked Agent; reachable from the Agent side through ``transcribed_bys``.
    transcr_agent = peewee.ForeignKeyField(Agent, backref="transcribed_bys")

    class Meta:
        database = db
        db_table = "Transcribed_by"
class Individual(BaseModel):
id_indiv = peewee.AutoField()
name_indiv = peewee.TextField()
@ -128,4 +145,4 @@ class Involved_in(BaseModel):
class Meta:
database = db
db_table = 'involved_in'
db_table = 'Involved_in'

Loading…
Cancel
Save