Delete the list-compare function, remove the xml_file argument, and filter files with str.endswith(".xml")

main
jgenero 3 years ago
parent dc6d3c1b38
commit b68c50abfa

@ -60,28 +60,30 @@ def _create_diplo_type(data_lst: list)-> None:
for data in tqdm(data_lst, desc="Populating Diplo_type..."): for data in tqdm(data_lst, desc="Populating Diplo_type..."):
Diplo_type.create(**data) Diplo_type.create(**data)
def _create_produc_place(folder: str)-> None:
    """Create the production place table.

    Every ``*.xml`` file located directly in ``folder`` is parsed with
    ``make_soup`` and the text of its first
    ``<placeName type="production_place">`` element is collected.  One
    ``Production_place`` row is then created per distinct place name.

    :param folder: directory that holds the XML files of the actes.
    """
    places_xtract = set()
    for acte in os.listdir(folder):
        # Ignore everything that is not an XML file.
        if not acte.endswith(".xml"):
            continue
        soup = make_soup(os.path.join(folder, acte))
        # NOTE(review): assumes make_soup returns a BeautifulSoup document,
        # whose find() yields None when nothing matches -- confirm.
        place = soup.find("placeName", {"type": "production_place"})
        if place is None:
            # Iterating over a missing element would raise a TypeError;
            # report the file and keep going instead.
            print("!! no production place found in " + acte)
            continue
        # Keep the element text: _create_acte looks the place up again by
        # the ``.text`` of this same element.
        places_xtract.add(place.text)
    # Sorted so that rows are inserted in the same order on every run.
    production_places = [{"placename": xtraction}
                         for xtraction in sorted(places_xtract)]
    for data in tqdm(production_places, desc="Populating Place..."):
        Production_place.create(**data)
def _create_doc(xml_file: str, folder: str)-> None: def _create_doc(folder: str)-> None:
"""create doc table""" """create doc table"""
details_doc = [] details_doc = []
infos_doc = [] infos_doc = []
# 1/ get repository (doc archives) + doc collection in a list # 1/ get repository (doc archives) + doc collection in a list
for acte in os.listdir(folder): for acte in os.listdir(folder):
soup = make_soup(os.path.join(folder, acte)) if acte.endswith(".xml"):
inst_doc = soup.repository.text soup = make_soup(os.path.join(folder, acte))
nb_doc_1 = soup.msIdentifier.find_all("idno", {"n": "1"})[0].text inst_doc = soup.repository.text
details_doc.append(inst_doc + " == " + nb_doc_1) nb_doc_1 = soup.msIdentifier.find_all("idno", {"n": "1"})[0].text
details_doc.append(inst_doc + " == " + nb_doc_1)
# 2/ make a query on table Inst to get inst id # 2/ make a query on table Inst to get inst id
# then pretiffy data for the table Doc # then pretiffy data for the table Doc
for doc in set(details_doc): for doc in set(details_doc):
@ -97,57 +99,59 @@ def _create_doc(xml_file: str, folder: str)-> None:
for data in tqdm(infos_doc, desc="Populating Document..."): for data in tqdm(infos_doc, desc="Populating Document..."):
Document.create(**data) Document.create(**data)
def _create_acte(folder: str)-> None:
    """Create the acte table.

    The ``*.xml`` files of ``folder`` are handled in sorted order; each one
    produces a single ``Acte`` row whose ``num_acte`` is its 1-based position
    in that order.  Foreign keys are resolved by querying the
    ``Production_place``, ``Document``, ``Diplo_type`` and ``State`` tables
    with values read from the XML.

    :param folder: directory that holds the XML files of the actes.
    """
    xml_names = [name for name in sorted(os.listdir(folder))
                 if name.endswith(".xml")]
    rows = []
    for position, xml_name in enumerate(xml_names, start=1):
        soup = make_soup(os.path.join(folder, xml_name))

        # Values read straight from the XML document.
        xml_id = soup.TEI["xml:id"]
        doc_date = soup.msItem.docDate
        when = doc_date["when"]
        date_label = doc_date.text
        abstract_text = soup.abstract.p.text
        second_idno = soup.msIdentifier.find_all("idno", {"n": "2"})
        # "NS" is stored when the acte carries no <idno n="2">.
        reference = second_idno[0].text if second_idno else "NS"
        place_name = soup.find_all(
            "placeName", {"type": "production_place"})[0].text
        first_idno = soup.msIdentifier.find_all("idno", {"n": "1"})[0]
        subtype = soup.body.div["subtype"]
        state_label = soup.body.div["type"]

        # Ids of the rows the new acte points to.
        place_ids = [row.id_place for row in Production_place.select().where(
            Production_place.placename == place_name)]
        document_ids = [row.id_document for row in Document.select().where(
            Document.collection_doc == first_idno.text)]
        diplo_ids = [row.id_diplo_type for row in Diplo_type.select().where(
            Diplo_type.diplo_label == subtype)]
        state_ids = [row.id_state for row in State.select().where(
            State.state_label == state_label)]

        rows.append({
            "num_acte": position,
            "filename": xml_id,
            "date_time": when,
            "date": date_label,
            "prod_place_acte": place_ids[0],
            "analysis": abstract_text,
            "doc_acte": document_ids[0],
            "ref_acte": reference,
            "state_doc": state_ids[0],
            "diplo_type_acte": diplo_ids[0]
        })

    for row in tqdm(rows, desc="Populating Actes..."):
        Acte.create(**row)
def __find_indiv(folder: str, role: str)-> set:
    """Collect the names of the individuals listed under a given role.

    Each ``*.xml`` file of ``folder`` is parsed with ``make_soup``; the text
    of every ``<person>`` found in a ``<listPerson type=role>`` of the
    ``sourceDesc`` element is gathered, with newlines removed.

    :param folder: directory that holds the XML files of the actes.
    :param role: value of the ``type`` attribute of the ``listPerson``
        elements to read.
    :return: the distinct person names found.
    """
    indiv_names = set()
    for acte in os.listdir(folder):
        # Ignore everything that is not an XML file.
        if not acte.endswith(".xml"):
            continue
        soup = make_soup(os.path.join(folder, acte))
        for xml_indiv in soup.sourceDesc.find_all("listPerson", {"type": role}):
            for person in xml_indiv.find_all("person"):
                indiv_names.add(person.text.replace("\n", ""))
    return indiv_names
def __csv_indiv_infos(indiv_type): def __csv_indiv_infos(indiv_type):
@ -157,14 +161,6 @@ def __csv_indiv_infos(indiv_type):
lst_of_indiv = [row for row in actors_csv if row[1] == indiv_type] lst_of_indiv = [row for row in actors_csv if row[1] == indiv_type]
return lst_of_indiv return lst_of_indiv
def __compareList(l1,l2):
    """Tell whether two lists hold the same elements, ignoring their order.

    The comparison works on sorted copies, so the lists passed in are left
    untouched.

    :param l1: first list; its elements must be mutually sortable.
    :param l2: second list; its elements must be mutually sortable.
    :return: ``"Equal"`` when both lists contain the same elements with the
        same multiplicity, ``"Non equal"`` otherwise.
    """
    if sorted(l1) == sorted(l2):
        return "Equal"
    return "Non equal"
def _create_indiv(list_csv): def _create_indiv(list_csv):
individuals = [{"name_indiv": actor[0], "role_indiv": actor[1], individuals = [{"name_indiv": actor[0], "role_indiv": actor[1],
"house_indiv": [t.id_house for t in House.select().where( "house_indiv": [t.id_house for t in House.select().where(
@ -187,51 +183,48 @@ def __grape_indiv(list_person, role: str):
def _create_involved_in(folder: str):
    """Create the involved_in table.

    For every ``*.xml`` file of ``folder`` the persons listed in the
    ``<listPerson type="prince">`` and ``<listPerson type="signatory">``
    elements of ``sourceDesc`` are linked to the acte stored under the same
    file name, with the "producer" and "signatory" intervention types
    respectively.  Persons whose text is the literal "None" are ignored, and
    names missing from the ``Individual`` table are reported on stdout and
    skipped.

    :param folder: directory that holds the XML files of the actes.
    """
    # listPerson type in the XML -> label of the Intervention_type row.
    interv_labels = {"prince": "producer", "signatory": "signatory"}
    # These ids do not depend on the file being read, so query them once.
    interv_ids = {
        role: [t.id_intev for t in Intervention_type.select().where(
            Intervention_type.interv_label == label)]
        for role, label in interv_labels.items()
    }

    princes_actes = []
    for acte in os.listdir(folder):
        # Ignore everything that is not an XML file.
        if not acte.endswith(".xml"):
            continue
        # Drop only the trailing extension, not every ".xml" in the name.
        acte_q = [t.id_acte for t in Acte.select().where(
            Acte.filename == os.path.splitext(acte)[0])]
        soup = make_soup(os.path.join(folder, acte))
        # Princes first, then signatories, for each file.
        for role in interv_labels:
            for persons in soup.sourceDesc.find_all("listPerson", {"type": role}):
                for person_tag in persons.find_all("person"):
                    person_text = person_tag.text.replace("\n", "")
                    if person_text == "None":
                        continue
                    prince_q = [t.id_indiv for t in Individual.select().where(
                        Individual.name_indiv == person_text)]
                    if not prince_q:
                        print("!! name " + person_text + " (" + role
                              + ") not found in /app/static/csv/actors.csv")
                        continue
                    princes_actes.append({"involved_in_acte": acte_q[0],
                                          "involved_in_prince": prince_q[0],
                                          "invol_in_interv": interv_ids[role][0]})

    for data in tqdm(princes_actes, desc="Populating involved_in..."):
        Involved_in.create(**data)
@ -239,10 +232,8 @@ def _create_involved_in(xml_file: str, folder: str):
@db_cli.command() @db_cli.command()
def init() -> None: def init() -> None:
"""Initialization of the database""" """Initialization of the database"""
xml = os.path.join(APPPATH, "static", "xml",
"Bourbon", "Brb_5_Charles_Ier"), ".xml"
xml_folder = os.path.join(APPPATH, "static", "xml", xml_folder = os.path.join(APPPATH, "static", "xml",
"Bourbon", "Brb_5_Charles_Ier") "Bourbon")
print("Dropping existing DB...") print("Dropping existing DB...")
db.drop_tables([Institution, State, House, Intervention_type, db.drop_tables([Institution, State, House, Intervention_type,
@ -256,14 +247,10 @@ def init() -> None:
_create_house(houses) _create_house(houses)
_create_interv_type(interventions) _create_interv_type(interventions)
_create_diplo_type(diplomatic_type) _create_diplo_type(diplomatic_type)
_create_produc_place(xml, xml_folder) _create_produc_place(xml_folder)
_create_doc(xml, xml_folder) _create_doc(xml_folder)
_create_acte(xml, xml_folder) _create_acte(xml_folder)
# check which names need to be add to the actors.csv # check which names need to be add to the actors.csv
actors = [*__csv_indiv_infos("secret"), *__csv_indiv_infos("prince")] actors = [*__csv_indiv_infos("secret"), *__csv_indiv_infos("prince")]
# names_in_csv = [actor[0] for actor in actors]
# names_in_xml = __find_indiv(xml_folder, "signatory")
# for name in [x for x in names_in_xml if x not in names_in_csv]:
# print("!! name " + name + " not found in /app/static/csv/actors.csv")
_create_indiv(actors) _create_indiv(actors)
_create_involved_in(xml, xml_folder) _create_involved_in(xml_folder)

Loading…
Cancel
Save