From 04ee8175866ea5dc130a1bcc7f30e1ddfe77852c Mon Sep 17 00:00:00 2001 From: jgenero Date: Sun, 16 Oct 2022 12:20:57 +0200 Subject: [PATCH] renamed: /app/db_maker.py /app/cmd/db.py --- app/db_maker.py | 217 ------------------------------------------------ 1 file changed, 217 deletions(-) delete mode 100644 app/db_maker.py diff --git a/app/db_maker.py b/app/db_maker.py deleted file mode 100644 index 046a1a2..0000000 --- a/app/db_maker.py +++ /dev/null @@ -1,217 +0,0 @@ -#!/usr/bin/python -# -*- coding: UTF-8 -*- - -""" -Authors : Jean-Damien Généro -Affiliation : French National Center for Scientific Research (CNRS) -Assigned at the Centre de recherches historiques (CRH, UMR 8558) -Date : 2022-10-11 -Update : 2022-10-13 -""" - -import csv -import os -import re -from bs4 import BeautifulSoup -from peewee import * -from tqdm import tqdm - -from modeles.princes_db_tables import db, Institution, State, Production_place, Diplo_type, Document, Acte, Individual, Duke, Produced_by - -from data.institution_data import institution -from data.state_data import state -from data.diplo_type_data import diplomatic_type - - -def make_soup(file): - """open a xml file and return a BeautifulSoup object""" - with open(file, 'r', encoding="utf-8") as opening: - xml = BeautifulSoup(opening, 'xml') - return xml - - -def _create_institution(data_lst: list)-> None: - """create institution table""" - for data in tqdm(data_lst, desc="Populating Institution..."): - Institution.create(**data) - -def _create_state(data_lst: list)-> None: - """create state table""" - for data in tqdm(data_lst, desc="Populating State..."): - State.create(**data) - -def _create_diplo_type(data_lst: list)-> None: - """create diplo type table""" - for data in tqdm(data_lst, desc="Populating Diplo_type..."): - Diplo_type.create(**data) - -def _create_produc_place(xml_file: str, folder: str)-> None: - """create production place table""" - places_xtract = [] - production_places = [] - for acte in os.listdir(folder): - 
soup = make_soup(os.path.join(folder, acte)) - for place in soup.find('placeName', {'type': 'production_place'}): - places_xtract.append(place) - production_places = [{"placename": xtraction} for xtraction in set(places_xtract)] - for data in tqdm(production_places, desc="Populating Place..."): - Production_place.create(**data) - -def _create_doc(xml_file: str, folder: str)-> None: - """create doc table""" - details_doc = [] - infos_doc = [] - # 1/ get repository (doc archives) + doc collection in a list - for acte in os.listdir(folder): - soup = make_soup(os.path.join(folder, acte)) - inst_doc = soup.repository.text - nb_doc_1 = soup.msIdentifier.find_all("idno", {"n": "1"})[0].text - details_doc.append(inst_doc + " == " + nb_doc_1) - # 2/ make a query on table Inst to get inst id - # then pretiffy data for the table Doc - for doc in set(details_doc): - doc_archives = re.sub('(.+) == .+', '\\1', doc) - doc_cote = re.sub('.+ == (.+)', '\\1', doc) - inst_query = [t.id_institution for t in Institution.select().where( - Institution.full_label == doc_archives)] - infos_doc.append({ - "inst_doc": inst_query[0], - "collection_doc": doc_cote, - }) - # 3/ create the table - for data in tqdm(infos_doc, desc="Populating Document..."): - Document.create(**data) - -def _create_acte(xml_file: str, folder: str)-> None: - actes = [] - for acte in os.listdir(folder): - soup = make_soup(os.path.join(folder, acte)) - numb = soup.TEI["xml:id"] - date_time = soup.msItem.docDate["when"] - date = soup.msItem.docDate.text - analyse = soup.abstract.p.text - ref = soup.msIdentifier.find_all("idno", {"n": "2"}) - if len(ref) > 0: - ref_acte = ref[0].text - else: - ref_acte = "NS" - prod_place = soup.find_all("placeName", {"type": "production_place"})[0].text - doc = soup.msIdentifier.find_all("idno", {"n": "1"})[0] - type_diplo = soup.body.div["subtype"] - diplo_state = soup.body.div["type"] - place_query = [t.id_place for t in Production_place.select().where( - Production_place.placename 
== prod_place)] - doc_query = [t.id_document for t in Document.select().where( - Document.collection_doc == doc.text)] - diplo_query = [t.id_diplo_type for t in Diplo_type.select().where( - Diplo_type.diplo_label == type_diplo)] - state_query = [t.id_state for t in State.select().where( - State.state_label == diplo_state)] - actes.append({ - "numb_acte": numb, - "date_time": date_time, - "date": date, - "prod_place_acte": place_query[0], - "analysis": analyse, - "doc_acte": doc_query[0], - "ref_acte": ref_acte, - "state_doc": state_query[0], - "diplo_type_acte": diplo_query[0] - }) - for data in tqdm(actes, desc="Populating Actes..."): - Acte.create(**data) - - -def __find_indiv(xml_soup, role: str, indiv_lst: list)-> None: - princes = xml_soup.sourceDesc.find_all("listPerson", {"type": role}) - for prince in princes: - dukes = prince.find_all("person") - for duke in dukes: - indiv_lst.append(duke.text.replace("\n", "")) -""" -def _create_individual(xml_file: str, folder: str)-> None: - indiv_prince = [] - indiv_secret = [] - for acte in os.listdir(folder): - soup = make_soup(os.path.join(folder, acte)) - __find_indiv(soup, "prince", indiv_prince) - __find_indiv(soup, "signatory", indiv_secret) - print(set(indiv_secret)) - print(set(indiv_prince)) -""" - -def __indiv_infos(indiv_type): - with open("./static/csv/actors.csv", 'r', encoding="utf-8") as opening: - actors_csv = csv.reader(opening, delimiter=";") - next(actors_csv, None) - lst_of_indiv = [row for row in actors_csv if row[1] == indiv_type] - return lst_of_indiv - -def _create_indiv(): - actors = [*__indiv_infos("secret"), *__indiv_infos("prince")] - individuals = [{"name_indiv": actor[0], "role_indiv": actor[1]} - for actor in actors] - for data in tqdm(individuals, desc="Populating Individual..."): - Individual.create(**data) - -def _create_duke(): - dukes = [] - for info in __indiv_infos("prince"): - indiv_query = [t.id_indiv for t in Individual.select().where( - Individual.name_indiv == info[0])] - 
dukes.append({"house": info[2], "indiv_duke": indiv_query[0], - "birth": info[3], "reign": info[4], "death": info[4]}) - for data in tqdm(dukes, desc="Populating Duke..."): - Duke.create(**data) - -def _create_produced_by(xml_file: str, folder: str): - princes_actes = [] - for acte in os.listdir(folder): - acte_q = [t.id_acte for t in Acte.select().where( - Acte.numb_acte == acte.replace(".xml", ""))] - # print(acte, "==", acte_q[0]) - soup = make_soup(os.path.join(folder, acte)) - princes = soup.sourceDesc.find_all("listPerson", {"type": "prince"}) - for prince in princes: - dukes = prince.find_all("person") - for duke in dukes: - prince = duke.text.replace("\n", "") - prince_q = [t.id_indiv for t in Individual.select().where( - Individual.name_indiv == duke.text.replace("\n", ""))] - duke_q = [t.id_duke for t in Duke.select().where( - Duke.indiv_duke == prince_q[0])] - # print(prince, "==", prince_q[0], "==", duke_q[0]) - princes_actes.append({"produced_by_acte": acte_q[0], - "produced_by_prince": duke_q[0]}) - for data in tqdm(princes_actes, desc="Populating Produced_by..."): - Produced_by.create(**data) - - - -def init(): - """initializing db""" - db.connect() - print("Dropping existing DB...") - db.drop_tables([Institution, State, Production_place, - Diplo_type, Document, Acte, Individual, Duke, - Produced_by]) - print("Re-creating schema...") - db.create_tables([Institution, State, Production_place, - Diplo_type, Document, Acte, Individual, Duke, - Produced_by]) - _create_institution(institution) - _create_state(state) - _create_diplo_type(diplomatic_type) - _create_produc_place(xml, xml_folder) - _create_doc(xml, xml_folder) - _create_acte(xml, xml_folder) - _create_indiv() - _create_duke() - _create_produced_by(xml, xml_folder) - -xml = "../bourbon-latex/charles-actes-latex.xml" -xml_folder = "./static/xml/Bourbon/Brb_5_Charles_Ier" - -init() -# _create_individual(xml, xml_folder) -# _create_produced_by(xml, xml_folder)