renamed: /app/db_maker.py /app/cmd/db.py
parent
34c2c65010
commit
04ee817586
@ -1,217 +0,0 @@
|
|||||||
#!/usr/bin/python
|
|
||||||
# -*- coding: UTF-8 -*-
|
|
||||||
|
|
||||||
"""
|
|
||||||
Authors : Jean-Damien Généro
|
|
||||||
Affiliation : French National Center for Scientific Research (CNRS)
|
|
||||||
Assigned at the Centre de recherches historiques (CRH, UMR 8558)
|
|
||||||
Date : 2022-10-11
|
|
||||||
Update : 2022-10-13
|
|
||||||
"""
|
|
||||||
|
|
||||||
import csv
|
|
||||||
import os
|
|
||||||
import re
|
|
||||||
from bs4 import BeautifulSoup
|
|
||||||
from peewee import *
|
|
||||||
from tqdm import tqdm
|
|
||||||
|
|
||||||
from modeles.princes_db_tables import db, Institution, State, Production_place, Diplo_type, Document, Acte, Individual, Duke, Produced_by
|
|
||||||
|
|
||||||
from data.institution_data import institution
|
|
||||||
from data.state_data import state
|
|
||||||
from data.diplo_type_data import diplomatic_type
|
|
||||||
|
|
||||||
|
|
||||||
def make_soup(file):
    """Parse the XML file at ``file`` and return it as a BeautifulSoup object.

    The file is read as UTF-8 and parsed with the 'xml' parser while the
    file handle is still open.
    """
    with open(file, 'r', encoding="utf-8") as handle:
        return BeautifulSoup(handle, 'xml')
|
|
||||||
|
|
||||||
|
|
||||||
def _create_institution(data_lst: list) -> None:
    """Insert one Institution row per entry of ``data_lst``.

    Each entry is a dict whose keys are passed to ``Institution.create`` as
    keyword arguments; progress is shown with a tqdm bar.
    """
    progress = tqdm(data_lst, desc="Populating Institution...")
    for row in progress:
        Institution.create(**row)
|
|
||||||
|
|
||||||
def _create_state(data_lst: list) -> None:
    """Insert one State row per entry of ``data_lst``.

    Each entry is a dict whose keys are passed to ``State.create`` as keyword
    arguments; progress is shown with a tqdm bar.
    """
    progress = tqdm(data_lst, desc="Populating State...")
    for row in progress:
        State.create(**row)
|
|
||||||
|
|
||||||
def _create_diplo_type(data_lst: list) -> None:
    """Insert one Diplo_type row per entry of ``data_lst``.

    Each entry is a dict whose keys are passed to ``Diplo_type.create`` as
    keyword arguments; progress is shown with a tqdm bar.
    """
    progress = tqdm(data_lst, desc="Populating Diplo_type...")
    for row in progress:
        Diplo_type.create(**row)
|
|
||||||
|
|
||||||
def _create_produc_place(xml_file: str, folder: str) -> None:
    """Populate the Production_place table from the XML files of ``folder``.

    Every ``<placeName type="production_place">`` element of every file is
    read, duplicate names are dropped, and one row is created per distinct
    name.

    :param xml_file: not used by this function; kept so that existing calls
        keep working.
    :param folder: directory whose files are each parsed with ``make_soup``.
    """
    place_names = set()
    for acte in os.listdir(folder):
        soup = make_soup(os.path.join(folder, acte))
        # find_all() yields the matching elements themselves and is simply
        # empty when a file has none. Iterating over find() instead walks the
        # *children* of the first match and raises TypeError on no match.
        for place in soup.find_all('placeName', {'type': 'production_place'}):
            # Plain text, unstripped, so it compares equal to the value that
            # _create_acte looks up with ``.text``.
            place_names.add(place.text)
    # Sorted so that row ids are handed out in a reproducible order.
    production_places = [{"placename": name} for name in sorted(place_names)]
    for data in tqdm(production_places, desc="Populating Place..."):
        Production_place.create(**data)
|
|
||||||
|
|
||||||
def _create_doc(xml_file: str, folder: str) -> None:
    """Populate the Document table from the XML files of ``folder``.

    For each file the repository text and the text of the first
    ``<idno n="1">`` of the msIdentifier are read; each distinct
    (repository, collection) pair becomes one Document row linked to the
    Institution whose ``full_label`` equals the repository text.

    :param xml_file: not used by this function; kept so that existing calls
        keep working.
    :param folder: directory whose files are each parsed with ``make_soup``.
    :raises IndexError: when a file has no ``<idno n="1">`` or when no
        Institution matches a repository text.
    """
    # 1/ collect the distinct (repository, collection) pairs. Tuples are used
    # rather than a joined string so that no separator has to be parsed back
    # out of the text (which breaks on newlines or on the separator itself).
    doc_pairs = set()
    for acte in os.listdir(folder):
        soup = make_soup(os.path.join(folder, acte))
        inst_doc = soup.repository.text
        nb_doc_1 = soup.msIdentifier.find_all("idno", {"n": "1"})[0].text
        doc_pairs.add((inst_doc, nb_doc_1))
    # 2/ query the Institution table for the institution id,
    # then shape the data for the Document table
    infos_doc = []
    # Sorted so that row ids are handed out in a reproducible order.
    for doc_archives, doc_cote in sorted(doc_pairs):
        inst_query = [t.id_institution for t in Institution.select().where(
            Institution.full_label == doc_archives)]
        if not inst_query:
            # Same exception type as the bare ``inst_query[0]`` would raise,
            # but naming the repository that could not be resolved.
            raise IndexError(
                "no Institution row with full_label {!r}".format(doc_archives))
        infos_doc.append({
            "inst_doc": inst_query[0],
            "collection_doc": doc_cote,
        })
    # 3/ create the rows
    for data in tqdm(infos_doc, desc="Populating Document..."):
        Document.create(**data)
|
|
||||||
|
|
||||||
def _create_acte(xml_file: str, folder: str) -> None:
    """Populate the Acte table with one row per XML file of ``folder``.

    The values read from each file are resolved against the
    Production_place, Document, Diplo_type and State tables, which must
    already be populated: the first matching id of each lookup is stored.

    :param xml_file: not used by this function.
    :param folder: directory whose files are each parsed with ``make_soup``.
    """
    rows = []
    for filename in os.listdir(folder):
        tree = make_soup(os.path.join(folder, filename))

        # Values read straight from the XML.
        identifier = tree.TEI["xml:id"]
        doc_date = tree.msItem.docDate
        when = doc_date["when"]
        date_label = doc_date.text
        summary = tree.abstract.p.text
        second_idno = tree.msIdentifier.find_all("idno", {"n": "2"})
        # "NS" is stored when the file has no <idno n="2">.
        reference = second_idno[0].text if len(second_idno) > 0 else "NS"
        place_name = tree.find_all(
            "placeName", {"type": "production_place"})[0].text
        first_idno = tree.msIdentifier.find_all("idno", {"n": "1"})[0]
        subtype = tree.body.div["subtype"]
        div_type = tree.body.div["type"]

        # Foreign keys: ids of the rows matching the values above.
        place_ids = [
            found.id_place
            for found in Production_place.select().where(
                Production_place.placename == place_name)
        ]
        document_ids = [
            found.id_document
            for found in Document.select().where(
                Document.collection_doc == first_idno.text)
        ]
        diplo_ids = [
            found.id_diplo_type
            for found in Diplo_type.select().where(
                Diplo_type.diplo_label == subtype)
        ]
        state_ids = [
            found.id_state
            for found in State.select().where(
                State.state_label == div_type)
        ]

        row = {
            "numb_acte": identifier,
            "date_time": when,
            "date": date_label,
            "prod_place_acte": place_ids[0],
            "analysis": summary,
            "doc_acte": document_ids[0],
            "ref_acte": reference,
            "state_doc": state_ids[0],
            "diplo_type_acte": diplo_ids[0],
        }
        rows.append(row)

    for row in tqdm(rows, desc="Populating Actes..."):
        Acte.create(**row)
|
|
||||||
|
|
||||||
|
|
||||||
def __find_indiv(xml_soup, role: str, indiv_lst: list) -> None:
    """Collect into ``indiv_lst`` the persons listed under a given role.

    Looks, inside the sourceDesc of ``xml_soup``, at every ``<listPerson>``
    whose ``type`` equals ``role`` and appends the text of each ``<person>``
    it contains, with newline characters removed. ``indiv_lst`` is modified
    in place; nothing is returned.
    """
    person_lists = xml_soup.sourceDesc.find_all("listPerson", {"type": role})
    for person_list in person_lists:
        indiv_lst.extend(
            person.text.replace("\n", "")
            for person in person_list.find_all("person")
        )
|
|
||||||
"""
|
|
||||||
def _create_individual(xml_file: str, folder: str)-> None:
|
|
||||||
indiv_prince = []
|
|
||||||
indiv_secret = []
|
|
||||||
for acte in os.listdir(folder):
|
|
||||||
soup = make_soup(os.path.join(folder, acte))
|
|
||||||
__find_indiv(soup, "prince", indiv_prince)
|
|
||||||
__find_indiv(soup, "signatory", indiv_secret)
|
|
||||||
print(set(indiv_secret))
|
|
||||||
print(set(indiv_prince))
|
|
||||||
"""
|
|
||||||
|
|
||||||
def __indiv_infos(indiv_type, csv_path="./static/csv/actors.csv"):
    """Return the rows of the actors CSV whose second column is ``indiv_type``.

    The file is read as UTF-8 with ``;`` as delimiter and its first line is
    skipped as a header.

    :param indiv_type: value compared with the second column of each row.
    :param csv_path: CSV file to read; defaults to the project's actors file.
    :return: list of matching rows, each row being a list of strings.
    """
    # newline="" is what the csv module requires of file objects handed to
    # csv.reader, so that newlines embedded in fields are handled correctly.
    with open(csv_path, 'r', encoding="utf-8", newline="") as opening:
        actors_csv = csv.reader(opening, delimiter=";")
        next(actors_csv, None)  # skip the header line, if any
        # The length guard skips blank or truncated lines, which have no
        # second column to compare.
        return [row for row in actors_csv
                if len(row) > 1 and row[1] == indiv_type]
|
|
||||||
|
|
||||||
def _create_indiv():
    """Populate the Individual table from the actors CSV.

    Rows with role "secret" are inserted first, then rows with role
    "prince"; the first column becomes ``name_indiv`` and the second
    ``role_indiv``.
    """
    secretaries = __indiv_infos("secret")
    princes = __indiv_infos("prince")
    individuals = []
    for actor in secretaries + princes:
        individuals.append({"name_indiv": actor[0], "role_indiv": actor[1]})
    for row in tqdm(individuals, desc="Populating Individual..."):
        Individual.create(**row)
|
|
||||||
|
|
||||||
def _create_duke():
    """Populate the Duke table from the "prince" rows of the actors CSV.

    Each row is linked to the first Individual whose ``name_indiv`` equals
    the row's first column, so the Individual table must be populated first.
    """
    duke_rows = []
    for prince_row in __indiv_infos("prince"):
        matches = Individual.select().where(
            Individual.name_indiv == prince_row[0])
        indiv_ids = [found.id_indiv for found in matches]
        duke_rows.append({
            "house": prince_row[2],
            "indiv_duke": indiv_ids[0],
            "birth": prince_row[3],
            "reign": prince_row[4],
            # NOTE(review): "death" reads the same column as "reign"
            # (index 4). This looks like a copy-paste slip; confirm the CSV
            # layout (index 5?) before changing it.
            "death": prince_row[4],
        })
    for row in tqdm(duke_rows, desc="Populating Duke..."):
        Duke.create(**row)
|
|
||||||
|
|
||||||
def _create_produced_by(xml_file: str, folder: str):
    """Populate the Produced_by table linking each acte to its dukes.

    For every file of ``folder``, the Acte whose ``numb_acte`` equals the
    file name without ``.xml`` is looked up. Each ``<person>`` found under a
    ``<listPerson type="prince">`` of the sourceDesc is then resolved to an
    Individual by name and to the Duke attached to that Individual, and one
    (acte, duke) row is created.

    :param xml_file: not used by this function.
    :param folder: directory whose files are each parsed with ``make_soup``.
    :raises IndexError: when a file contains at least one prince and the
        acte, the individual or the duke cannot be found.
    """
    princes_actes = []
    for acte in os.listdir(folder):
        # NOTE(review): assumes each file is named after the xml:id stored
        # as numb_acte by _create_acte; verify against the corpus.
        acte_q = [t.id_acte for t in Acte.select().where(
            Acte.numb_acte == acte.replace(".xml", ""))]
        soup = make_soup(os.path.join(folder, acte))
        princes = soup.sourceDesc.find_all("listPerson", {"type": "prince"})
        for person_list in princes:
            for duke in person_list.find_all("person"):
                # Computed once, under a name that does not rebind the
                # outer loop variable.
                duke_name = duke.text.replace("\n", "")
                prince_q = [t.id_indiv for t in Individual.select().where(
                    Individual.name_indiv == duke_name)]
                duke_q = [t.id_duke for t in Duke.select().where(
                    Duke.indiv_duke == prince_q[0])]
                princes_actes.append({"produced_by_acte": acte_q[0],
                                      "produced_by_prince": duke_q[0]})
    for data in tqdm(princes_actes, desc="Populating Produced_by..."):
        Produced_by.create(**data)
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def init():
    """Rebuild the database from scratch.

    Connects to ``db``, drops and re-creates every table, then fills them in
    dependency order: lookup tables first, then places, documents and actes,
    then individuals, dukes and the acte/duke links.
    """
    tables = [
        Institution,
        State,
        Production_place,
        Diplo_type,
        Document,
        Acte,
        Individual,
        Duke,
        Produced_by,
    ]

    db.connect()
    print("Dropping existing DB...")
    db.drop_tables(tables)
    print("Re-creating schema...")
    db.create_tables(tables)

    # Lookup tables filled from the static data modules.
    _create_institution(institution)
    _create_state(state)
    _create_diplo_type(diplomatic_type)

    # Tables filled from the XML files; each step relies on the previous ones.
    _create_produc_place(xml, xml_folder)
    _create_doc(xml, xml_folder)
    _create_acte(xml, xml_folder)

    # Tables filled from the actors CSV, then the acte/duke links.
    _create_indiv()
    _create_duke()
    _create_produced_by(xml, xml_folder)
|
|
||||||
|
|
||||||
# Path passed to the populating helpers as their ``xml_file`` argument.
xml = "../bourbon-latex/charles-actes-latex.xml"
# Folder whose XML files are parsed by the populating helpers.
xml_folder = "./static/xml/Bourbon/Brb_5_Charles_Ier"

# Executed as soon as the module is run or imported: drops, re-creates and
# repopulates every table.
init()
|
|
||||||
# _create_individual(xml, xml_folder)
|
|
||||||
# _create_produced_by(xml, xml_folder)
|
|
||||||
Loading…
Reference in New Issue