#!/usr/bin/python
# -*- coding: UTF-8 -*-

"""
Authors : Jean-Damien Généro
Affiliation : French National Center for Scientific Research (CNRS)
Assigned at the Centre de recherches historiques (CRH, UMR 8558)
Date : 2022-10-11
Update :
"""

import os
from collections import OrderedDict

from bs4 import BeautifulSoup
from peewee import *
from tqdm import tqdm

from modeles.princes_db_tables import db, Institution, State, Production_place, Diplo_type, Document, Acte
from data.institution_data import institution
from data.state_data import state
from data.diplo_type_data import diplomatic_type


def make_soup(file):
    """Open an XML file and return it parsed as a BeautifulSoup object.

    :param file: path of the XML file to read (decoded as UTF-8).
    :return: the BeautifulSoup tree built with the 'xml' parser.
    """
    with open(file, 'r', encoding="utf-8") as opening:
        return BeautifulSoup(opening, 'xml')


def _populate(model, data_lst: list, desc: str) -> None:
    """Create one row of *model* per dict of *data_lst*, showing progress.

    :param model: table class exposing ``create(**fields)``.
    :param data_lst: list of dicts, each unpacked as keyword arguments.
    :param desc: label displayed by the tqdm progress bar.
    """
    for data in tqdm(data_lst, desc=desc):
        model.create(**data)


def _create_institution(data_lst: list) -> None:
    """Insert every dict of *data_lst* as a row of Institution."""
    _populate(Institution, data_lst, "Populating Institution...")


def _create_state(data_lst: list) -> None:
    """Insert every dict of *data_lst* as a row of State."""
    _populate(State, data_lst, "Populating State...")


def _create_diplo_type(data_lst: list) -> None:
    """Insert every dict of *data_lst* as a row of Diplo_type."""
    _populate(Diplo_type, data_lst, "Populating Diplo_type...")


def _create_produc_place(xml_file: str, folder: str) -> None:
    """Fill Production_place with the distinct places found in *folder*.

    For each file of *folder*, the first ``placeName`` element whose
    ``type`` is ``production_place`` is looked up and its children are
    collected; one row is then created per distinct collected value.

    :param xml_file: not used by this function; kept so existing calls
        keep working.
    :param folder: directory whose entries are all parsed as XML.
    """
    places_xtract = set()
    for acte in os.listdir(folder):
        soup = make_soup(os.path.join(folder, acte))
        place = soup.find('placeName', {'type': 'production_place'})
        if place is None:
            # find() returns None when the file has no production place;
            # iterating over None would raise TypeError.
            continue
        # Iterating over a Tag yields its direct children.
        places_xtract.update(place)
    production_places = [{"placename": xtraction} for xtraction in places_xtract]
    _populate(Production_place, production_places, "Populating Place...")


def _create_doc(xml_file: str, folder: str) -> None:
    """Collect per-document details from the files of *folder* and print them.

    For each file, the ``repository`` element, the ``idno`` elements with
    ``n="1"`` inside ``msIdentifier`` and the ``type`` attribute of the
    first ``div`` of ``body`` are read.  Results are gathered in a dict and
    printed; nothing is written to the Document table yet (see the
    disabled code at the end of the function).

    :param xml_file: not used by this function; kept so existing calls
        keep working.
    :param folder: directory whose entries are all parsed as XML.
    """
    details_doc = {}
    for acte in os.listdir(folder):
        soup = make_soup(os.path.join(folder, acte))
        # doc_id = soup.TEI["xml:id"]
        inst_doc = soup.repository
        nb_doc_1 = soup.msIdentifier.find_all("idno", {"n": "1"})
        diplo_state = soup.body.div["type"]
        # details_doc.append([inst_doc, nb_doc_1, diplo_state])
        # The result of find_all() is a list subclass and therefore not
        # hashable, so it cannot be a dict key.  A tuple of the idno texts
        # is used instead (empty tuple when no idno matched).
        # NOTE(review): files sharing the same idno texts overwrite each
        # other here -- confirm this de-duplication is what is wanted.
        doc_key = tuple(idno.text for idno in nb_doc_1)
        details_doc[doc_key] = [inst_doc, diplo_state]
    print(details_doc)
    # Disabled insertion step.  It was written for the earlier list-based
    # shape of details_doc ([inst_doc, nb_doc_1, diplo_state]) and has to be
    # adapted to the dict above before being re-enabled.
    #
    # infos_doc = []
    # for item in details_doc:
    #     inst_query = [t.id_institution for t in Institution.select().where(
    #         Institution.full_label == item[0].text)]
    #     state_query = [t.id_state for t in State.select().where(
    #         State.state_label == item[2])]
    #     if len(item[1]) > 0:  # if there is a collection
    #         infos_doc.append({
    #             "inst_doc": inst_query[0],
    #             "collection_doc": item[1][0].text,
    #             "state_doc": state_query[0],
    #         })
    #     else:  # if collection is missing
    #         infos_doc.append({
    #             "inst_doc": inst_query[0],
    #             "collection_doc": "",
    #             "state_doc": state_query[0],
    #         })
    # for data in tqdm(infos_doc, desc="Populating Document..."):
    #     print(data)
    #     Document.create(**data)


def init():
    """Drop and re-create every table, then populate them.

    Reads the module-level ``xml`` variable, which must be defined before
    this function is called.
    """
    tables = [Institution, State, Production_place, Diplo_type, Document, Acte]
    db.connect()
    print("Dropping existing DB...")
    db.drop_tables(tables)
    print("Re-creating schema...")
    db.create_tables(tables)
    _create_institution(institution)
    _create_state(state)
    _create_diplo_type(diplomatic_type)
    _create_produc_place(xml, "./static/xml/Bourbon/Brb_5_Charles_Ier")
    print("ok")
    _create_doc(xml, "./static/xml/Bourbon/Brb_5_Charles_Ier")


xml = "../bourbon-latex/charles-actes-latex.xml"

# init()

# NOTE: this call runs whenever the module is executed or imported.
_create_doc(xml, "./static/xml/Bourbon/Brb_5_Charles_Ier")