You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
69 lines
2.1 KiB
Python
69 lines
2.1 KiB
Python
#!/usr/bin/python
|
|
# -*- coding: UTF-8 -*-
|
|
|
|
"""
|
|
Authors : Jean-Damien Généro
|
|
Affiliation : French National Center for Scientific Research (CNRS)
|
|
Assigned at the Centre de recherches historiques (CRH, UMR 8558)
|
|
Date : 2022-10-11
|
|
Update :
|
|
"""
|
|
|
|
import os
|
|
from bs4 import BeautifulSoup
|
|
from peewee import *
|
|
from tqdm import tqdm
|
|
|
|
from modeles.princes_db_tables import db, Institution, State, Production_place, Diplo_type, Document, Acte
|
|
|
|
from data.institution_data import institution
|
|
from data.state_data import state
|
|
from data.diplo_type_data import diplomatic_type
|
|
|
|
|
|
def make_soup(file):
    """Parse the XML file at *file* and return the BeautifulSoup document.

    The file is opened as UTF-8 text and handed to BeautifulSoup with the
    'xml' parser; the handle is closed when the ``with`` block exits.
    """
    with open(file, mode='r', encoding="utf-8") as handle:
        return BeautifulSoup(handle, 'xml')
|
|
|
|
|
|
def _create_institution(data_lst: list) -> None:
    """Create one Institution row for each mapping in *data_lst*.

    Every item is unpacked as keyword arguments to ``Institution.create``;
    progress is shown through a tqdm bar.
    """
    progress = tqdm(data_lst, desc="Populating Institution...")
    for record in progress:
        Institution.create(**record)
|
|
|
|
def _create_state(data_lst: list) -> None:
    """Create one State row for each mapping in *data_lst*.

    Every item is unpacked as keyword arguments to ``State.create``;
    progress is shown through a tqdm bar.
    """
    progress = tqdm(data_lst, desc="Populating State...")
    for record in progress:
        State.create(**record)
|
|
|
|
def _create_diplo_type(data_lst: list) -> None:
    """Create one Diplo_type row for each mapping in *data_lst*.

    Every item is unpacked as keyword arguments to ``Diplo_type.create``;
    progress is shown through a tqdm bar.
    """
    progress = tqdm(data_lst, desc="Populating Diplo_type...")
    for record in progress:
        Diplo_type.create(**record)
|
|
|
|
def _create_produc_place(xml_file: str, folder: str) -> None:
    """Populate Production_place with the distinct places found in *folder*.

    Every entry of *folder* is parsed with ``make_soup``. The text of each
    ``<placeName type="production_place">`` element is collected, duplicates
    are dropped, and one row is created per distinct name.

    :param xml_file: not used by this function; kept so that existing
        callers keep working.
    :param folder: directory whose entries are each parsed as an XML file.
    """
    place_names = set()
    for filename in os.listdir(folder):
        soup = make_soup(os.path.join(folder, filename))
        # find_all() yields nothing when no element matches. Iterating the
        # result of find() instead raises TypeError on None and otherwise
        # walks the child nodes of the first match only.
        for element in soup.find_all('placeName', {'type': 'production_place'}):
            name = element.get_text()
            # An empty element contributes no place name.
            if name:
                place_names.add(name)
    # Sorted so that rows are inserted in the same order on every run.
    rows = [{"placename": name} for name in sorted(place_names)]
    for row in tqdm(rows, desc="Populating Place..."):
        Production_place.create(**row)
|
|
|
|
|
|
def init():
    """Rebuild the database schema and load its initial content.

    Connects to ``db``, drops then re-creates the six model tables, and
    fills Institution, State, Diplo_type and Production_place through the
    ``_create_*`` helpers. Reads the module-level name ``xml``, which must
    be defined before this function is called.
    """
    db.connect()
    tables = [Institution, State, Production_place, Diplo_type, Document, Acte]

    print("Dropping existing DB...")
    db.drop_tables(tables)

    print("Re-creating schema...")
    db.create_tables(tables)

    _create_institution(institution)
    _create_state(state)
    _create_diplo_type(diplomatic_type)
    _create_produc_place(xml, "./static/xml/Bourbon/Brb_5_Charles_Ier")
|
|
|
|
# Path that init() forwards to _create_produc_place() as its first argument.
xml = "../bourbon-latex/charles-actes-latex.xml"

# Runs unconditionally whenever this file is executed or imported.
init()
|