You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

69 lines
1.9 KiB
Python

#!/usr/bin/python
# -*- coding: UTF-8 -*-
"""
Authors : Jean-Damien Généro
Affiliation : French National Center for Scientific Research (CNRS)
Assigned at the Centre de recherches historiques (CRH, UMR 8558)
Date : 2022-10-11
Update :
"""
from bs4 import BeautifulSoup
from peewee import *
from tqdm import tqdm
from modeles.princes_db_tables import db, Institution, State, Place, Diplo_type, Document, Acte
from data.institution_data import institution
from data.state_data import state
from data.diplo_type_data import diplomatic_type
def make_soup(file):
    """Parse an XML file and return the resulting BeautifulSoup object.

    :param file: path of the XML file; it is opened as UTF-8 text.
    :return: BeautifulSoup object built from the file with the 'xml' feature.
    """
    with open(file, 'r', encoding="utf-8") as handle:
        return BeautifulSoup(handle, 'xml')
def _create_institution(data_lst: list) -> None:
    """Insert one Institution row per entry of ``data_lst``.

    :param data_lst: list of mappings; each entry is unpacked as keyword
        arguments to ``Institution.create``.
    """
    progress = tqdm(data_lst, desc="Populating Institution...")
    for record in progress:
        Institution.create(**record)
def _create_state(data_lst: list) -> None:
    """Insert one State row per entry of ``data_lst``.

    :param data_lst: list of mappings; each entry is unpacked as keyword
        arguments to ``State.create``.
    """
    progress = tqdm(data_lst, desc="Populating State...")
    for record in progress:
        State.create(**record)
def _create_diplo_type(data_lst: list) -> None:
    """Insert one Diplo_type row per entry of ``data_lst``.

    :param data_lst: list of mappings; each entry is unpacked as keyword
        arguments to ``Diplo_type.create``.
    """
    progress = tqdm(data_lst, desc="Populating Diplo_type...")
    for record in progress:
        Diplo_type.create(**record)
def _create_place(xml_file: str) -> None:
    """Populate the Place table with production places found in an XML file.

    Every ``placeName`` element whose ``type`` is ``production_place`` and
    that sits inside a ``div`` carrying an ``xml:id`` attribute contributes
    its text content. Each distinct text is inserted exactly once.

    :param xml_file: path of the XML file to parse.
    """
    soup = make_soup(xml_file)
    places_xtract = []
    for div in soup.find_all('div', {'xml:id': True}):
        places_xtract.extend(
            place.text
            for place in div.find_all('placeName', {'type': 'production_place'})
        )
    # dict.fromkeys drops duplicates while keeping first-appearance order, so
    # rows are inserted in the same order on every run. Iterating a set of
    # strings is not stable from one interpreter run to the next.
    production_places = [
        {"placename": xtraction} for xtraction in dict.fromkeys(places_xtract)
    ]
    for data in tqdm(production_places, desc="Populating Place..."):
        Place.create(**data)
def init(xml_file=None):
    """Drop, re-create and populate the database tables.

    Connects to ``db``, drops and re-creates the Institution, State, Place,
    Diplo_type, Document and Acte tables, then fills Institution, State,
    Diplo_type and Place. Document and Acte are created but not populated
    by this function.

    :param xml_file: path (str) of the XML file from which production places
        are extracted. When omitted, the module-level ``xml`` variable is
        used, which keeps existing ``init()`` calls working unchanged.
    """
    if xml_file is None:
        # Resolve the fallback before touching the database, so a missing
        # module-level ``xml`` fails here rather than after the tables
        # have already been dropped.
        xml_file = xml
    tables = [Institution, State, Place, Diplo_type, Document, Acte]
    db.connect()
    print("Dropping existing DB...")
    db.drop_tables(tables)
    print("Re-creating schema...")
    db.create_tables(tables)
    _create_institution(institution)
    _create_state(state)
    _create_diplo_type(diplomatic_type)
    _create_place(xml_file)
# Path of the XML file that init() reads, through the module-level name
# `xml`, to extract production places. It must be bound before init() runs.
xml = "../bourbon-latex/charles-actes-latex.xml"
# Called at module top level (no __main__ guard), so the database is dropped
# and rebuilt whenever this file is run or imported.
init()