create doc table

main
jgenero 3 years ago
parent 72551c1cf2
commit 7df98c9381

@ -6,12 +6,12 @@ Authors : Jean-Damien Généro
Affiliation : French National Center for Scientific Research (CNRS) Affiliation : French National Center for Scientific Research (CNRS)
Assigned at the Centre de recherches historiques (CRH, UMR 8558) Assigned at the Centre de recherches historiques (CRH, UMR 8558)
Date : 2022-10-11 Date : 2022-10-11
Update : Update : 2022-10-13
""" """
import os import os
import re
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from collections import OrderedDict
from peewee import * from peewee import *
from tqdm import tqdm from tqdm import tqdm
@ -30,18 +30,22 @@ def make_soup(file):
def _create_institution(data_lst: list)-> None:
    """Fill the Institution table.

    One Institution.create call is made per item of data_lst, with the item
    unpacked as keyword arguments (so each item must be a mapping).
    """
    rows = tqdm(data_lst, desc="Populating Institution...")
    for fields in rows:
        Institution.create(**fields)
def _create_state(data_lst: list)-> None:
    """Fill the State table.

    One State.create call is made per item of data_lst, with the item
    unpacked as keyword arguments (so each item must be a mapping).
    """
    rows = tqdm(data_lst, desc="Populating State...")
    for fields in rows:
        State.create(**fields)
def _create_diplo_type(data_lst: list)-> None:
    """Fill the Diplo_type table.

    One Diplo_type.create call is made per item of data_lst, with the item
    unpacked as keyword arguments (so each item must be a mapping).
    """
    rows = tqdm(data_lst, desc="Populating Diplo_type...")
    for fields in rows:
        Diplo_type.create(**fields)
def _create_produc_place(xml_file: str, folder: str)-> None: def _create_produc_place(xml_file: str, folder: str)-> None:
"""create production place table"""
places_xtract = [] places_xtract = []
production_places = [] production_places = []
for acte in os.listdir(folder): for acte in os.listdir(folder):
@ -53,42 +57,32 @@ def _create_produc_place(xml_file: str, folder: str)-> None:
Production_place.create(**data) Production_place.create(**data)
def _create_doc(xml_file: str, folder: str)-> None:
    """Fill the Document table from the files found in ``folder``.

    Every entry of ``folder`` is parsed with make_soup. The text of its
    <repository> element and the text of the first <idno n="1"> found under
    <msIdentifier> form one (archive label, shelfmark) pair. Each distinct
    pair is resolved against Institution.full_label and inserted as one
    Document row with the keys "inst_doc" and "collection_doc".

    :param xml_file: not read by this function; kept so existing calls work.
    :param folder: directory whose entries are each passed to make_soup.
    :raises IndexError: when an archive label matches no Institution row.
    """
    # 1/ get repository (doc archives) + doc collection.
    # Pairs are stored as dict keys: this removes duplicates without gluing
    # the two values into one string, and keeps first-seen order. The listing
    # is sorted so the insertion order is the same on every run.
    pairs = {}
    for acte in sorted(os.listdir(folder)):
        soup = make_soup(os.path.join(folder, acte))
        inst_doc = soup.repository.text
        idnos = soup.msIdentifier.find_all("idno", {"n": "1"})
        # a file without <idno n="1"> gets an empty collection
        nb_doc_1 = idnos[0].text if idnos else ""
        pairs[(inst_doc, nb_doc_1)] = None

    # 2/ make a query on table Institution to get the institution id,
    # then shape the data for the table Document
    infos_doc = []
    for doc_archives, doc_cote in pairs:
        inst_query = [t.id_institution for t in Institution.select().where(
            Institution.full_label == doc_archives)]
        if not inst_query:
            # same exception type as an empty-list lookup, with a usable message
            raise IndexError(
                "no Institution row with full_label {!r}".format(doc_archives))
        infos_doc.append({
            "inst_doc": inst_query[0],
            "collection_doc": doc_cote,
        })

    # 3/ create the rows
    for data in tqdm(infos_doc, desc="Populating Document..."):
        print(data)
        Document.create(**data)
"""
def init(): def init():
"""initializing db"""
db.connect() db.connect()
print("Dropping existing DB...") print("Dropping existing DB...")
db.drop_tables([Institution, State, Production_place, Diplo_type, Document, Acte]) db.drop_tables([Institution, State, Production_place, Diplo_type, Document, Acte])
@ -98,11 +92,11 @@ def init():
_create_state(state) _create_state(state)
_create_diplo_type(diplomatic_type) _create_diplo_type(diplomatic_type)
_create_produc_place(xml, "./static/xml/Bourbon/Brb_5_Charles_Ier") _create_produc_place(xml, "./static/xml/Bourbon/Brb_5_Charles_Ier")
print("ok")
_create_doc(xml, "./static/xml/Bourbon/Brb_5_Charles_Ier") _create_doc(xml, "./static/xml/Bourbon/Brb_5_Charles_Ier")
# Path handed to _create_produc_place and _create_doc as their first argument.
xml = "../bourbon-latex/charles-actes-latex.xml"

# Rebuild the database: init() connects, drops the existing tables and
# repopulates them.
init()

# Kept for later: diplomatic-state lookup, not used by _create_doc at present.
# diplo_state = soup.body.div["type"]
# state_query = [t.id_state for t in State.select().where(State.state_label == item[2])]
Loading…
Cancel
Save