|
|
|
@ -72,7 +72,7 @@ def _create_produc_place(folder: str)-> None:
|
|
|
|
# and add to list places_xtract
|
|
|
|
# and add to list places_xtract
|
|
|
|
for place in soup.find('placeName', {'type': 'production_place'}):
|
|
|
|
for place in soup.find('placeName', {'type': 'production_place'}):
|
|
|
|
places_xtract.append(place)
|
|
|
|
places_xtract.append(place)
|
|
|
|
# make data list (production_places) by iterating on set(places_xtract)
|
|
|
|
# make data list (production_places) by iterating on set(places_xtract)
|
|
|
|
production_places = [{"placename": xtraction} for xtraction in set(places_xtract)]
|
|
|
|
production_places = [{"placename": xtraction} for xtraction in set(places_xtract)]
|
|
|
|
for data in tqdm(production_places, desc="Populating Place..."):
|
|
|
|
for data in tqdm(production_places, desc="Populating Place..."):
|
|
|
|
Production_place.create(**data)
|
|
|
|
Production_place.create(**data)
|
|
|
|
@ -81,18 +81,22 @@ def _create_doc(folder: str)-> None:
|
|
|
|
"""create doc table"""
|
|
|
|
"""create doc table"""
|
|
|
|
details_doc = []
|
|
|
|
details_doc = []
|
|
|
|
infos_doc = []
|
|
|
|
infos_doc = []
|
|
|
|
# 1/ get repository (doc archives) + doc collection in a list
|
|
|
|
# 1/ get repository (doc Archives) + doc number in a list
|
|
|
|
for acte in os.listdir(folder):
|
|
|
|
for acte in os.listdir(folder):
|
|
|
|
if acte.endswith(".xml"):
|
|
|
|
if acte.endswith(".xml"):
|
|
|
|
soup = make_soup(os.path.join(folder, acte))
|
|
|
|
soup = make_soup(os.path.join(folder, acte))
|
|
|
|
inst_doc = soup.repository.text
|
|
|
|
inst_doc = soup.repository.text # //sourceDesc//msIdentifier/repository
|
|
|
|
|
|
|
|
# //sourceDesc//msIdentifier/idno[@n='1'] is always the
|
|
|
|
|
|
|
|
# archive box or manuscript collection id
|
|
|
|
|
|
|
|
# (//sourceDesc//msIdentifier/idno[@n='1'] is the doc id inside
|
|
|
|
|
|
|
|
# the box or the page number inside a manuscript)
|
|
|
|
nb_doc_1 = soup.msIdentifier.find_all("idno", {"n": "1"})[0].text
|
|
|
|
nb_doc_1 = soup.msIdentifier.find_all("idno", {"n": "1"})[0].text
|
|
|
|
details_doc.append(inst_doc + " == " + nb_doc_1)
|
|
|
|
details_doc.append(inst_doc + " == " + nb_doc_1)
|
|
|
|
# 2/ make a query on table Inst to get inst id
|
|
|
|
# 2/ make the data list
|
|
|
|
# then prettify data for the table Doc
|
|
|
|
|
|
|
|
for doc in set(details_doc):
|
|
|
|
for doc in set(details_doc):
|
|
|
|
doc_archives = re.sub('(.+) == .+', '\\1', doc)
|
|
|
|
doc_archives = re.sub('(.+) == .+', '\\1', doc) # only the Archives
|
|
|
|
doc_cote = re.sub('.+ == (.+)', '\\1', doc)
|
|
|
|
doc_cote = re.sub('.+ == (.+)', '\\1', doc) # only the box number (or ms id)
|
|
|
|
|
|
|
|
# query the Institution table with <doc_archives> to get the corresponding institution key
|
|
|
|
inst_query = [t.id_institution for t in Institution.select().where(
|
|
|
|
inst_query = [t.id_institution for t in Institution.select().where(
|
|
|
|
Institution.full_label == doc_archives)]
|
|
|
|
Institution.full_label == doc_archives)]
|
|
|
|
infos_doc.append({
|
|
|
|
infos_doc.append({
|
|
|
|
|