From feefd35e18875aa14685daf96dc0a18510e44d96 Mon Sep 17 00:00:00 2001 From: Jean-Damien Date: Fri, 23 Dec 2022 16:23:22 +0100 Subject: [PATCH] cmd/db.py docstring create doc --- app/cmd/db.py | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/app/cmd/db.py b/app/cmd/db.py index 6960d2f..a93daa5 100644 --- a/app/cmd/db.py +++ b/app/cmd/db.py @@ -72,7 +72,7 @@ def _create_produc_place(folder: str)-> None: # and add to list places_xtract for place in soup.find('placeName', {'type': 'production_place'}): places_xtract.append(place) - # made data list (production_places) by iterating on set(places_xtract) + # make data list (production_places) by iterating on set(places_xtract) production_places = [{"placename": xtraction} for xtraction in set(places_xtract)] for data in tqdm(production_places, desc="Populating Place..."): Production_place.create(**data) @@ -81,18 +81,22 @@ def _create_doc(folder: str)-> None: """create doc table""" details_doc = [] infos_doc = [] - # 1/ get repository (doc archives) + doc collection in a list + # 1/ get repository (doc Archives) + doc number in a list for acte in os.listdir(folder): if acte.endswith(".xml"): soup = make_soup(os.path.join(folder, acte)) - inst_doc = soup.repository.text + inst_doc = soup.repository.text # //sourceDesc//msIdentifier/repository + # //sourceDesc//msIdentifier/idno[@n='1'] is always the + # archive box or manuscript collection id + # (//sourceDesc//msIdentifier/idno[@n='1'] is the doc id inside + # the box or the page number inside a manuscript) nb_doc_1 = soup.msIdentifier.find_all("idno", {"n": "1"})[0].text details_doc.append(inst_doc + " == " + nb_doc_1) - # 2/ make a query on table Inst to get inst id - # then pretiffy data for the table Doc + # 2/ make the data list for doc in set(details_doc): - doc_archives = re.sub('(.+) == .+', '\\1', doc) - doc_cote = re.sub('.+ == (.+)', '\\1', doc) + doc_archives = re.sub('(.+) == .+', '\\1', doc) # only the Archives + 
doc_cote = re.sub('.+ == (.+)', '\\1', doc) # only the box number (or ms id) + # query the Institution table to get the corresponding institution key inst_query = [t.id_institution for t in Institution.select().where( Institution.full_label == doc_archives)] infos_doc.append({