cmd/db.py docstring create doc

main
Jean-Damien 3 years ago
parent 631c6a9148
commit feefd35e18

@ -72,7 +72,7 @@ def _create_produc_place(folder: str)-> None:
# and add to list places_xtract
for place in soup.find('placeName', {'type': 'production_place'}):
places_xtract.append(place)
# made data list (production_places) by iterating on set(places_xtract)
# make data list (production_places) by iterating on set(places_xtract)
production_places = [{"placename": xtraction} for xtraction in set(places_xtract)]
for data in tqdm(production_places, desc="Populating Place..."):
Production_place.create(**data)
@ -81,18 +81,22 @@ def _create_doc(folder: str)-> None:
"""create doc table"""
details_doc = []
infos_doc = []
# 1/ get repository (doc archives) + doc collection in a list
# 1/ get repository (doc Archives) + doc number in a list
for acte in os.listdir(folder):
if acte.endswith(".xml"):
soup = make_soup(os.path.join(folder, acte))
inst_doc = soup.repository.text
inst_doc = soup.repository.text # //sourceDesc//msIdentifier/repository
# //sourceDesc//msIdentifier/idno[@n='1'] is always the
# archive box or manuscript collection id
# (presumably //sourceDesc//msIdentifier/idno[@n='2'] is the doc id inside
# the box or the page number inside a manuscript -- TODO confirm the @n value)
nb_doc_1 = soup.msIdentifier.find_all("idno", {"n": "1"})[0].text
details_doc.append(inst_doc + " == " + nb_doc_1)
# 2/ make a query on table Inst to get inst id
# then pretiffy data for the table Doc
# 2/ make the data list
for doc in set(details_doc):
doc_archives = re.sub('(.+) == .+', '\\1', doc)
doc_cote = re.sub('.+ == (.+)', '\\1', doc)
doc_archives = re.sub('(.+) == .+', '\\1', doc) # only the Archives
doc_cote = re.sub('.+ == (.+)', '\\1', doc) # only the box number (or ms id)
# query the Institution table with <doc_archives> to get the corresponding institution key
inst_query = [t.id_institution for t in Institution.select().where(
Institution.full_label == doc_archives)]
infos_doc.append({

Loading…
Cancel
Save