cmd/db.py docstring create acte

main
Jean-Damien 3 years ago
parent fb8b55a504
commit 96fb348548

@ -88,7 +88,7 @@ def _create_doc(folder: str)-> None:
inst_doc = soup.repository.text # //sourceDesc//msIdentifier/repository inst_doc = soup.repository.text # //sourceDesc//msIdentifier/repository
# //sourceDesc//msIdentifier/idno[@n='1'] is always the # //sourceDesc//msIdentifier/idno[@n='1'] is always the
# archive box or manuscript collection id # archive box or manuscript collection id
# (//sourceDesc//msIdentifier/idno[@n='1'] is the doc id inside # (//sourceDesc//msIdentifier/idno[@n='2'] is the doc id inside
# the box or the page number inside a manuscript) # the box or the page number inside a manuscript)
nb_doc_1 = soup.msIdentifier.find_all("idno", {"n": "1"})[0].text nb_doc_1 = soup.msIdentifier.find_all("idno", {"n": "1"})[0].text
details_doc.append(inst_doc + " == " + nb_doc_1) details_doc.append(inst_doc + " == " + nb_doc_1)
@ -124,25 +124,36 @@ def _create_agent(name_lst: list)-> None:
Agent.create(**data) Agent.create(**data)
def _create_acte(folder: str)-> None: def _create_acte(folder: str)-> None:
"""create table acte"""
actes = [] actes = []
counter = 0 counter = 0
for acte in sorted(os.listdir(folder)): for acte in sorted(os.listdir(folder)):
if acte.endswith(".xml"): if acte.endswith(".xml"):
counter += 1 counter += 1
soup = make_soup(os.path.join(folder, acte)) soup = make_soup(os.path.join(folder, acte))
numb = soup.TEI["xml:id"]
date_time = soup.msItem.docDate["when"] # 1.1/ Get all data from XML (9). counter is the id (= num_acte)
date = soup.msItem.docDate.text numb = soup.TEI["xml:id"] # /TEI[@xml:id] is always the acte's ID
analyse = soup.abstract.p.text date_time = soup.msItem.docDate["when"] # YYYY-MM-DD or YYYY-MM date
date = soup.msItem.docDate.text # verbose date
analyse = soup.abstract.p.text # acte's short analysis
ref = soup.msIdentifier.find_all("idno", {"n": "2"}) ref = soup.msIdentifier.find_all("idno", {"n": "2"})
if len(ref) > 0: # //sourceDesc//msIdentifier/idno[@n='2'] is the doc id inside the
# archive box or the page number inside a manuscript (see _create_doc)
# warning: the analysis may not have been written yet,
# which would raise an IndexError (list index out of range) on ref[0]. Hence:
if len(ref) > 0: # there is an analysis
ref_acte = ref[0].text ref_acte = ref[0].text
else: else: # there is no analysis
ref_acte = "NS" ref_acte = "NS"
prod_place = soup.find_all("placeName", {"type": "production_place"})[0].text prod_place = soup.find_all("placeName", {"type": "production_place"})[0].text
# //sourceDesc//msIdentifier/idno[@n='1'] is always the
# archive box or manuscript collection id
doc = soup.msIdentifier.find_all("idno", {"n": "1"})[0] doc = soup.msIdentifier.find_all("idno", {"n": "1"})[0]
type_diplo = soup.body.div["subtype"] type_diplo = soup.body.div["subtype"]
diplo_state = soup.body.div["type"] diplo_state = soup.body.div["type"]
# 1.2/ For some data, we need to make queries to get foreign keys
place_query = [t.id_place for t in Production_place.select().where( place_query = [t.id_place for t in Production_place.select().where(
Production_place.placename == prod_place)] Production_place.placename == prod_place)]
doc_query = [t.id_document for t in Document.select().where( doc_query = [t.id_document for t in Document.select().where(
@ -151,6 +162,8 @@ def _create_acte(folder: str)-> None:
Diplo_type.diplo_label == type_diplo)] Diplo_type.diplo_label == type_diplo)]
state_query = [t.id_state for t in State.select().where( state_query = [t.id_state for t in State.select().where(
State.state_label == diplo_state)] State.state_label == diplo_state)]
# 2/ Make the data list
actes.append({ actes.append({
"num_acte": counter, "num_acte": counter,
"filename": numb, "filename": numb,
@ -163,6 +176,7 @@ def _create_acte(folder: str)-> None:
"state_doc": state_query[0], "state_doc": state_query[0],
"diplo_type_acte": diplo_query[0] "diplo_type_acte": diplo_query[0]
}) })
# 3/ Populate the table (one row per acte)
for data in tqdm(actes, desc="Populating Actes..."): for data in tqdm(actes, desc="Populating Actes..."):
Acte.create(**data) Acte.create(**data)

Loading…
Cancel
Save