From 3789a83b4ba14ce1520eb2a5b9a83103a5a1412d Mon Sep 17 00:00:00 2001 From: gwen Date: Wed, 20 Sep 2023 17:52:28 +0200 Subject: [PATCH] add entries in acte database --- .../pipelines/populate_mongo/nodes.py | 2 ++ actes-princiers/src/actesdataset.py | 20 +++++++++---------- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/actes-princiers/src/actes_princiers/pipelines/populate_mongo/nodes.py b/actes-princiers/src/actes_princiers/pipelines/populate_mongo/nodes.py index 235f6e1..cef527d 100755 --- a/actes-princiers/src/actes_princiers/pipelines/populate_mongo/nodes.py +++ b/actes-princiers/src/actes_princiers/pipelines/populate_mongo/nodes.py @@ -52,6 +52,8 @@ class Acte(Document): xmlcontent = StringField(required=True) # no max_length place = DictField() folium = StringField(required=False) # no max_length + diplo_state = StringField(required=False) # sample: "diplo_state": "Original", + diplo_type = StringField(required=False) # sample: "diplo_type": "Lettres_patentes", def db_connect(storage_ip, db_name, mongodb_admin, mongodb_password): diff --git a/actes-princiers/src/actesdataset.py b/actes-princiers/src/actesdataset.py index 976d813..2040f3d 100644 --- a/actes-princiers/src/actesdataset.py +++ b/actes-princiers/src/actesdataset.py @@ -104,7 +104,7 @@ class BsXMLDataSet(XMLDataSet): prince_name = ps.get_text() return prince_name - def make_prince_code_from_filestem(self, filestem): + def extract_prince_code_from_filestem(self, filestem): """ builds prince code @@ -137,9 +137,8 @@ class BsXMLDataSet(XMLDataSet): # //sourceDesc//msIdentifier/idno[@n='1'] is always the # archive box or manuscript collection id #doc = self.soup.msIdentifier.find_all("idno", {"n": "1"})[0] - #type_diplo = self.soup.body.div["subtype"] - #diplo_state = self.soup.body.div["type"] - + type_diplo = self.soup.body.div["subtype"] + diplo_state = self.soup.body.div["type"] # geolocalisation place = self.soup.find("place") place_name = place.find("placeName") @@ -174,11 +173,10 @@ class BsXMLDataSet(XMLDataSet): latitude = latitude, longitude = longitude ) - return { -# "num_acte": counter, +# "num_acte": counter, "prince_name": self.find_prince_name(), - "prince_code": self.make_prince_code_from_filestem(numb), + "prince_code": self.extract_prince_code_from_filestem(numb), "filename": numb, "date_time": date_time, "date": date, @@ -187,9 +185,11 @@ class BsXMLDataSet(XMLDataSet): # "doc_acte": doc_query[0], "ref_acte": ref_acte, "transcribers": self.find_transcribers(), - "place": place -# "state_doc": state_query[0], -# "diplo_type_acte": diplo_query[0] + "place": place, + "diplo_type": type_diplo, + "diplo_state": diplo_state +# "state_doc": state_query[0], +# "diplo_type_acte": diplo_query[0] } class DataSetCollection(AbstractDataSet):