From 208d22053c7ad0233bed2b668fa0a240fa7275a1 Mon Sep 17 00:00:00 2001 From: gwen Date: Tue, 19 Sep 2023 15:32:47 +0200 Subject: [PATCH] add anjou and berry in mongodb --- .../pipelines/populate_mongo/nodes.py | 4 ++-- .../pipelines/populate_mongo/pipeline.py | 20 +++++++++++++++++-- .../pipelines/xml_processing/nodes.py | 3 +-- actes-princiers/src/actesdataset.py | 16 +++++++++++++++ 4 files changed, 37 insertions(+), 6 deletions(-) diff --git a/actes-princiers/src/actes_princiers/pipelines/populate_mongo/nodes.py b/actes-princiers/src/actes_princiers/pipelines/populate_mongo/nodes.py index 867ac6d..235f6e1 100755 --- a/actes-princiers/src/actes_princiers/pipelines/populate_mongo/nodes.py +++ b/actes-princiers/src/actes_princiers/pipelines/populate_mongo/nodes.py @@ -41,10 +41,11 @@ class Acte(Document): _id = StringField(required=True, max_length=150) house = StringField(required=True, max_length=100) prince_name = StringField(required=True, max_length=150) + prince_code = StringField(required=True, max_length=100) analysis = StringField(required=True, max_length=3000) date = StringField(required=True, max_length=250) transcribers = ListField(required=True) - # FIXME type it as a **real** date object ? + # FIXME should date_time be typed as a **real** date object ?
date_time = StringField(required=True, max_length=15) filename = StringField(required=True, max_length=100) ref_acte = StringField(required=True, max_length=100) @@ -113,7 +114,6 @@ def load_houses(yamldoc: YAMLDataSet, storage_ip: str, db_name: str, mongodb_adm for house_dict in yamldoc['houses'].values(): house_dict['_id'] = house_dict['name'] - #logger.info("inserting: " + str(house_dict)) #houses_col.insert_one(house_dict) house_entry = House(**house_dict) house_entry.save() diff --git a/actes-princiers/src/actes_princiers/pipelines/populate_mongo/pipeline.py b/actes-princiers/src/actes_princiers/pipelines/populate_mongo/pipeline.py index daff373..518c9f0 100755 --- a/actes-princiers/src/actes_princiers/pipelines/populate_mongo/pipeline.py +++ b/actes-princiers/src/actes_princiers/pipelines/populate_mongo/pipeline.py @@ -15,6 +15,24 @@ def create_pipeline(**kwargs) -> Pipeline: name="populate_actes", tags="populate_database", ), + node( + func=populate_mongo, + inputs=["berry_fulljsonoutput", "params:storage_ip", "params:db_name", + "params:db_collection_name", "params:mongodb_admin", + "params:mongodb_password"], + outputs=None, + name="populate_actes_berry",  # node names must be unique within a Kedro pipeline + tags="populate_database", + ), + node( + func=populate_mongo, + inputs=["anjou_fulljsonoutput", "params:storage_ip", "params:db_name", + "params:db_collection_name", "params:mongodb_admin", + "params:mongodb_password"], + outputs=None, + name="populate_actes_anjou",  # node names must be unique within a Kedro pipeline + tags="populate_database", + ), node( func=load_houses, inputs=["houses", "params:storage_ip", "params:db_name", @@ -32,8 +50,6 @@ def create_pipeline(**kwargs) -> Pipeline: name="populate_helpers", tags="populate_database", ) - - ] ) diff --git a/actes-princiers/src/actes_princiers/pipelines/xml_processing/nodes.py b/actes-princiers/src/actes_princiers/pipelines/xml_processing/nodes.py index 2ac6787..be36345 100755 --- a/actes-princiers/src/actes_princiers/pipelines/xml_processing/nodes.py +++ 
b/actes-princiers/src/actes_princiers/pipelines/xml_processing/nodes.py @@ -69,11 +69,10 @@ def _make_map(latitude, longitude, popup): width=800, height=600, ) - folium.Marker( location=[latitude, longitude], popup=popup, - icon=folium.Icon(icon="cloud"), + icon=folium.Icon(color='lightgray', icon="circle", prefix='fa') ).add_to(m) return m.get_root()._repr_html_() diff --git a/actes-princiers/src/actesdataset.py b/actes-princiers/src/actesdataset.py index 99b258d..976d813 100644 --- a/actes-princiers/src/actesdataset.py +++ b/actes-princiers/src/actesdataset.py @@ -104,6 +104,20 @@ class BsXMLDataSet(XMLDataSet): prince_name = ps.get_text() return prince_name + def make_prince_code_from_filestem(self, filestem): + """ + Build the prince code from the file stem of an acte. + + :param filestem: file stem made of underscore-separated tokens, + sample: "anj_isa_i_1441_08_05a" + :return: prince code (the second and third tokens joined by "_"), sample: "isa_i" + """ + # split the stem on underscores + cut = filestem.split('_') + # keep tokens 1 and 2 only, which drops the leading house token and the trailing date tokens + prince_code = "_".join(cut[1:3]) + return prince_code + def transform(self): #soup = make_soup(os.path.join(folder, acte)) # 1.1/ Get all data from XML (9). counter is the id (= numb_acte) @@ -160,9 +174,11 @@ class BsXMLDataSet(XMLDataSet): latitude = latitude, longitude = longitude ) + return { # "num_acte": counter, "prince_name": self.find_prince_name(), + "prince_code": self.make_prince_code_from_filestem(numb), "filename": numb, "date_time": date_time, "date": date,