From 318213034ef6cb589e12c4d1346ced50df721b0d Mon Sep 17 00:00:00 2001 From: gwen Date: Fri, 15 Sep 2023 18:16:48 +0200 Subject: [PATCH] add mongo helper collection --- .../pipelines/populate_mongo/nodes.py | 34 ++++++++++--------- .../pipelines/populate_mongo/pipeline.py | 4 ++- 2 files changed, 21 insertions(+), 17 deletions(-) diff --git a/actes-princiers/src/actes_princiers/pipelines/populate_mongo/nodes.py b/actes-princiers/src/actes_princiers/pipelines/populate_mongo/nodes.py index 7cac618..4e15d41 100755 --- a/actes-princiers/src/actes_princiers/pipelines/populate_mongo/nodes.py +++ b/actes-princiers/src/actes_princiers/pipelines/populate_mongo/nodes.py @@ -33,6 +33,7 @@ class House(Document): class Acte(Document): "_id is the filename" _id = StringField(required=True, max_length=150) + house = StringField(required=True, max_length=100) analysis = StringField(required=True, max_length=3000) date = StringField(required=True, max_length=250) # FIMXE make a real date object ? or not. @@ -42,25 +43,28 @@ class Acte(Document): xmlcontent = StringField(required=True) # no max_length -# pipeline functions -def populate_mongo(jsondoc: JSONDataSetCollection, storage_ip: str, db_name: str, db_collection_name: str, mongodb_admin: str, mongodb_password: str) -> None: - - #logger.info(storage_ip) - #logger.info(db_name) - #logger.info(db_collection_name) - jsondatasets = jsondoc.datasets - housename = jsondoc._housename +def db_connect(storage_ip, db_name, mongodb_admin, mongodb_password): #mongodb://%s:%s@149.202.41.75:27017' % (username, password) username = urllib.parse.quote_plus(mongodb_admin) password = urllib.parse.quote_plus(mongodb_password) mongodb_url = f"mongodb://{username}:{password}@{storage_ip}:27017/" #mongodb_url = "mongodb://{}:27017/".format(storage_ip) logger.info("connection to the mongodb server") - # pymongo settings # myclient = pymongo.MongoClient(mongodb_url) myclient = connect(db=db_name, host=mongodb_url, authentication_source='admin', alias="default") + +# pipeline functions +def populate_mongo(jsondoc: JSONDataSetCollection, storage_ip: str, db_name: str, db_collection_name: str, mongodb_admin: str, mongodb_password: str) -> None: + + #logger.info(storage_ip) + #logger.info(db_name) + #logger.info(db_collection_name) + jsondatasets = jsondoc.datasets + housename = jsondoc._housename + + db_connect(storage_ip, db_name, mongodb_admin, mongodb_password) #actesdb = myclient[db_name] #actes_collection = actesdb[db_collection_name] @@ -81,12 +85,7 @@ def populate_mongo(jsondoc: JSONDataSetCollection, storage_ip: str, db_name: str def load_houses(yamldoc: YAMLDataSet, storage_ip: str, db_name: str, mongodb_admin: str, mongodb_password: str) -> None: - username = urllib.parse.quote_plus(mongodb_admin) - password = urllib.parse.quote_plus(mongodb_password) - mongodb_url = f"mongodb://{username}:{password}@{storage_ip}:27017/" - logger.info("connection to the mongodb server") - - myclient = connect(db=db_name, host=mongodb_url, authentication_source='admin', alias="default") + db_connect(storage_ip, db_name, mongodb_admin, mongodb_password) for house_dict in yamldoc['houses'].values(): house_dict['_id'] = house_dict['name'] @@ -101,7 +100,10 @@ def load_houses(yamldoc: YAMLDataSet, storage_ip: str, db_name: str, mongodb_adm return -def load_helpers(house_trigram: JSONDataSet, prince_bigram: JSONDataSet) -> None: +def load_helpers(house_trigram: JSONDataSet, prince_bigram: JSONDataSet, + storage_ip: str, db_name: str, mongodb_admin: str, mongodb_password: str) -> None: + + db_connect(storage_ip, db_name, mongodb_admin, mongodb_password) helper_entry = Helpers(house_trigram=house_trigram, prince_bigram=prince_bigram) helper_entry.save() diff --git a/actes-princiers/src/actes_princiers/pipelines/populate_mongo/pipeline.py b/actes-princiers/src/actes_princiers/pipelines/populate_mongo/pipeline.py index c6fc3d5..daff373 100755 --- a/actes-princiers/src/actes_princiers/pipelines/populate_mongo/pipeline.py +++ b/actes-princiers/src/actes_princiers/pipelines/populate_mongo/pipeline.py @@ -25,7 +25,9 @@ def create_pipeline(**kwargs) -> Pipeline: ), node( func=load_helpers, - inputs=["houses_trigram","prince_bigram"], + inputs=["houses_trigram","prince_bigram", + "params:storage_ip", "params:db_name", + "params:mongodb_admin", "params:mongodb_password"], outputs=None, name="populate_helpers", tags="populate_database",