add mongo helper collection

develop
gwen 2 years ago
parent 1a3468abeb
commit 318213034e

@ -33,6 +33,7 @@ class House(Document):
class Acte(Document):
"_id is the filename"
_id = StringField(required=True, max_length=150)
house = StringField(required=True, max_length=100)
analysis = StringField(required=True, max_length=3000)
date = StringField(required=True, max_length=250)
# FIXME make a real date object ? or not.
@ -42,25 +43,28 @@ class Acte(Document):
xmlcontent = StringField(required=True) # no max_length
# pipeline functions
def populate_mongo(jsondoc: JSONDataSetCollection, storage_ip: str, db_name: str, db_collection_name: str, mongodb_admin: str, mongodb_password: str) -> None:
#logger.info(storage_ip)
#logger.info(db_name)
#logger.info(db_collection_name)
jsondatasets = jsondoc.datasets
housename = jsondoc._housename
def db_connect(storage_ip: str, db_name: str, mongodb_admin: str, mongodb_password: str, port: int = 27017) -> None:
    """Open the mongoengine connection to the MongoDB server.

    The user name and password are escaped with ``urllib.parse.quote_plus``
    before being embedded in the connection URI, so credentials containing
    reserved characters (``@``, ``:``, ``/`` ...) do not corrupt it.

    Args:
        storage_ip: Host name or IP address of the MongoDB server.
        db_name: Name of the database passed to ``connect``.
        mongodb_admin: User name; authenticated against the ``admin`` database.
        mongodb_password: Password of ``mongodb_admin``.
        port: TCP port of the server. Defaults to 27017, the value that was
            previously hard-coded in the URI.
    """
    username = urllib.parse.quote_plus(mongodb_admin)
    password = urllib.parse.quote_plus(mongodb_password)
    mongodb_url = f"mongodb://{username}:{password}@{storage_ip}:{port}/"
    logger.info("connection to the mongodb server")
    # The connection is registered under the "default" alias; the returned
    # client object is not needed by the callers, so it is not kept.
    connect(db=db_name, host=mongodb_url, authentication_source='admin', alias="default")
# pipeline functions
def populate_mongo(jsondoc: JSONDataSetCollection, storage_ip: str, db_name: str, db_collection_name: str, mongodb_admin: str, mongodb_password: str) -> None:
#logger.info(storage_ip)
#logger.info(db_name)
#logger.info(db_collection_name)
jsondatasets = jsondoc.datasets
housename = jsondoc._housename
db_connect(storage_ip, db_name, mongodb_admin, mongodb_password)
#actesdb = myclient[db_name]
#actes_collection = actesdb[db_collection_name]
@ -81,12 +85,7 @@ def populate_mongo(jsondoc: JSONDataSetCollection, storage_ip: str, db_name: str
def load_houses(yamldoc: YAMLDataSet, storage_ip: str, db_name: str, mongodb_admin: str, mongodb_password: str) -> None:
username = urllib.parse.quote_plus(mongodb_admin)
password = urllib.parse.quote_plus(mongodb_password)
mongodb_url = f"mongodb://{username}:{password}@{storage_ip}:27017/"
logger.info("connection to the mongodb server")
myclient = connect(db=db_name, host=mongodb_url, authentication_source='admin', alias="default")
db_connect(storage_ip, db_name, mongodb_admin, mongodb_password)
for house_dict in yamldoc['houses'].values():
house_dict['_id'] = house_dict['name']
@ -101,7 +100,10 @@ def load_houses(yamldoc: YAMLDataSet, storage_ip: str, db_name: str, mongodb_adm
return
def load_helpers(house_trigram: JSONDataSet, prince_bigram: JSONDataSet) -> None:
def load_helpers(house_trigram: JSONDataSet, prince_bigram: JSONDataSet,
                 storage_ip: str, db_name: str, mongodb_admin: str, mongodb_password: str) -> None:
    """Save the house trigram and prince bigram data as one ``Helpers`` entry.

    ``db_connect`` is called first with the given server address, database
    name and credentials; the entry is then built from the two inputs and
    saved.
    """
    db_connect(storage_ip, db_name, mongodb_admin, mongodb_password)
    # Build the entry and persist it in one step.
    Helpers(house_trigram=house_trigram, prince_bigram=prince_bigram).save()

@ -25,7 +25,9 @@ def create_pipeline(**kwargs) -> Pipeline:
),
node(
func=load_helpers,
inputs=["houses_trigram","prince_bigram"],
inputs=["houses_trigram","prince_bigram",
"params:storage_ip", "params:db_name",
"params:mongodb_admin", "params:mongodb_password"],
outputs=None,
name="populate_helpers",
tags="populate_database",

Loading…
Cancel
Save