diff --git a/actes-princiers/src/actes_princiers/pipelines/populate_mongo/nodes.py b/actes-princiers/src/actes_princiers/pipelines/populate_mongo/nodes.py index d6efdc0..8bdb6ab 100755 --- a/actes-princiers/src/actes_princiers/pipelines/populate_mongo/nodes.py +++ b/actes-princiers/src/actes_princiers/pipelines/populate_mongo/nodes.py @@ -15,11 +15,27 @@ from actesdataset import JSONDataSetCollection logger = logging.getLogger(__name__) + # Database schemas class House(Document): + "_id is the name" + _id = StringField(required=True, max_length=100) name = StringField(required=True, max_length=100) + trigram = StringField(required=True, max_length=3) particle = StringField(required=True, max_length=150) + +class Acte(Document): + "_id is the filename" + _id = StringField(required=True, max_length=150) + analysis = StringField(required=True, max_length=3000) + date = StringField(required=True, max_length=250) + # FIXME make a real date object ? or not. + date_time = StringField(required=True, max_length=15) + filename = StringField(required=True, max_length=100) + ref_acte = StringField(required=True, max_length=100) + xmlcontent = StringField(required=True) # no max_length + # pipeline functions def populate_mongo(jsondoc: JSONDataSetCollection, storage_ip: str, db_name: str, db_collection_name: str, mongodb_admin: str, mongodb_password: str) -> None: @@ -33,27 +49,27 @@ def populate_mongo(jsondoc: JSONDataSetCollection, storage_ip: str, db_name: str password = urllib.parse.quote_plus(mongodb_password) mongodb_url = f"mongodb://{username}:{password}@{storage_ip}:27017/" #mongodb_url = "mongodb://{}:27017/".format(storage_ip) - logger.info("connection to the mongodb server: " + mongodb_url) + logger.info("connection to the mongodb server") # pymongo settings - myclient = pymongo.MongoClient(mongodb_url) + # myclient = pymongo.MongoClient(mongodb_url) + myclient = connect(db=db_name, host=mongodb_url, authentication_source='admin', alias="default") - actesdb = 
myclient[db_name] - actes_collection = actesdb[db_collection_name] + #actesdb = myclient[db_name] + #actes_collection = actesdb[db_collection_name] - # TODO faire un insert_many directement ? for dataset_filenamestem, dataset in jsondatasets.items(): # a manual load is required here, because # the dataset **is not** registered in kedro's catalog - document = dataset._load() - # FIXME que mettre comme id ? le filename ? - document["_id"] = document["filename"] - #logger.info(str(document)) - res = actes_collection.insert_one(document) - logger.info(res.inserted_id) + json_document = dataset._load() + json_document["_id"] = json_document["filename"] + acte_entry = Acte(**json_document) + logger.info("adding entry: " + json_document["filename"]) + acte_entry.save() + #res = actes_collection.insert_one(document) + #logger.info(res.inserted_id) # properly closes the db connection - # FIXME with MongoClient() as client - myclient.close() + # myclient.close() return @@ -67,6 +83,7 @@ def load_houses(yamldoc: YAMLDataSet, storage_ip: str, db_name: str, mongodb_adm myclient = connect(db=db_name, host=mongodb_url, authentication_source='admin', alias="default") for house_dict in yamldoc['houses'].values(): + house_dict['_id'] = house_dict['name'] logger.info("inserting: " + str(house_dict)) #houses_col.insert_one(house_dict) house_entry = House(**house_dict) diff --git a/actes-princiers/src/actes_princiers/pipelines/populate_mongo/pipeline.py b/actes-princiers/src/actes_princiers/pipelines/populate_mongo/pipeline.py index 80d0323..f4ed3bf 100755 --- a/actes-princiers/src/actes_princiers/pipelines/populate_mongo/pipeline.py +++ b/actes-princiers/src/actes_princiers/pipelines/populate_mongo/pipeline.py @@ -12,7 +12,7 @@ def create_pipeline(**kwargs) -> Pipeline: "params:db_collection_name", "params:mongodb_admin", "params:mongodb_password"], outputs=None, - name="populate_mongo", + name="populate_actes", tags="populate_database", ), node( @@ -20,8 +20,8 @@ def 
create_pipeline(**kwargs) -> Pipeline: inputs=["houses", "params:storage_ip", "params:db_name", "params:mongodb_admin", "params:mongodb_password"], outputs=None, - name="load_houses", - tags="load_houses", + name="populate_houses", + tags="populate_database", ) ]