From 4c0f113bd75c23fabe7eeae790eb56b267c0f1b1 Mon Sep 17 00:00:00 2001 From: gwen Date: Thu, 14 Sep 2023 09:34:03 +0200 Subject: [PATCH] add database mongo schema in houses --- .../pipelines/populate_mongo/nodes.py | 26 +++++++++++-------- actes-princiers/src/requirements.txt | 14 ++-------- 2 files changed, 17 insertions(+), 23 deletions(-) diff --git a/actes-princiers/src/actes_princiers/pipelines/populate_mongo/nodes.py b/actes-princiers/src/actes_princiers/pipelines/populate_mongo/nodes.py index 01f484e..d6efdc0 100755 --- a/actes-princiers/src/actes_princiers/pipelines/populate_mongo/nodes.py +++ b/actes-princiers/src/actes_princiers/pipelines/populate_mongo/nodes.py @@ -4,7 +4,8 @@ import urllib.parse from pathlib import Path from typing import Dict -import pymongo +from mongoengine import connect +from mongoengine import Document, StringField from kedro.framework.session import KedroSession from kedro.extras.datasets.yaml import YAMLDataSet @@ -14,7 +15,12 @@ from actesdataset import JSONDataSetCollection logger = logging.getLogger(__name__) +# Database schemas +class House(Document): + name = StringField(required=True, max_length=100) + particle = StringField(required=True, max_length=150) +# pipeline functions def populate_mongo(jsondoc: JSONDataSetCollection, storage_ip: str, db_name: str, db_collection_name: str, mongodb_admin: str, mongodb_password: str) -> None: #logger.info(storage_ip) @@ -56,20 +62,18 @@ def load_houses(yamldoc: YAMLDataSet, storage_ip: str, db_name: str, mongodb_adm username = urllib.parse.quote_plus(mongodb_admin) password = urllib.parse.quote_plus(mongodb_password) mongodb_url = f"mongodb://{username}:{password}@{storage_ip}:27017/" - logger.info("connection to the mongodb server: " + mongodb_url) + logger.info("connection to the mongodb server") - # pymongo settings - myclient = pymongo.MongoClient(mongodb_url) + myclient = connect(db=db_name, host=mongodb_url, authentication_source='admin', alias="default") - actesdb = myclient[db_name] - houses_col = actesdb['houses'] - - for house in yamldoc['houses'].values(): - logger.info(str(house)) - houses_col.insert_one(house) + for house_dict in yamldoc['houses'].values(): + logger.info("inserting: " + str(house_dict)) + #houses_col.insert_one(house_dict) + house_entry = House(**house_dict) + house_entry.save() # properly closes the db connection # FIXME with MongoClient() as client - myclient.close() + #myclient.close() return diff --git a/actes-princiers/src/requirements.txt b/actes-princiers/src/requirements.txt index 1590992..e53643d 100644 --- a/actes-princiers/src/requirements.txt +++ b/actes-princiers/src/requirements.txt @@ -1,22 +1,12 @@ -sphinx-rtd-theme>=1.2.2 -myst-parser>=2.0.0 -pandas>=2.0.2 -nbsphinx>=0.9.2 +beautifulsoup4==4.12.2 python-slugify>=8.0.1 -black~=22.0 -flake8>=3.7.9, <5.0 ipython>=7.31.1, <8.0; python_version < '3.8' ipython~=8.10; python_version >= '3.8' isort~=5.0 -jupyter~=1.0 -jupyterlab_server>=2.11.1, <2.16.0 -jupyterlab~=3.0, <3.6.0 kedro~=0.18.12 kedro-datasets~=1.7.0 kedro-telemetry~=0.2.5 lxml~=4.9.3 nbstripout~=0.4 -pytest-cov~=3.0 -pytest-mock>=1.7.1, <2.0 -pytest~=7.2 pymongo~=4.5.0 +mongoengine~=0.27.0