# Patch summary (leading commentary; everything from the first "diff --git" header onward is the unchanged patch text).
#
# Files touched by this patch:
#   1. actes-princiers/conf/base/catalog.yml
#      - Adds two json.JSONDataSet catalog entries: "houses_trigram"
#        (data/01_raw/json/house_trigram.json) and "prince_bigram"
#        (data/01_raw/json/prince_bigram.json).
#      - Updates the header comment to "houses and princes" / "datasets".
#   2. actes-princiers/src/actes_princiers/pipelines/populate_mongo/nodes.py
#      - Imports DictField (mongoengine) and JSONDataSet (kedro.extras.datasets.json).
#      - Adds a mongoengine Document "Helpers" with two DictField attributes,
#        house_trigram and prince_bigram.
#      - Adds load_helpers(house_trigram, prince_bigram), which builds one Helpers
#        document from its two arguments and calls save() on it.
#   3. actes-princiers/src/actes_princiers/pipelines/populate_mongo/pipeline.py
#      - Imports load_helpers and registers it as node "populate_helpers" with
#        inputs ["houses_trigram", "prince_bigram"], outputs=None and tag
#        "populate_database".
#
# Review notes (assumptions to confirm, not verified from this patch alone):
#   - NOTE(review): load_helpers takes no connection parameters and the added lines
#     contain no connect() call, unlike populate_mongo/load_houses which receive
#     storage_ip/db_name/credentials. Presumably it relies on a mongoengine
#     connection opened by another node; confirm node ordering guarantees that.
#   - NOTE(review): the catalog key is "houses_trigram" while the file, the function
#     parameter and the Document field are all "house_trigram". The node wiring is
#     positional so it should work, but confirm the naming is intentional.
#   - NOTE(review): the parameters are annotated as JSONDataSet; presumably Kedro
#     passes the loaded JSON content (a dict) rather than the dataset object, which
#     is also what DictField would expect. Confirm and adjust the annotation.
#   - NOTE(review): the existing node name "polulate_houses" (context line) looks
#     like a typo of "populate_houses"; renaming it changes the node name, so it is
#     left untouched here.
#
diff --git a/actes-princiers/conf/base/catalog.yml b/actes-princiers/conf/base/catalog.yml index 50fbde8..62276b4 100644 --- a/actes-princiers/conf/base/catalog.yml +++ b/actes-princiers/conf/base/catalog.yml @@ -1,9 +1,16 @@ -# houses -# input (read only) dataset +# houses and princes +# input (read only) datasets houses: type: yaml.YAMLDataSet filepath: data/01_raw/yaml/houses.yaml +houses_trigram: + type: json.JSONDataSet + filepath: data/01_raw/json/house_trigram.json + +prince_bigram: + type: json.JSONDataSet + filepath: data/01_raw/json/prince_bigram.json # ________________________________________________________________________ # BOURBON # input (read only) dataset diff --git a/actes-princiers/src/actes_princiers/pipelines/populate_mongo/nodes.py b/actes-princiers/src/actes_princiers/pipelines/populate_mongo/nodes.py index 8bdb6ab..7cac618 100755 --- a/actes-princiers/src/actes_princiers/pipelines/populate_mongo/nodes.py +++ b/actes-princiers/src/actes_princiers/pipelines/populate_mongo/nodes.py @@ -5,10 +5,11 @@ from pathlib import Path from typing import Dict from mongoengine import connect -from mongoengine import Document, StringField +from mongoengine import Document, StringField, DictField from kedro.framework.session import KedroSession from kedro.extras.datasets.yaml import YAMLDataSet +from kedro.extras.datasets.json import JSONDataSet from actesdataset import JSONDataSetCollection @@ -16,6 +17,10 @@ from actesdataset import JSONDataSetCollection logger = logging.getLogger(__name__) +class Helpers(Document): + house_trigram = DictField() + prince_bigram = DictField() + # Database schemas class House(Document): "_id is the name" @@ -36,6 +41,7 @@ class Acte(Document): ref_acte = StringField(required=True, max_length=100) xmlcontent = StringField(required=True) # no max_length + # pipeline functions def populate_mongo(jsondoc: JSONDataSetCollection, storage_ip: str, db_name: str, db_collection_name: str, mongodb_admin: str, mongodb_password: str) -> 
None: @@ -94,3 +100,8 @@ def load_houses(yamldoc: YAMLDataSet, storage_ip: str, db_name: str, mongodb_adm #myclient.close() return + +def load_helpers(house_trigram: JSONDataSet, prince_bigram: JSONDataSet) -> None: + helper_entry = Helpers(house_trigram=house_trigram, prince_bigram=prince_bigram) + helper_entry.save() + diff --git a/actes-princiers/src/actes_princiers/pipelines/populate_mongo/pipeline.py b/actes-princiers/src/actes_princiers/pipelines/populate_mongo/pipeline.py index f4ed3bf..c6fc3d5 100755 --- a/actes-princiers/src/actes_princiers/pipelines/populate_mongo/pipeline.py +++ b/actes-princiers/src/actes_princiers/pipelines/populate_mongo/pipeline.py @@ -1,6 +1,6 @@ from kedro.pipeline import Pipeline, node, pipeline -from .nodes import populate_mongo, load_houses +from .nodes import populate_mongo, load_houses, load_helpers def create_pipeline(**kwargs) -> Pipeline: @@ -22,8 +22,16 @@ def create_pipeline(**kwargs) -> Pipeline: outputs=None, name="polulate_houses", tags="populate_database", + ), + node( + func=load_helpers, + inputs=["houses_trigram","prince_bigram"], + outputs=None, + name="populate_helpers", + tags="populate_database", ) + ] )