add houses collection

develop
gwen 2 years ago
parent 23af0f580a
commit 4b6a15416d

@ -1,3 +1,9 @@
# houses
# input (read only) dataset
houses:
type: yaml.YAMLDataSet
filepath: data/01_raw/yaml/houses.yaml
# ________________________________________________________________________
# BOURBON
# input (read only) dataset

@ -1,22 +0,0 @@
# XXX: cette conf est descriptive, elle n'est pas (plus) utilisée par l'appli
# dans son état de généricité actuel.
# TODO: utiliser cette conf pour augmenter la généricité
# du traitement des données dans une itération ultérieure
# 
raw_datapath: data/01_raw
houses:
bourbon:
name: Bourbon
path: houses/bourbon
berry:
name: Berry
path: houses/berry
anjou:
name: Anjou
path: houses/anjou
# TODO
# - Bretagne
# - Bourgogne
# - Orléans
# - Armagnac

@ -4,10 +4,12 @@ import urllib.parse
from pathlib import Path
from typing import Dict
import pymongo
from kedro.framework.session import KedroSession
from actesdataset import JSONDataSetCollection
from kedro.extras.datasets.yaml import YAMLDataSet
import pymongo
from actesdataset import JSONDataSetCollection
logger = logging.getLogger(__name__)
@ -21,7 +23,6 @@ def populate_mongo(jsondoc: JSONDataSetCollection, storage_ip: str, db_name: str
jsondatasets = jsondoc.datasets
housename = jsondoc._housename
#mongodb://%s:%s@149.202.41.75:27017' % (username, password)
# FIXME passer en parametres
username = urllib.parse.quote_plus(mongodb_admin)
password = urllib.parse.quote_plus(mongodb_password)
mongodb_url = f"mongodb://{username}:{password}@{storage_ip}:27017/"
@ -44,4 +45,31 @@ def populate_mongo(jsondoc: JSONDataSetCollection, storage_ip: str, db_name: str
#logger.info(str(document))
res = actes_collection.insert_one(document)
logger.info(res.inserted_id)
# properly closes the db connection
# FIXME with MongoClient() as client
myclient.close()
return
def load_houses(yamldoc: YAMLDataSet, storage_ip: str, db_name: str, mongodb_admin: str, mongodb_password: str) -> None:
    """Insert every house found in ``yamldoc`` into the ``houses`` collection.

    Args:
        yamldoc: Object indexed with ``yamldoc['houses']``; the values of that
            mapping are inserted one by one. NOTE(review): annotated as
            ``YAMLDataSet`` but used like a plain mapping — presumably the
            already-loaded YAML content; confirm against the catalog.
        storage_ip: Host of the MongoDB server (port 27017 is used).
        db_name: Name of the database holding the ``houses`` collection.
        mongodb_admin: MongoDB user name (URL-quoted before use).
        mongodb_password: MongoDB password (URL-quoted before use).

    Returns:
        None. The only effect is the documents written to MongoDB.
    """
    username = urllib.parse.quote_plus(mongodb_admin)
    password = urllib.parse.quote_plus(mongodb_password)
    mongodb_url = f"mongodb://{username}:{password}@{storage_ip}:27017/"
    # Log a masked URL: the real one embeds the password and must never
    # end up in log files.
    logger.info(
        "connection to the mongodb server: mongodb://%s:***@%s:27017/",
        username,
        storage_ip,
    )

    # The context manager closes the connection even when an insert or the
    # 'houses' lookup raises, instead of only on the success path.
    with pymongo.MongoClient(mongodb_url) as client:
        houses_col = client[db_name]["houses"]
        for house in yamldoc["houses"].values():
            logger.info("%s", house)
            houses_col.insert_one(house)

@ -1,6 +1,6 @@
from kedro.pipeline import Pipeline, node, pipeline
from .nodes import populate_mongo
from .nodes import populate_mongo, load_houses
def create_pipeline(**kwargs) -> Pipeline:
@ -14,7 +14,16 @@ def create_pipeline(**kwargs) -> Pipeline:
outputs=None,
name="populate_mongo",
tags="populate_database",
),
node(
func=load_houses,
inputs=["houses", "params:storage_ip", "params:db_name",
"params:mongodb_admin", "params:mongodb_password"],
outputs=None,
name="load_houses",
tags="load_houses",
)
]
)

@ -7,6 +7,7 @@ from abc import ABC, abstractmethod
from lxml import etree
from bs4 import BeautifulSoup
#from folium import Map
from kedro.io import AbstractDataSet, DataSetError
from kedro.framework.session import KedroSession
@ -227,3 +228,16 @@ class TextDataSetCollection(DataSetCollection):
filepath=str(filepath))
return self
#class FoliumHTMLDataSet(AbstractDataSet):
# def __init__(self, filepath: str):
# self._filepath = filepath
#
# def _load(self) -> None:
# raise DataSetError('This dataset is WriteOnly')
#
# def _describe(self) -> Dict[str, Any]:
# return dict(filepath=self._filepath)
#
# def _save(self, data: Map) -> None:
# data.save(self._filepath)
#

@ -12,6 +12,7 @@ jupyter~=1.0
jupyterlab_server>=2.11.1, <2.16.0
jupyterlab~=3.0, <3.6.0
kedro~=0.18.12
kedro-datasets~=1.7.0
kedro-telemetry~=0.2.5
lxml~=4.9.3
nbstripout~=0.4

Loading…
Cancel
Save