add houses collection

develop
gwen 2 years ago
parent 23af0f580a
commit 4b6a15416d

@ -1,3 +1,9 @@
# houses
# input (read only) dataset
houses:
type: yaml.YAMLDataSet
filepath: data/01_raw/yaml/houses.yaml
# ________________________________________________________________________ # ________________________________________________________________________
# BOURBON # BOURBON
# input (read only) dataset # input (read only) dataset

@ -1,22 +0,0 @@
# XXX: cette conf est descriptive, elle n'est pas (plus) utilisée par l'appli
# dans son état de généricité actuel.
# TODO: utiliser cette conf pour augmenter la généricité
# du traitement des datas dans une itération ultérieure
# 
raw_datapath: data/01_raw
houses:
bourbon:
name: Bourbon
path: houses/bourbon
berry:
name: Berry
path: houses/berry
anjou:
name: Anjou
path: houses/anjou
# TODO
# - Bretagne
# - Bourgogne
# - Orléans
# - Armagnac

@ -4,10 +4,12 @@ import urllib.parse
from pathlib import Path from pathlib import Path
from typing import Dict from typing import Dict
import pymongo
from kedro.framework.session import KedroSession from kedro.framework.session import KedroSession
from actesdataset import JSONDataSetCollection from kedro.extras.datasets.yaml import YAMLDataSet
import pymongo from actesdataset import JSONDataSetCollection
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -21,7 +23,6 @@ def populate_mongo(jsondoc: JSONDataSetCollection, storage_ip: str, db_name: str
jsondatasets = jsondoc.datasets jsondatasets = jsondoc.datasets
housename = jsondoc._housename housename = jsondoc._housename
#mongodb://%s:%s@149.202.41.75:27017' % (username, password) #mongodb://%s:%s@149.202.41.75:27017' % (username, password)
# FIXME passer en parametres
username = urllib.parse.quote_plus(mongodb_admin) username = urllib.parse.quote_plus(mongodb_admin)
password = urllib.parse.quote_plus(mongodb_password) password = urllib.parse.quote_plus(mongodb_password)
mongodb_url = f"mongodb://{username}:{password}@{storage_ip}:27017/" mongodb_url = f"mongodb://{username}:{password}@{storage_ip}:27017/"
@ -44,4 +45,31 @@ def populate_mongo(jsondoc: JSONDataSetCollection, storage_ip: str, db_name: str
#logger.info(str(document)) #logger.info(str(document))
res = actes_collection.insert_one(document) res = actes_collection.insert_one(document)
logger.info(res.inserted_id) logger.info(res.inserted_id)
# properly closes the db connection
# FIXME with MongoClient() as client
myclient.close()
return
def load_houses(yamldoc: YAMLDataSet, storage_ip: str, db_name: str, mongodb_admin: str, mongodb_password: str) -> None:
    """Insert every house found in ``yamldoc`` into the ``houses`` collection.

    Args:
        yamldoc: Object indexed as ``yamldoc['houses']``; each value of that
            mapping is inserted as one document.
            NOTE(review): annotated as ``YAMLDataSet`` but used like the
            already-loaded YAML mapping -- confirm against the pipeline.
        storage_ip: Host of the MongoDB server (port 27017 is hard-coded).
        db_name: Name of the database holding the ``houses`` collection.
        mongodb_admin: MongoDB user name; URL-quoted before use.
        mongodb_password: MongoDB password; URL-quoted before use.
    """
    username = urllib.parse.quote_plus(mongodb_admin)
    password = urllib.parse.quote_plus(mongodb_password)
    mongodb_url = f"mongodb://{username}:{password}@{storage_ip}:27017/"
    # Log only the target server: the full URL embeds the user name and the
    # password, which must never end up in the log output.
    logger.info("connection to the mongodb server: mongodb://%s:27017/", storage_ip)
    # The context manager closes the connection even when an insert (or the
    # lookup in yamldoc) raises, instead of only on the success path.
    with pymongo.MongoClient(mongodb_url) as myclient:
        houses_col = myclient[db_name]['houses']
        for house in yamldoc['houses'].values():
            logger.info("%s", house)
            houses_col.insert_one(house)
    return

@ -1,6 +1,6 @@
from kedro.pipeline import Pipeline, node, pipeline from kedro.pipeline import Pipeline, node, pipeline
from .nodes import populate_mongo from .nodes import populate_mongo, load_houses
def create_pipeline(**kwargs) -> Pipeline: def create_pipeline(**kwargs) -> Pipeline:
@ -14,7 +14,16 @@ def create_pipeline(**kwargs) -> Pipeline:
outputs=None, outputs=None,
name="populate_mongo", name="populate_mongo",
tags="populate_database", tags="populate_database",
),
node(
func=load_houses,
inputs=["houses", "params:storage_ip", "params:db_name",
"params:mongodb_admin", "params:mongodb_password"],
outputs=None,
name="load_houses",
tags="load_houses",
) )
] ]
) )

@ -7,6 +7,7 @@ from abc import ABC, abstractmethod
from lxml import etree from lxml import etree
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
#from folium import Map
from kedro.io import AbstractDataSet, DataSetError from kedro.io import AbstractDataSet, DataSetError
from kedro.framework.session import KedroSession from kedro.framework.session import KedroSession
@ -227,3 +228,16 @@ class TextDataSetCollection(DataSetCollection):
filepath=str(filepath)) filepath=str(filepath))
return self return self
#class FoliumHTMLDataSet(AbstractDataSet):
# def __init__(self, filepath: str):
# self._filepath = filepath
#
# def _load(self) -> None:
# raise DataSetError('This dataset is WriteOnly')
#
# def _describe(self) -> Dict[str, Any]:
# return dict(filepath=self._filepath)
#
# def _save(self, data: Map) -> None:
# data.save(self._filepath)
#

@ -12,6 +12,7 @@ jupyter~=1.0
jupyterlab_server>=2.11.1, <2.16.0 jupyterlab_server>=2.11.1, <2.16.0
jupyterlab~=3.0, <3.6.0 jupyterlab~=3.0, <3.6.0
kedro~=0.18.12 kedro~=0.18.12
kedro-datasets~=1.7.0
kedro-telemetry~=0.2.5 kedro-telemetry~=0.2.5
lxml~=4.9.3 lxml~=4.9.3
nbstripout~=0.4 nbstripout~=0.4

Loading…
Cancel
Save