|
|
|
|
@ -2,7 +2,6 @@ from pathlib import Path
|
|
|
|
|
from typing import Dict
|
|
|
|
|
|
|
|
|
|
from kedro.framework.context import KedroContext
|
|
|
|
|
#from kedro.pipeline import Pipeline
|
|
|
|
|
from kedro.pipeline import Pipeline, node, pipeline
|
|
|
|
|
|
|
|
|
|
from actesdataset import XMLDataSet
|
|
|
|
|
@ -14,12 +13,31 @@ def tree(directory, relative_to=None):
|
|
|
|
|
trees[path.stem] = str(path.relative_to(relative_to))
|
|
|
|
|
return trees
|
|
|
|
|
|
|
|
|
|
def house_dataset_loader(catalog):
|
|
|
|
|
|
|
|
|
|
class ProjectContext(KedroContext):
|
|
|
|
|
project_name = "actes princiers"
|
|
|
|
|
project_version = "0.1"
|
|
|
|
|
package_name = "actes_princiers"
|
|
|
|
|
|
|
|
|
|
def get_houses_config(self):
    """Return the ``houses`` entry of the generic houses configuration.

    The configuration is fetched through ``self.config_loader`` with the
    ``"houses*"`` pattern (that is, the global houses `houses.yaml`).
    """
    # FIXME : put this in attribute in the context
    return self.config_loader.get("houses*")["houses"]
|
|
|
|
|
|
|
|
|
|
def _get_catalog(self, *args, **kwargs):
    """Catalog loader entry point.

    Builds the catalog through the parent implementation, hands it to
    ``self._house_dataset_loader`` and keeps that call's return value in
    ``self.nodes_description``. The catalog itself is returned.
    """
    # the parent class takes care of the yaml defined catalogs
    # (kedro.io.data_catalog.DataCatalog)
    yaml_catalog = super()._get_catalog(*args, **kwargs)
    # adding data sets
    house_nodes = self._house_dataset_loader(yaml_catalog)
    self.nodes_description = house_nodes
    return yaml_catalog
|
|
|
|
|
|
|
|
|
|
def _house_dataset_loader(self, catalog):
|
|
|
|
|
nodes_description = []
|
|
|
|
|
# FIXME : a custom DataSet Catalog that lists
|
|
|
|
|
# input_catalog = catalog.load(house_name)
|
|
|
|
|
# FIXME : set root path from config, not here
|
|
|
|
|
# or make an autopath function helper
|
|
|
|
|
# XXX : get root path from config, not here
|
|
|
|
|
data_root_path = Path.cwd() / 'data' / '01_raw' / 'xml'
|
|
|
|
|
relative_to = Path.cwd()
|
|
|
|
|
for dataset_name, dataset_path in tree(data_root_path, relative_to=relative_to).items():
|
|
|
|
|
@ -29,7 +47,7 @@ def house_dataset_loader(catalog):
|
|
|
|
|
replace=True)
|
|
|
|
|
# adding an output catalog entry
|
|
|
|
|
output_dataset_name = dataset_name + "_output"
|
|
|
|
|
# FIXME : I am not satisfied with this
|
|
|
|
|
# XXX : make better
|
|
|
|
|
output_dataset_path = Path(dataset_path.replace("01_raw", "02_intermediate"))
|
|
|
|
|
# let's create subfolders if they don't exist
|
|
|
|
|
output_dataset_dir = output_dataset_path.parent
|
|
|
|
|
@ -45,27 +63,6 @@ def house_dataset_loader(catalog):
|
|
|
|
|
nodes_description.append(node_description)
|
|
|
|
|
return nodes_description
|
|
|
|
|
|
|
|
|
|
class ProjectContext(KedroContext):
|
|
|
|
|
project_name = "actes princiers"
|
|
|
|
|
project_version = "0.1"
|
|
|
|
|
package_name = "actes_princiers"
|
|
|
|
|
|
|
|
|
|
def get_houses_config(self):
    """Load the generic configuration file and return its ``houses`` entry.

    The lookup goes through ``self.config_loader`` with the ``"houses*"``
    pattern (that is, the global houses `houses.yaml`).
    """
    # FIXME : put this in attribute in the context
    houses_config = self.config_loader.get("houses*")
    houses = houses_config["houses"]
    return houses
|
|
|
|
|
|
|
|
|
|
def _get_catalog(self, *args, **kwargs):
    """Catalog loader entry point.

    Builds the catalog through the parent implementation, hands it to the
    module-level ``house_dataset_loader`` and keeps that call's return
    value in ``self.nodes_description``. The catalog itself is returned.
    """
    # the parent class takes care of the yaml defined catalogs
    # (kedro.io.data_catalog.DataCatalog)
    yaml_catalog = super()._get_catalog(*args, **kwargs)
    # adding data sets
    house_nodes = house_dataset_loader(yaml_catalog)
    self.nodes_description = house_nodes
    return yaml_catalog
|
|
|
|
|
|
|
|
|
|
def prepare_pipeline_creation(self):
    """Return the node descriptions stored in ``self.nodes_description``."""
    descriptions = self.nodes_description
    return descriptions
|
|
|
|
|
|
|
|
|
|
|