refactoring

develop
gwen 3 years ago
parent 2bf13c78be
commit ec3ce5791b

@@ -1 +1 @@
xlststylesheet: static/xsl/actes_princiers.xsl
xlststylesheet: templates/xsl/actes_princiers.xsl

@@ -2,7 +2,6 @@ from pathlib import Path
from typing import Dict
from kedro.framework.context import KedroContext
#from kedro.pipeline import Pipeline
from kedro.pipeline import Pipeline, node, pipeline
from actesdataset import XMLDataSet
@@ -14,12 +13,31 @@ def tree(directory, relative_to=None):
trees[path.stem] = str(path.relative_to(relative_to))
return trees
def house_dataset_loader(catalog):
class ProjectContext(KedroContext):
project_name = "actes princiers"
project_version = "0.1"
package_name = "actes_princiers"
def get_houses_config(self):
"""loading from generic configuration file
(that is, the global houses `houses.yaml`)"""
# Returns the value stored under the 'houses' key of whatever the
# context's config loader yields for the "houses*" pattern.
# NOTE(review): "houses*" is handed unchanged to config_loader.get();
# presumably it is matched against configuration file names — confirm
# against the Kedro version in use.
houses_file = self.config_loader.get("houses*")
# FIXME : put this in attribute in the context
# The subscript below fails if the loaded configuration has no
# top-level 'houses' entry; no fallback is provided here.
return houses_file['houses']
def _get_catalog(self, *args, **kwargs):
"catalog loader entry point"
# Builds the catalog through the parent class, then lets
# self._house_dataset_loader work on it before handing it back.
# All positional and keyword arguments are forwarded untouched.
# loading yaml defined catalogs
catalog = super()._get_catalog(*args, **kwargs)
# NOTE(review): catalog is presumably a
# kedro.io.data_catalog.DataCatalog — confirm.
# adding data sets
# Side effect: the loader's return value is kept on the instance as
# `nodes_description`, which prepare_pipeline_creation() returns later.
self.nodes_description = self._house_dataset_loader(catalog)
# The object returned is the one obtained from the parent call.
return catalog
def _house_dataset_loader(self, catalog):
nodes_description = []
# FIXME : a custom DataSet Catalog that lists
# input_catalog = catalog.load(house_name)
# FIXME : set root path from config, not here
# or make an autopath function helper
# XXX : get root path from config, not here
data_root_path = Path.cwd() / 'data' / '01_raw' / 'xml'
relative_to = Path.cwd()
for dataset_name, dataset_path in tree(data_root_path, relative_to=relative_to).items():
@@ -29,7 +47,7 @@ def house_dataset_loader(catalog):
replace=True)
# adding an output catalog entry
output_dataset_name = dataset_name + "_output"
# FIXME : JE NE SUIS PAS SATISFAIT
# XXX : make better
output_dataset_path = Path(dataset_path.replace("01_raw", "02_intermediate"))
# let's create subfolders if they don't exist
output_dataset_dir = output_dataset_path.parent
@@ -45,27 +63,6 @@ def house_dataset_loader(catalog):
nodes_description.append(node_description)
return nodes_description
class ProjectContext(KedroContext):
project_name = "actes princiers"
project_version = "0.1"
package_name = "actes_princiers"
def get_houses_config(self):
"""loading from generic configuration file
(that is, the global houses `houses.yaml`)"""
# Returns the value stored under the 'houses' key of whatever the
# context's config loader yields for the "houses*" pattern.
# NOTE(review): "houses*" is handed unchanged to config_loader.get();
# presumably it is matched against configuration file names — confirm
# against the Kedro version in use.
houses_file = self.config_loader.get("houses*")
# FIXME : put this in attribute in the context
# The subscript below fails if the loaded configuration has no
# top-level 'houses' entry; no fallback is provided here.
return houses_file['houses']
def _get_catalog(self, *args, **kwargs):
"catalog loader entry point"
# Builds the catalog through the parent class, then passes it to the
# module-level house_dataset_loader() before handing it back.
# All positional and keyword arguments are forwarded untouched.
# loading yaml defined catalogs
catalog = super()._get_catalog(*args, **kwargs)
# NOTE(review): catalog is presumably a
# kedro.io.data_catalog.DataCatalog — confirm.
# adding data sets
# Side effect: the loader's return value is kept on the instance as
# `nodes_description`, which prepare_pipeline_creation() returns later.
self.nodes_description = house_dataset_loader(catalog)
# The object returned is the one obtained from the parent call.
return catalog
def prepare_pipeline_creation(self):
# Returns the `nodes_description` attribute that _get_catalog() stores
# on the instance.
# NOTE(review): no assignment other than the one in _get_catalog() is
# visible here, so calling this before the catalog has been built
# presumably raises AttributeError — confirm the call order.
return self.nodes_description

Loading…
Cancel
Save