From ec3ce5791b2cf078c2ffea780393b8850aa21545 Mon Sep 17 00:00:00 2001 From: gwen Date: Thu, 29 Jun 2023 13:39:53 +0200 Subject: [PATCH] refactoring --- actes-princiers/conf/base/parameters.yml | 2 +- .../src/actes_princiers/customcontext.py | 61 +++++++++---------- .../xsl/actes_princiers.xsl | 0 3 files changed, 30 insertions(+), 33 deletions(-) rename actes-princiers/{static => templates}/xsl/actes_princiers.xsl (100%) diff --git a/actes-princiers/conf/base/parameters.yml b/actes-princiers/conf/base/parameters.yml index b1176c5..18d8ea3 100644 --- a/actes-princiers/conf/base/parameters.yml +++ b/actes-princiers/conf/base/parameters.yml @@ -1 +1 @@ -xlststylesheet: static/xsl/actes_princiers.xsl +xlststylesheet: templates/xsl/actes_princiers.xsl diff --git a/actes-princiers/src/actes_princiers/customcontext.py b/actes-princiers/src/actes_princiers/customcontext.py index b2c2fd4..f01f649 100644 --- a/actes-princiers/src/actes_princiers/customcontext.py +++ b/actes-princiers/src/actes_princiers/customcontext.py @@ -2,7 +2,6 @@ from pathlib import Path from typing import Dict from kedro.framework.context import KedroContext -#from kedro.pipeline import Pipeline from kedro.pipeline import Pipeline, node, pipeline from actesdataset import XMLDataSet @@ -14,36 +13,6 @@ def tree(directory, relative_to=None): trees[path.stem] = str(path.relative_to(relative_to)) return trees -def house_dataset_loader(catalog): - nodes_description = [] - # FIXME : a custom DataSet Catalog that lists - # input_catalog = catalog.load(house_name) - # FIXME : set root path from config, not here - # or make an autopath function helper - data_root_path = Path.cwd() / 'data' / '01_raw' / 'xml' - relative_to = Path.cwd() - for dataset_name, dataset_path in tree(data_root_path, relative_to=relative_to).items(): - # dataset_path : data/01_raw/xml/bourbon/brb_ch_i_1437_08_18a.xml - catalog.add(data_set_name=dataset_name, - data_set=XMLDataSet(filepath=dataset_path), - replace=True) - # adding an output catalog entry - output_dataset_name = dataset_name + "_output" - # FIXME : JE NE SUIS PAS SATISFAIT - output_dataset_path = Path(dataset_path.replace("01_raw", "02_intermediate")) - # let's create subfolders if they don't exist - output_dataset_dir = output_dataset_path.parent - output_dataset_dir.mkdir(parents=True, exist_ok=True) - catalog.add(data_set_name=output_dataset_name, - data_set=XMLDataSet(filepath=output_dataset_path), - replace=True) - # prepare information for the next stage (the pipeline stage) - node_description = dict( - inputs=dataset_name, - outputs=output_dataset_name, - name=dataset_name) - nodes_description.append(node_description) - return nodes_description class ProjectContext(KedroContext): project_name = "actes princiers" @@ -63,9 +32,37 @@ class ProjectContext(KedroContext): catalog = super()._get_catalog(*args, **kwargs) # kedro.io.data_catalog.DataCatalog # adding data sets - self.nodes_description = house_dataset_loader(catalog) + self.nodes_description = self._house_dataset_loader(catalog) return catalog + def _house_dataset_loader(self, catalog): + nodes_description = [] + # XXX : get root path from config, not here + data_root_path = Path.cwd() / 'data' / '01_raw' / 'xml' + relative_to = Path.cwd() + for dataset_name, dataset_path in tree(data_root_path, relative_to=relative_to).items(): + # dataset_path : data/01_raw/xml/bourbon/brb_ch_i_1437_08_18a.xml + catalog.add(data_set_name=dataset_name, + data_set=XMLDataSet(filepath=dataset_path), + replace=True) + # adding an output catalog entry + output_dataset_name = dataset_name + "_output" + # XXX : make better + output_dataset_path = Path(dataset_path.replace("01_raw", "02_intermediate")) + # let's create subfolders if they don't exist + output_dataset_dir = output_dataset_path.parent + output_dataset_dir.mkdir(parents=True, exist_ok=True) + catalog.add(data_set_name=output_dataset_name, + data_set=XMLDataSet(filepath=output_dataset_path), + replace=True) + # prepare information for the next stage (the pipeline stage) + node_description = dict( + inputs=dataset_name, + outputs=output_dataset_name, + name=dataset_name) + nodes_description.append(node_description) + return nodes_description + def prepare_pipeline_creation(self): return self.nodes_description diff --git a/actes-princiers/static/xsl/actes_princiers.xsl b/actes-princiers/templates/xsl/actes_princiers.xsl similarity index 100% rename from actes-princiers/static/xsl/actes_princiers.xsl rename to actes-princiers/templates/xsl/actes_princiers.xsl