From 59ce8f1f1b3f88f1539c6964956691d4b91c6e56 Mon Sep 17 00:00:00 2001 From: gwen Date: Fri, 23 Jun 2023 18:17:14 +0200 Subject: [PATCH] tests pour traitement par lots --- .../docs/source/coding_standards.rst | 23 +++++++ actes-princiers/docs/source/index.rst | 2 +- .../src/actes_princiers/mycontext.py | 50 +++++++++++++++ .../pipelines/xml_processing/pipeline.py | 63 +++++++++---------- .../src/actes_princiers/settings.py | 3 +- actes-princiers/src/run.py | 13 +++- 6 files changed, 116 insertions(+), 38 deletions(-) create mode 100644 actes-princiers/docs/source/coding_standards.rst create mode 100644 actes-princiers/src/actes_princiers/mycontext.py diff --git a/actes-princiers/docs/source/coding_standards.rst b/actes-princiers/docs/source/coding_standards.rst new file mode 100644 index 0000000..e940bc4 --- /dev/null +++ b/actes-princiers/docs/source/coding_standards.rst @@ -0,0 +1,23 @@ +Coding Standards +==================== + +Import ordering +------------------- + +1. builtins imports +2. pip installed imports +3. framework imports +4. local project imports + +.. rubric:: Sample + +.. 
code-block:: python + + from typing import Dict + from pathlib import Path + + from kedro.framework.context import KedroContext, load_package_context + from kedro.pipeline import Pipeline + + from actes_princiers.pipeline_registry import register_pipelines + diff --git a/actes-princiers/docs/source/index.rst b/actes-princiers/docs/source/index.rst index cb06657..e0632c7 100644 --- a/actes-princiers/docs/source/index.rst +++ b/actes-princiers/docs/source/index.rst @@ -5,7 +5,7 @@ Documentation technique du projet Actes princiers :maxdepth: 1 data - + coding_standards Indices and tables ================== diff --git a/actes-princiers/src/actes_princiers/mycontext.py b/actes-princiers/src/actes_princiers/mycontext.py new file mode 100644 index 0000000..31dafb3 --- /dev/null +++ b/actes-princiers/src/actes_princiers/mycontext.py @@ -0,0 +1,50 @@ +from typing import Dict + +from kedro.framework.context import KedroContext +from kedro.pipeline import Pipeline + +from actesdataset import XMLDataSet + + +def catalog_factory(catalog): + input_catalog = catalog.load("load_full_xml_catalog") + #output_catalog = catalog.load("preprocess_full_catalog_html") + + for in_catalog_key, in_catalog_value in input_catalog.items(): + + # adding programmatically an input catalog entry + input_catalog_name = "load_full_xml_catalog" + in_catalog_key + # FIXME : à récuperer du catalogue Patitioned + input_filepath = "data/01_raw/xml/Anjou/" + in_catalog_key + ".xml" + catalog.add(data_set_name=input_catalog_name, + data_set=XMLDataSet(filepath=input_filepath), + replace=True) + # adding programmatically an output catalog entry + output_catalog_name = "preprocess_full_catalog_html" + in_catalog_key + # FIXME : à récuperer du catalogue Patitioned + output_filepath = "data/02_intermediate/xml/Anjou/" + in_catalog_key + ".html" + catalog.add(data_set_name=output_catalog_name, + data_set=XMLDataSet(filepath=output_filepath), + replace=True) + + +class ProjectContext(KedroContext): + project_name 
= "actes princiers" + project_version = "0.1" + package_name = "actes_princiers" + + def _get_pipelines(self) -> Dict[str, Pipeline]: +# return create_pipelines() + return register_pipelines() + + def _get_catalog(self, *args, **kwargs): + catalog = super()._get_catalog(*args, **kwargs) + catalog.add(data_set_name="mon_test_de_catalogue", + data_set=XMLDataSet( + filepath="data/02_intermediate/xml/Anjou/test.dat", + ), + replace=True, + ) + catalog_factory(catalog) + return catalog + diff --git a/actes-princiers/src/actes_princiers/pipelines/xml_processing/pipeline.py b/actes-princiers/src/actes_princiers/pipelines/xml_processing/pipeline.py index 8657e46..bc4ef28 100755 --- a/actes-princiers/src/actes_princiers/pipelines/xml_processing/pipeline.py +++ b/actes-princiers/src/actes_princiers/pipelines/xml_processing/pipeline.py @@ -1,15 +1,17 @@ from kedro.pipeline import Pipeline, node, pipeline from .nodes import parse_xsl -from actesdataset import XMLDataSet +#from actesdataset import XMLDataSet -from kedro.io import PartitionedDataSet +#from kedro.io import PartitionedDataSet -#from kedro.framework.session import KedroSession -#from kedro.context import KedroContext, load_context +from kedro.framework.session import KedroSession + +with KedroSession.create() as session: + context = session.load_context() + catalog = context.catalog + print("----------------------------") + print(catalog.list()) -#with KedroSession.create() as session: -# context = session.load_context() -# catalog = context.catalog #catalog.add(data_set_name="mon_test_de_catalogue", # data_set=XMLDataSet( @@ -17,7 +19,6 @@ from kedro.io import PartitionedDataSet # ), # replace=True, # ) -#print(catalog.list()) #def from_dict(dico): @@ -74,21 +75,17 @@ def nodes_factory(): # adding programmatically an input catalog entry input_catalog_name = "load_full_xml_catalog" + in_catalog_key # FIXME : à récuperer du catalogue Patitioned - input_filepath = "data/01_raw/xml/Anjou/" + in_catalog_key + ".html" 
- catalog.add(data_set_name=input_catalog_name, - data_set=XMLDataSet(filepath=input_filepath), - replace=True) -# if input_catalog_name in catalog.list(): -# print("OK") -# else: -# print("NOK") +# input_filepath = "data/01_raw/xml/Anjou/" + in_catalog_key + ".html" +# catalog.add(data_set_name=input_catalog_name, +# data_set=XMLDataSet(filepath=input_filepath), +# replace=True) # adding programmatically an output catalog entry output_catalog_name = "preprocess_full_catalog_html" + in_catalog_key # FIXME : à récuperer du catalogue Patitioned - output_filepath = "data/02_intermediate/xml/Anjou/" + in_catalog_key + ".html" - catalog.add(data_set_name=output_catalog_name, - data_set=XMLDataSet(filepath=output_filepath), - replace=True) +# output_filepath = "data/02_intermediate/xml/Anjou/" + in_catalog_key + ".html" +# catalog.add(data_set_name=output_catalog_name, +# data_set=XMLDataSet(filepath=output_filepath), +# replace=True) # constructing the node programmatically nodes.append(node( func=parse_xsl, @@ -97,22 +94,20 @@ def nodes_factory(): name=in_catalog_key, tags="xsl", )) - # XXX -# context.catalog = catalog return nodes -#nodes = nodes_factory() +nodes = nodes_factory() def create_pipeline(**kwargs): - return pipeline( - [ - node( - func=parse_xsl, - inputs=["load_xml", "params:xlststylesheet"], - outputs="preprocess_html", - name="preprocess_html", - tags="xsl", - ), - ] - ) + return pipeline(nodes) +# [ +# node( +# func=parse_xsl, +# inputs=["load_xml", "params:xlststylesheet"], +# outputs="preprocess_html", +# name="preprocess_html", +# tags="xsl", +# ), +# ] +# ) diff --git a/actes-princiers/src/actes_princiers/settings.py b/actes-princiers/src/actes_princiers/settings.py index 4a59efb..ced9072 100644 --- a/actes-princiers/src/actes_princiers/settings.py +++ b/actes-princiers/src/actes_princiers/settings.py @@ -34,7 +34,8 @@ https://kedro.readthedocs.io/en/stable/kedro_project_setup/settings.html.""" # Class that manages Kedro's library components. 
# from kedro.framework.context import KedroContext -# CONTEXT_CLASS = KedroContext +from .mycontext import ProjectContext +CONTEXT_CLASS = ProjectContext # Class that manages the Data Catalog. # from kedro.io import DataCatalog diff --git a/actes-princiers/src/run.py b/actes-princiers/src/run.py index 5e256b0..1152c17 100644 --- a/actes-princiers/src/run.py +++ b/actes-princiers/src/run.py @@ -7,7 +7,8 @@ from kedro.framework.context import KedroContext, load_package_context from kedro.pipeline import Pipeline from actes_princiers.pipeline_registry import register_pipelines -#bnhm.pipeline import create_pipelines +from actes_princiers.actesdataset import XMLDataSet + class ProjectContext(KedroContext): @@ -21,13 +22,21 @@ class ProjectContext(KedroContext): def _get_catalog(self, *args, **kwargs): catalog = super()._get_catalog(*args, **kwargs) + catalog.add(data_set_name="mon_test_de_catalogue", + data_set=XMLDataSet( + filepath="data/02_intermediate/xml/Anjou/test.dat", + ), + replace=True, + ) + return catalog def run_package(): # Entry point for running a Kedro project packaged with `kedro package` # using `python -m .run` command. project_context = load_package_context( - project_path=Path.cwd(), package_name=Path(__file__).resolve().parent.name + project_path=Path.cwd(), + package_name=Path(__file__).resolve().parent.name ) project_context.run()