diff --git a/actes-princiers/notebooks/LoadDataCatalog.ipynb b/actes-princiers/notebooks/LoadDataCatalog.ipynb index eb5998a..608ed58 100644 --- a/actes-princiers/notebooks/LoadDataCatalog.ipynb +++ b/actes-princiers/notebooks/LoadDataCatalog.ipynb @@ -12,18 +12,32 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 2, "id": "ae9bc24c", "metadata": {}, "outputs": [ { "data": { "text/html": [ - "
<kedro.io.data_catalog.DataCatalog object at 0x7f2d5be6e740>\n",
+       "
╭─────────────────────────────── Traceback (most recent call last) ────────────────────────────────╮\n",
+       " in <module>:1                                                                                    \n",
+       "                                                                                                  \n",
+       " 1 catalog                                                                                      \n",
+       "   2 dir(catalog)                                                                                 \n",
+       "   3                                                                                              \n",
+       "╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n",
+       "NameError: name 'catalog' is not defined\n",
        "
\n" ], "text/plain": [ - "\u001b[1m<\u001b[0m\u001b[1;95mkedro.io.data_catalog.DataCatalog\u001b[0m\u001b[39m object at \u001b[0m\u001b[1;36m0x7f2d5be6e740\u001b[0m\u001b[1m>\u001b[0m\n" + "\u001b[31m╭─\u001b[0m\u001b[31m──────────────────────────────\u001b[0m\u001b[31m \u001b[0m\u001b[1;31mTraceback \u001b[0m\u001b[1;2;31m(most recent call last)\u001b[0m\u001b[31m \u001b[0m\u001b[31m───────────────────────────────\u001b[0m\u001b[31m─╮\u001b[0m\n", + "\u001b[31m│\u001b[0m in \u001b[92m\u001b[0m:\u001b[94m1\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m1 catalog \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m2 \u001b[0m\u001b[96mdir\u001b[0m(catalog) \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m3 \u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m\n", + "\u001b[1;91mNameError: \u001b[0mname \u001b[32m'catalog'\u001b[0m is not defined\n" ] }, "metadata": {}, @@ -31,7 +45,8 @@ } ], "source": [ - "catalog" + "catalog\n", + "dir(catalog)" ] }, { @@ -597,7 +612,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.6" + "version": "3.9.2" } }, "nbformat": 4, diff --git a/actes-princiers/src/actes_princiers/customcontext.py b/actes-princiers/src/actes_princiers/customcontext.py index 202a92b..6238cb7 100644 --- a/actes-princiers/src/actes_princiers/customcontext.py +++ b/actes-princiers/src/actes_princiers/customcontext.py @@ -7,6 +7,10 @@ class ProjectContext(KedroContext): project_version = "0.1" package_name = "actes_princiers" + def get_params(self): + houses = self.config_loader.get("params*") + return params + def get_houses(self): """loading from generic configuration file (that is, the global houses `houses.yaml`)""" diff --git a/actes-princiers/src/actes_princiers/pipelines/xml_processing/nodes.py b/actes-princiers/src/actes_princiers/pipelines/xml_processing/nodes.py index e6604e9..80e6a9e 100755 --- a/actes-princiers/src/actes_princiers/pipelines/xml_processing/nodes.py +++ b/actes-princiers/src/actes_princiers/pipelines/xml_processing/nodes.py @@ -2,20 +2,11 @@ import logging from pathlib import Path from typing import Dict -from lxml import etree - from actesdataset import EtreeXMLDataSet logger = logging.getLogger(__name__) -def transform(source_doc: etree._ElementTree, xlststylesheet: str) -> str: - "performs XML transformation on each dataset" - xslt_doc = etree.parse(xlststylesheet) - xslt_transformer = etree.XSLT(xslt_doc) - return str(xslt_transformer(source_doc)) - - def parse_xml_collection(datasets: Dict[str, EtreeXMLDataSet], param: str) -> Dict[str, EtreeXMLDataSet]: "node function entry point, performs batch processing" output_datasets = dict() @@ -24,10 +15,8 @@ def parse_xml_collection(datasets: Dict[str, EtreeXMLDataSet], param: str) -> Di # the dataset **is not** registered in kedro's catalog dataset._load() descr = dataset._describe() - logger.info(f"dataset {descr} loaded") -# logger.info(str(dataset._describe())) -# logger.info(dataset.source_doc) - output_source_doc = transform(dataset.get_source_doc(), param) +# logger.info(f"dataset {descr} loaded") + output_source_doc = dataset.transform() # set dataset's output filepath output_filepath = dataset.filepath.replace("01_raw", "02_intermediate") output_xmldataset = EtreeXMLDataSet(output_filepath) diff --git a/actes-princiers/src/actesdataset.py b/actes-princiers/src/actesdataset.py index 903ad52..38eb786 100644 --- a/actes-princiers/src/actesdataset.py +++ b/actes-princiers/src/actesdataset.py @@ -11,6 +11,19 @@ from kedro.framework.session import KedroSession logger = logging.getLogger(__name__) +with KedroSession.create() as session: + context = session.load_context() + xlststylesheet = context.params['xsltstylesheet'] + +#xlststylesheet = "templates/xsl/actes_princiers.xsl" +# FIXME make this function a classmethod ? +def _xslt(xsltstylesheet): + "performs XML transformation on each dataset" + xslt_doc = etree.parse(xlststylesheet) + xslt_transformer = etree.XSLT(xslt_doc) + return xslt_transformer + +xslt_transformer = _xslt(xlststylesheet) class XMLDataSet: "Abstract base class for an XML dataset loader" @@ -18,28 +31,11 @@ class XMLDataSet: def __init__(self, filepath: str) -> None: self._filepath = filepath -# def _load(self): -# "kedro's API-like loader" -# pass -# -# def _save(self, data:str) -> None: -# "kedro's API-like saver" -# pass - @property def filepath(self) -> str: "xml file's filename getters" return self._filepath - # FIXME à supprimer MAIS alors il faut rapatrier transform()... - def get_source_doc(self) -> str: - "XML source_doc (xml as a string) getter" - if hasattr(self, 'source_doc'): - return self.source_doc - else: - attr_error_msg = str(self._describe()) - raise AttributeError(f"XMLDataSet bject {attr_error_msg} has no attribute named : 'source_doc'") - def _describe(self) -> Dict[str, Any]: "kedro's API-like repr()" return dict(filepath=self._filepath) @@ -63,7 +59,9 @@ class EtreeXMLDataSet(XMLDataSet): "kedro's API-like saver" with open(self._filepath, 'w') as fhandle: fhandle.write(data) - + + def transform(self): + return str(xslt_transformer(self.source_doc)) class XMLDataSetCollection(AbstractDataSet): """Stores instances of ``XMLDataSet``