source_doc ok

develop
gwen 3 years ago
parent 94759a4820
commit 47f19eb93c

@ -12,18 +12,32 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 1, "execution_count": 2,
"id": "ae9bc24c", "id": "ae9bc24c",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
"data": { "data": {
"text/html": [ "text/html": [
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"font-weight: bold\">&lt;</span><span style=\"color: #ff00ff; text-decoration-color: #ff00ff; font-weight: bold\">kedro.io.data_catalog.DataCatalog</span><span style=\"color: #000000; text-decoration-color: #000000\"> object at </span><span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">0x7f2d5be6e740</span><span style=\"font-weight: bold\">&gt;</span>\n", "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"color: #800000; text-decoration-color: #800000\">╭─────────────────────────────── </span><span style=\"color: #800000; text-decoration-color: #800000; font-weight: bold\">Traceback </span><span style=\"color: #bf7f7f; text-decoration-color: #bf7f7f; font-weight: bold\">(most recent call last)</span><span style=\"color: #800000; text-decoration-color: #800000\"> ────────────────────────────────╮</span>\n",
"<span style=\"color: #800000; text-decoration-color: #800000\">│</span> in <span style=\"color: #00ff00; text-decoration-color: #00ff00\">&lt;module&gt;</span>:<span style=\"color: #0000ff; text-decoration-color: #0000ff\">1</span> <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
"<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
"<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">❱ </span>1 catalog <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
"<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">2 </span><span style=\"color: #00ffff; text-decoration-color: #00ffff\">dir</span>(catalog) <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
"<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">3 </span> <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
"<span style=\"color: #800000; text-decoration-color: #800000\">╰──────────────────────────────────────────────────────────────────────────────────────────────────╯</span>\n",
"<span style=\"color: #ff0000; text-decoration-color: #ff0000; font-weight: bold\">NameError: </span>name <span style=\"color: #008000; text-decoration-color: #008000\">'catalog'</span> is not defined\n",
"</pre>\n" "</pre>\n"
], ],
"text/plain": [ "text/plain": [
"\u001b[1m<\u001b[0m\u001b[1;95mkedro.io.data_catalog.DataCatalog\u001b[0m\u001b[39m object at \u001b[0m\u001b[1;36m0x7f2d5be6e740\u001b[0m\u001b[1m>\u001b[0m\n" "\u001b[31m╭─\u001b[0m\u001b[31m──────────────────────────────\u001b[0m\u001b[31m \u001b[0m\u001b[1;31mTraceback \u001b[0m\u001b[1;2;31m(most recent call last)\u001b[0m\u001b[31m \u001b[0m\u001b[31m───────────────────────────────\u001b[0m\u001b[31m─╮\u001b[0m\n",
"\u001b[31m│\u001b[0m in \u001b[92m<module>\u001b[0m:\u001b[94m1\u001b[0m \u001b[31m│\u001b[0m\n",
"\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n",
"\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m1 catalog \u001b[31m│\u001b[0m\n",
"\u001b[31m│\u001b[0m \u001b[2m2 \u001b[0m\u001b[96mdir\u001b[0m(catalog) \u001b[31m│\u001b[0m\n",
"\u001b[31m│\u001b[0m \u001b[2m3 \u001b[0m \u001b[31m│\u001b[0m\n",
"\u001b[31m╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m\n",
"\u001b[1;91mNameError: \u001b[0mname \u001b[32m'catalog'\u001b[0m is not defined\n"
] ]
}, },
"metadata": {}, "metadata": {},
@ -31,7 +45,8 @@
} }
], ],
"source": [ "source": [
"catalog" "catalog\n",
"dir(catalog)"
] ]
}, },
{ {
@ -597,7 +612,7 @@
"name": "python", "name": "python",
"nbconvert_exporter": "python", "nbconvert_exporter": "python",
"pygments_lexer": "ipython3", "pygments_lexer": "ipython3",
"version": "3.10.6" "version": "3.9.2"
} }
}, },
"nbformat": 4, "nbformat": 4,

@ -7,6 +7,10 @@ class ProjectContext(KedroContext):
project_version = "0.1" project_version = "0.1"
package_name = "actes_princiers" package_name = "actes_princiers"
def get_params(self):
    """Load the project parameters through the context's config loader.

    Returns:
        Whatever ``self.config_loader.get("params*")`` yields for the
        ``params*`` pattern.
    """
    # The result used to be bound to ``houses`` while the undefined name
    # ``params`` was returned, so every call raised NameError.
    params = self.config_loader.get("params*")
    return params
def get_houses(self): def get_houses(self):
"""loading from generic configuration file """loading from generic configuration file
(that is, the global houses `houses.yaml`)""" (that is, the global houses `houses.yaml`)"""

@ -2,20 +2,11 @@ import logging
from pathlib import Path from pathlib import Path
from typing import Dict from typing import Dict
from lxml import etree
from actesdataset import EtreeXMLDataSet from actesdataset import EtreeXMLDataSet
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
def transform(source_doc: etree._ElementTree, xlststylesheet: str) -> str:
    """Apply an XSLT stylesheet to an already parsed XML document.

    ``xlststylesheet`` is handed to ``etree.parse`` to load the stylesheet;
    the transformation result is returned converted to ``str``.
    """
    stylesheet_tree = etree.parse(xlststylesheet)
    apply_stylesheet = etree.XSLT(stylesheet_tree)
    result_tree = apply_stylesheet(source_doc)
    return str(result_tree)
def parse_xml_collection(datasets: Dict[str, EtreeXMLDataSet], param: str) -> Dict[str, EtreeXMLDataSet]: def parse_xml_collection(datasets: Dict[str, EtreeXMLDataSet], param: str) -> Dict[str, EtreeXMLDataSet]:
"node function entry point, performs batch processing" "node function entry point, performs batch processing"
output_datasets = dict() output_datasets = dict()
@ -24,10 +15,8 @@ def parse_xml_collection(datasets: Dict[str, EtreeXMLDataSet], param: str) -> Di
# the dataset **is not** registered in kedro's catalog # the dataset **is not** registered in kedro's catalog
dataset._load() dataset._load()
descr = dataset._describe() descr = dataset._describe()
logger.info(f"dataset {descr} loaded") # logger.info(f"dataset {descr} loaded")
# logger.info(str(dataset._describe())) output_source_doc = dataset.transform()
# logger.info(dataset.source_doc)
output_source_doc = transform(dataset.get_source_doc(), param)
# set dataset's output filepath # set dataset's output filepath
output_filepath = dataset.filepath.replace("01_raw", "02_intermediate") output_filepath = dataset.filepath.replace("01_raw", "02_intermediate")
output_xmldataset = EtreeXMLDataSet(output_filepath) output_xmldataset = EtreeXMLDataSet(output_filepath)

@ -11,6 +11,19 @@ from kedro.framework.session import KedroSession
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
# Module-level setup: open a Kedro session, load the project context and read
# the stylesheet location from the 'xsltstylesheet' entry of context.params.
with KedroSession.create() as session:
context = session.load_context()
xlststylesheet = context.params['xsltstylesheet']
# Hard-coded stylesheet path, left commented out as an alternative:
#xlststylesheet = "templates/xsl/actes_princiers.xsl"
# FIXME make this function a classmethod ?
def _xslt(xsltstylesheet):
    """Build an XSLT transformer from a stylesheet.

    Args:
        xsltstylesheet: stylesheet source handed to ``etree.parse``.

    Returns:
        The ``etree.XSLT`` object built from the parsed stylesheet; calling
        it on a document performs the actual transformation.
    """
    # Parse the stylesheet received as argument. The body used to read the
    # module-level ``xlststylesheet`` (note the transposed letters) and
    # silently ignored its own parameter.
    xslt_doc = etree.parse(xsltstylesheet)
    return etree.XSLT(xslt_doc)
xslt_transformer = _xslt(xlststylesheet)
class XMLDataSet: class XMLDataSet:
"Abstract base class for an XML dataset loader" "Abstract base class for an XML dataset loader"
@ -18,28 +31,11 @@ class XMLDataSet:
def __init__(self, filepath: str) -> None: def __init__(self, filepath: str) -> None:
self._filepath = filepath self._filepath = filepath
# def _load(self):
# "kedro's API-like loader"
# pass
#
# def _save(self, data:str) -> None:
# "kedro's API-like saver"
# pass
@property @property
def filepath(self) -> str: def filepath(self) -> str:
"xml file's filename getters" "xml file's filename getters"
return self._filepath return self._filepath
# FIXME: to be removed, BUT transform() would then have to be moved back here...
def get_source_doc(self) -> str:
    """Return this dataset's ``source_doc`` attribute.

    Raises:
        AttributeError: if ``source_doc`` is not set on the instance; the
            message embeds the output of ``self._describe()``.
    """
    try:
        return self.source_doc
    except AttributeError:
        description = str(self._describe())
        # Message typo fixed ("bject" -> "object").
        raise AttributeError(
            f"XMLDataSet object {description} has no attribute named : 'source_doc'"
        ) from None
def _describe(self) -> Dict[str, Any]: def _describe(self) -> Dict[str, Any]:
"kedro's API-like repr()" "kedro's API-like repr()"
return dict(filepath=self._filepath) return dict(filepath=self._filepath)
@ -63,7 +59,9 @@ class EtreeXMLDataSet(XMLDataSet):
"kedro's API-like saver" "kedro's API-like saver"
with open(self._filepath, 'w') as fhandle: with open(self._filepath, 'w') as fhandle:
fhandle.write(data) fhandle.write(data)
def transform(self):
    """Run the module-level ``xslt_transformer`` on ``self.source_doc``.

    Returns the transformation result converted to ``str``.
    """
    transformed = xslt_transformer(self.source_doc)
    return str(transformed)
class XMLDataSetCollection(AbstractDataSet): class XMLDataSetCollection(AbstractDataSet):
"""Stores instances of ``XMLDataSet`` """Stores instances of ``XMLDataSet``

Loading…
Cancel
Save