source_doc ok

develop
gwen 3 years ago
parent 94759a4820
commit 47f19eb93c

@ -12,18 +12,32 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 2,
"id": "ae9bc24c",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"font-weight: bold\">&lt;</span><span style=\"color: #ff00ff; text-decoration-color: #ff00ff; font-weight: bold\">kedro.io.data_catalog.DataCatalog</span><span style=\"color: #000000; text-decoration-color: #000000\"> object at </span><span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">0x7f2d5be6e740</span><span style=\"font-weight: bold\">&gt;</span>\n",
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"color: #800000; text-decoration-color: #800000\">╭─────────────────────────────── </span><span style=\"color: #800000; text-decoration-color: #800000; font-weight: bold\">Traceback </span><span style=\"color: #bf7f7f; text-decoration-color: #bf7f7f; font-weight: bold\">(most recent call last)</span><span style=\"color: #800000; text-decoration-color: #800000\"> ────────────────────────────────╮</span>\n",
"<span style=\"color: #800000; text-decoration-color: #800000\">│</span> in <span style=\"color: #00ff00; text-decoration-color: #00ff00\">&lt;module&gt;</span>:<span style=\"color: #0000ff; text-decoration-color: #0000ff\">1</span> <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
"<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
"<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">❱ </span>1 catalog <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
"<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">2 </span><span style=\"color: #00ffff; text-decoration-color: #00ffff\">dir</span>(catalog) <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
"<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">3 </span> <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
"<span style=\"color: #800000; text-decoration-color: #800000\">╰──────────────────────────────────────────────────────────────────────────────────────────────────╯</span>\n",
"<span style=\"color: #ff0000; text-decoration-color: #ff0000; font-weight: bold\">NameError: </span>name <span style=\"color: #008000; text-decoration-color: #008000\">'catalog'</span> is not defined\n",
"</pre>\n"
],
"text/plain": [
"\u001b[1m<\u001b[0m\u001b[1;95mkedro.io.data_catalog.DataCatalog\u001b[0m\u001b[39m object at \u001b[0m\u001b[1;36m0x7f2d5be6e740\u001b[0m\u001b[1m>\u001b[0m\n"
"\u001b[31m╭─\u001b[0m\u001b[31m──────────────────────────────\u001b[0m\u001b[31m \u001b[0m\u001b[1;31mTraceback \u001b[0m\u001b[1;2;31m(most recent call last)\u001b[0m\u001b[31m \u001b[0m\u001b[31m───────────────────────────────\u001b[0m\u001b[31m─╮\u001b[0m\n",
"\u001b[31m│\u001b[0m in \u001b[92m<module>\u001b[0m:\u001b[94m1\u001b[0m \u001b[31m│\u001b[0m\n",
"\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n",
"\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m1 catalog \u001b[31m│\u001b[0m\n",
"\u001b[31m│\u001b[0m \u001b[2m2 \u001b[0m\u001b[96mdir\u001b[0m(catalog) \u001b[31m│\u001b[0m\n",
"\u001b[31m│\u001b[0m \u001b[2m3 \u001b[0m \u001b[31m│\u001b[0m\n",
"\u001b[31m╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m\n",
"\u001b[1;91mNameError: \u001b[0mname \u001b[32m'catalog'\u001b[0m is not defined\n"
]
},
"metadata": {},
@ -31,7 +45,8 @@
}
],
"source": [
"catalog"
"catalog\n",
"dir(catalog)"
]
},
{
@ -597,7 +612,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.6"
"version": "3.9.2"
}
},
"nbformat": 4,

@ -7,6 +7,10 @@ class ProjectContext(KedroContext):
project_version = "0.1"
package_name = "actes_princiers"
def get_params(self):
    """Load the project parameters through the config loader.

    Returns:
        Whatever ``self.config_loader.get("params*")`` yields.
    """
    # The loaded value used to be bound to ``houses`` while ``params`` was
    # returned, which raised NameError on every call.
    params = self.config_loader.get("params*")
    return params
def get_houses(self):
"""loading from generic configuration file
(that is, the global houses `houses.yaml`)"""

@ -2,20 +2,11 @@ import logging
from pathlib import Path
from typing import Dict
from lxml import etree
from actesdataset import EtreeXMLDataSet
logger = logging.getLogger(__name__)
def transform(source_doc: etree._ElementTree, xlststylesheet: str) -> str:
    """Apply the XSLT stylesheet found at ``xlststylesheet`` to ``source_doc``.

    The stylesheet is parsed with ``etree.parse``, wrapped in ``etree.XSLT``,
    applied to ``source_doc``, and the result is converted with ``str()``.
    """
    stylesheet_tree = etree.parse(xlststylesheet)
    apply_stylesheet = etree.XSLT(stylesheet_tree)
    result_tree = apply_stylesheet(source_doc)
    return str(result_tree)
def parse_xml_collection(datasets: Dict[str, EtreeXMLDataSet], param: str) -> Dict[str, EtreeXMLDataSet]:
"node function entry point, performs batch processing"
output_datasets = dict()
@ -24,10 +15,8 @@ def parse_xml_collection(datasets: Dict[str, EtreeXMLDataSet], param: str) -> Di
# the dataset **is not** registered in kedro's catalog
dataset._load()
descr = dataset._describe()
logger.info(f"dataset {descr} loaded")
# logger.info(str(dataset._describe()))
# logger.info(dataset.source_doc)
output_source_doc = transform(dataset.get_source_doc(), param)
# logger.info(f"dataset {descr} loaded")
output_source_doc = dataset.transform()
# set dataset's output filepath
output_filepath = dataset.filepath.replace("01_raw", "02_intermediate")
output_xmldataset = EtreeXMLDataSet(output_filepath)

@ -11,6 +11,19 @@ from kedro.framework.session import KedroSession
logger = logging.getLogger(__name__)
# Open a Kedro session, load the project context and read the
# 'xsltstylesheet' entry from the context parameters.
# NOTE(review): the variable is spelled 'xlst...' while the parameter key is
# spelled 'xslt...' -- the rest of the module refers to the 'xlst' spelling.
with KedroSession.create() as session:
context = session.load_context()
xlststylesheet = context.params['xsltstylesheet']
# Former hard-coded stylesheet location, kept for reference.
#xlststylesheet = "templates/xsl/actes_princiers.xsl"
# FIXME make this function a classmethod ?
def _xslt(xsltstylesheet):
    """Build an XSLT transformer from a stylesheet.

    Args:
        xsltstylesheet: stylesheet source handed to ``etree.parse``.

    Returns:
        The ``etree.XSLT`` object created from the parsed stylesheet.
    """
    # Parse the stylesheet named by the argument. The body used to read the
    # module-level ``xlststylesheet`` instead, silently ignoring the parameter.
    xslt_doc = etree.parse(xsltstylesheet)
    return etree.XSLT(xslt_doc)
# Transformer built once from the configured stylesheet; the transform()
# method further down in this module calls it by this name.
xslt_transformer = _xslt(xlststylesheet)
class XMLDataSet:
"Abstract base class for an XML dataset loader"
@ -18,28 +31,11 @@ class XMLDataSet:
def __init__(self, filepath: str) -> None:
    """Remember ``filepath`` on the instance as ``_filepath``."""
    self._filepath = filepath
# def _load(self):
# "kedro's API-like loader"
# pass
#
# def _save(self, data:str) -> None:
# "kedro's API-like saver"
# pass
@property
def filepath(self) -> str:
    """Read-only view of the path stored in ``_filepath``."""
    stored_path = self._filepath
    return stored_path
# FIXME to be removed, BUT then transform() has to be moved over here...
def get_source_doc(self) -> str:
    """Return the ``source_doc`` attribute of this dataset.

    Raises:
        AttributeError: when the instance has no ``source_doc`` attribute;
            the message embeds ``str(self._describe())``.
    """
    # Guard clause first so the success path reads straight through.
    if not hasattr(self, 'source_doc'):
        attr_error_msg = str(self._describe())
        # Message typo fixed: it previously read "XMLDataSet bject".
        raise AttributeError(f"XMLDataSet object {attr_error_msg} has no attribute named : 'source_doc'")
    return self.source_doc
def _describe(self) -> Dict[str, Any]:
    """Describe the dataset as a dict with a single ``filepath`` entry."""
    return {"filepath": self._filepath}
@ -63,7 +59,9 @@ class EtreeXMLDataSet(XMLDataSet):
"kedro's API-like saver"
with open(self._filepath, 'w') as fhandle:
fhandle.write(data)
def transform(self):
    """Run the module-level ``xslt_transformer`` on ``self.source_doc``.

    Returns the transformer's result converted with ``str()``.
    """
    transformed = xslt_transformer(self.source_doc)
    return str(transformed)
class XMLDataSetCollection(AbstractDataSet):
"""Stores instances of ``XMLDataSet``

Loading…
Cancel
Save