diff --git a/actes-princiers/conf/base/catalog.yml b/actes-princiers/conf/base/catalog.yml index c95f620..e3e110d 100644 --- a/actes-princiers/conf/base/catalog.yml +++ b/actes-princiers/conf/base/catalog.yml @@ -4,12 +4,12 @@ bourbon: type: actesdataset.XMLDataSetCollection housename: bourbon folderpath: data/01_raw/houses/bourbon + outputfolderpath: data/02_intermediate/houses/bourbon/xml -# FIXME change the path to data/02_intermediate/houses/bourbon/xml bourbon_content: type: actesdataset.XMLDataSetCollection housename: bourbon - folderpath: data/02_intermediate/houses/bourbon + folderpath: data/02_intermediate/houses/bourbon/xml #bourbon_json: # type: actesdataset.XMLDataSetCollection diff --git a/actes-princiers/src/actes_princiers/pipelines/xml_processing/nodes.py b/actes-princiers/src/actes_princiers/pipelines/xml_processing/nodes.py index ee9eb9a..147c04d 100755 --- a/actes-princiers/src/actes_princiers/pipelines/xml_processing/nodes.py +++ b/actes-princiers/src/actes_princiers/pipelines/xml_processing/nodes.py @@ -7,7 +7,7 @@ from actesdataset import EtreeXMLDataSet logger = logging.getLogger(__name__) -def parse_xml_collection(datasets: Dict[str, EtreeXMLDataSet], param: str) -> Dict[str, EtreeXMLDataSet]: +def parse_xml_collection(datasets: Dict[str, EtreeXMLDataSet]) -> Dict[str, EtreeXMLDataSet]: "node function entry point, performs batch processing" output_datasets = dict() for dataset_filenamestem, dataset in datasets.items(): @@ -18,6 +18,7 @@ def parse_xml_collection(datasets: Dict[str, EtreeXMLDataSet], param: str) -> Di logger.info(f"dataset {descr} loaded") output_source_doc = dataset.transform() # set dataset's output filepath +# output_filepath = _outputfolderpath output_filepath = dataset.filepath.replace("01_raw", "02_intermediate") output_xmldataset = EtreeXMLDataSet(output_filepath) # let's create subfolders now, if they don't exist diff --git a/actes-princiers/src/actes_princiers/pipelines/xml_processing/pipeline.py b/actes-princiers/src/actes_princiers/pipelines/xml_processing/pipeline.py index 9fa811f..4e5e9e0 100755 --- a/actes-princiers/src/actes_princiers/pipelines/xml_processing/pipeline.py +++ b/actes-princiers/src/actes_princiers/pipelines/xml_processing/pipeline.py @@ -8,19 +8,19 @@ def create_pipeline(**kwargs) -> Pipeline: [ node( func=parse_xml_collection, - inputs=["bourbon", "params:xsltstylesheet"], + inputs=["bourbon"], outputs="bourbon_content", name="bourbon_ds_collection", ), node( func=parse_xml_collection, - inputs=["berry", "params:xsltstylesheet"], + inputs=["berry"], outputs="berry_content", name="berry_ds_collection", ), node( func=parse_xml_collection, - inputs=["anjou", "params:xsltstylesheet"], + inputs=["anjou"], outputs="anjou_content", name="anjou_ds_collection", ), diff --git a/actes-princiers/src/actesdataset.py b/actes-princiers/src/actesdataset.py index 7680eec..7d7840a 100644 --- a/actes-princiers/src/actesdataset.py +++ b/actes-princiers/src/actesdataset.py @@ -1,6 +1,6 @@ import logging import json -from typing import Dict, Any +from typing import Dict, Any, Optional from pathlib import Path from lxml import etree @@ -121,9 +121,11 @@ class XMLDataSetCollection(AbstractDataSet): """ def __init__(self, housename: str, - folderpath: str) -> None: + folderpath: str, + outputfolderpath: Optional[str]=None) -> None: self._housename = housename self._folderpath = Path(folderpath) + self._outputfolderpath = outputfolderpath def _load(self) -> dict[str, EtreeXMLDataSet]: "kedro's API loader method"