diff --git a/actes-princiers/src/actes_princiers/pipelines/xml_processing/nodes.py b/actes-princiers/src/actes_princiers/pipelines/xml_processing/nodes.py index 167245a..e6604e9 100755 --- a/actes-princiers/src/actes_princiers/pipelines/xml_processing/nodes.py +++ b/actes-princiers/src/actes_princiers/pipelines/xml_processing/nodes.py @@ -23,15 +23,21 @@ def parse_xml_collection(datasets: Dict[str, EtreeXMLDataSet], param: str) -> Di # a manual load is required here, because # the dataset **is not** registered in kedro's catalog dataset._load() + descr = dataset._describe() + logger.info(f"dataset {descr} loaded") +# logger.info(str(dataset._describe())) +# logger.info(dataset.source_doc) output_source_doc = transform(dataset.get_source_doc(), param) # set dataset's output filepath output_filepath = dataset.filepath.replace("01_raw", "02_intermediate") output_xmldataset = EtreeXMLDataSet(output_filepath) - output_xmldataset.set_source_doc(output_source_doc) - output_datasets[dataset_filenamestem] = output_xmldataset + # let's create subfolders now, if they don't exist output_filepath = Path(output_filepath) output_xmldataset_dir = output_filepath.parent output_xmldataset_dir.mkdir(parents=True, exist_ok=True) + + output_xmldataset._save(output_source_doc) + output_datasets[dataset_filenamestem] = output_xmldataset return output_datasets diff --git a/actes-princiers/src/actesdataset.py b/actes-princiers/src/actesdataset.py index bd425f4..bc0fe4e 100644 --- a/actes-princiers/src/actesdataset.py +++ b/actes-princiers/src/actesdataset.py @@ -82,14 +82,6 @@ class XMLDataSetCollection(AbstractDataSet): folderpath: str) -> None: self._housename = housename self._folderpath = Path(folderpath) - - def get_datasets(self) -> Dict[str, Any]: - "datasets mapper getter" - if hasattr(self, 'datasets'): - return self.datasets - else: - attr_error_msg = str(self._describe()) - raise AttributeError(f"Object {attr_error_msg} has no attribute named : 'datasets'") def _load(self) -> 
dict[str, EtreeXMLDataSet]: "kedro's API loader method" @@ -99,10 +91,14 @@ class XMLDataSetCollection(AbstractDataSet): filepath=str(filepath)) return self.datasets - def _save(self, datasets: dict[str, Any]) -> None: - "kedro's API saver method" - for stemfilename, dataset in datasets.items(): - dataset._save(dataset.get_source_doc()) + def _save(self, data) -> None: + """kedro's API saver method + +  There is **nothing to save**, because +  this dataset collection is a *container* dataset. + This method is here only because kedro requires it. +  """ + pass def _describe(self) -> dict[str, Any]: "kedro's API repr()"