cleaning

3 years ago · 0a5af99004
parent e06f83a7f5
commit 0a5af99004
2 changed files with 16 additions and 14 deletions
--- a/actes-princiers/src/actes_princiers/pipelines/xml_processing/nodes.py
+++ b/actes-princiers/src/actes_princiers/pipelines/xml_processing/nodes.py
@ -23,15 +23,21 @@ def parse_xml_collection(datasets: Dict[str, EtreeXMLDataSet], param: str) -> Di
        # a manual load is required here, because
        # the dataset **is not** registered in kedro's catalog
        dataset._load()
        descr = dataset._describe()
        logger.info(f"dataset {descr} loaded")
 #        logger.info(str(dataset._describe()))
 #        logger.info(dataset.source_doc)
        output_source_doc = transform(dataset.get_source_doc(), param)
        # set dataset's output filepath
        output_filepath = dataset.filepath.replace("01_raw", "02_intermediate")
        output_xmldataset = EtreeXMLDataSet(output_filepath)
-        output_xmldataset.set_source_doc(output_source_doc)
+
        output_datasets[dataset_filenamestem] = output_xmldataset
        # let's create subfolders now, if they don't exist
        output_filepath = Path(output_filepath)
        output_xmldataset_dir = output_filepath.parent
        output_xmldataset_dir.mkdir(parents=True, exist_ok=True)
        output_xmldataset._save(output_source_doc)
        output_datasets[dataset_filenamestem] = output_xmldataset
    return output_datasets
--- a/actes-princiers/src/actesdataset.py
+++ b/actes-princiers/src/actesdataset.py
@ -82,14 +82,6 @@ class XMLDataSetCollection(AbstractDataSet):
        folderpath: str) -> None:
        self._housename = housename
        self._folderpath = Path(folderpath)
    def get_datasets(self) -> Dict[str, Any]:
        """Return the ``datasets`` mapping stored on this collection.

        Returns:
            The object bound to ``self.datasets``.
            NOTE(review): presumably populated by ``_load`` -- confirm.

        Raises:
            AttributeError: if the instance has no ``datasets`` attribute;
                the message embeds the string form of ``self._describe()``.
        """
        # Guard clause: fail early with a descriptive message when the
        # attribute has not been set on this instance.
        if not hasattr(self, 'datasets'):
            description = str(self._describe())
            raise AttributeError(f"Object {description} has no attribute named : 'datasets'")
        return self.datasets
    def _load(self) -> dict[str, EtreeXMLDataSet]:
        "kedro's API loader method"
@ -99,10 +91,14 @@ class XMLDataSetCollection(AbstractDataSet):
                filepath=str(filepath))
        return self.datasets
-    def _save(self, datasets: dict[str, Any]) -> None:
+    def _save(self, data) -> None:
-        "kedro's API saver method"
+        """kedro's API saver method
-        for stemfilename, dataset in datasets.items():
+        
-            dataset._save(dataset.get_source_doc())
+         There is **nothing to save**, because  
         this dataset collection is a *container* dataset.
         This method exists only because kedro requires it.
         """ 
        pass
    def _describe(self) -> dict[str, Any]:
        "kedro's API repr()"