develop
gwen 3 years ago
parent e06f83a7f5
commit 0a5af99004

@ -23,15 +23,21 @@ def parse_xml_collection(datasets: Dict[str, EtreeXMLDataSet], param: str) -> Di
# a manual load is required here, because # a manual load is required here, because
# the dataset **is not** registered in kedro's catalog # the dataset **is not** registered in kedro's catalog
dataset._load() dataset._load()
descr = dataset._describe()
logger.info(f"dataset {descr} loaded")
# logger.info(str(dataset._describe()))
# logger.info(dataset.source_doc)
output_source_doc = transform(dataset.get_source_doc(), param) output_source_doc = transform(dataset.get_source_doc(), param)
# set dataset's output filepath # set dataset's output filepath
output_filepath = dataset.filepath.replace("01_raw", "02_intermediate") output_filepath = dataset.filepath.replace("01_raw", "02_intermediate")
output_xmldataset = EtreeXMLDataSet(output_filepath) output_xmldataset = EtreeXMLDataSet(output_filepath)
output_xmldataset.set_source_doc(output_source_doc)
output_datasets[dataset_filenamestem] = output_xmldataset
# let's create subfolders now, if they don't exist # let's create subfolders now, if they don't exist
output_filepath = Path(output_filepath) output_filepath = Path(output_filepath)
output_xmldataset_dir = output_filepath.parent output_xmldataset_dir = output_filepath.parent
output_xmldataset_dir.mkdir(parents=True, exist_ok=True) output_xmldataset_dir.mkdir(parents=True, exist_ok=True)
output_xmldataset._save(output_source_doc)
output_datasets[dataset_filenamestem] = output_xmldataset
return output_datasets return output_datasets

@ -82,14 +82,6 @@ class XMLDataSetCollection(AbstractDataSet):
folderpath: str) -> None: folderpath: str) -> None:
self._housename = housename self._housename = housename
self._folderpath = Path(folderpath) self._folderpath = Path(folderpath)
def get_datasets(self) -> Dict[str, Any]:
"datasets mapper getter"
if hasattr(self, 'datasets'):
return self.datasets
else:
attr_error_msg = str(self._describe())
raise AttributeError(f"Object {attr_error_msg} has no attribute named : 'datasets'")
def _load(self) -> dict[str, EtreeXMLDataSet]: def _load(self) -> dict[str, EtreeXMLDataSet]:
"kedro's API loader method" "kedro's API loader method"
@ -99,10 +91,14 @@ class XMLDataSetCollection(AbstractDataSet):
filepath=str(filepath)) filepath=str(filepath))
return self.datasets return self.datasets
def _save(self, datasets: dict[str, Any]) -> None: def _save(self, data) -> None:
"kedro's API saver method" """kedro's API saver method
for stemfilename, dataset in datasets.items():
dataset._save(dataset.get_source_doc())  There is **nothing to save**, because
 this dataset collections is a *container* dataset.
this method is here only because kedro requires it.
 """
pass
def _describe(self) -> dict[str, Any]: def _describe(self) -> dict[str, Any]:
"kedro's API repr()" "kedro's API repr()"

Loading…
Cancel
Save