|
|
|
@ -23,15 +23,21 @@ def parse_xml_collection(datasets: Dict[str, EtreeXMLDataSet], param: str) -> Di
|
|
|
|
# a manual load is required here, because
|
|
|
|
# a manual load is required here, because
|
|
|
|
# the dataset **is not** registered in kedro's catalog
|
|
|
|
# the dataset **is not** registered in kedro's catalog
|
|
|
|
dataset._load()
|
|
|
|
dataset._load()
|
|
|
|
|
|
|
|
descr = dataset._describe()
|
|
|
|
|
|
|
|
logger.info(f"dataset {descr} loaded")
|
|
|
|
|
|
|
|
# logger.info(str(dataset._describe()))
|
|
|
|
|
|
|
|
# logger.info(dataset.source_doc)
|
|
|
|
output_source_doc = transform(dataset.get_source_doc(), param)
|
|
|
|
output_source_doc = transform(dataset.get_source_doc(), param)
|
|
|
|
# set dataset's output filepath
|
|
|
|
# set dataset's output filepath
|
|
|
|
output_filepath = dataset.filepath.replace("01_raw", "02_intermediate")
|
|
|
|
output_filepath = dataset.filepath.replace("01_raw", "02_intermediate")
|
|
|
|
output_xmldataset = EtreeXMLDataSet(output_filepath)
|
|
|
|
output_xmldataset = EtreeXMLDataSet(output_filepath)
|
|
|
|
output_xmldataset.set_source_doc(output_source_doc)
|
|
|
|
|
|
|
|
output_datasets[dataset_filenamestem] = output_xmldataset
|
|
|
|
|
|
|
|
# let's create subfolders now, if they don't exist
|
|
|
|
# let's create subfolders now, if they don't exist
|
|
|
|
output_filepath = Path(output_filepath)
|
|
|
|
output_filepath = Path(output_filepath)
|
|
|
|
output_xmldataset_dir = output_filepath.parent
|
|
|
|
output_xmldataset_dir = output_filepath.parent
|
|
|
|
output_xmldataset_dir.mkdir(parents=True, exist_ok=True)
|
|
|
|
output_xmldataset_dir.mkdir(parents=True, exist_ok=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
output_xmldataset._save(output_source_doc)
|
|
|
|
|
|
|
|
output_datasets[dataset_filenamestem] = output_xmldataset
|
|
|
|
return output_datasets
|
|
|
|
return output_datasets
|
|
|
|
|
|
|
|
|
|
|
|
|