|
|
|
|
@ -12,16 +12,15 @@ logger = logging.getLogger(__name__)
|
|
|
|
|
|
|
|
|
|
with KedroSession.create() as session:
|
|
|
|
|
context = session.load_context()
|
|
|
|
|
catalog = context.get_catalog()
|
|
|
|
|
# catalog = context.get_catalog() # FIXME: returning a dict here is confusing
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def parse_xml_collection(datasetcol: XMLDataSetCollection) -> XMLDataSetCollection:
|
|
|
|
|
"node function entry point, performs batch processing"
|
|
|
|
|
datasets = datasetcol.datasets
|
|
|
|
|
housename = datasetcol._housename
|
|
|
|
|
# outputfolderpath = f"data/02_intermediate/houses/{housename}/xml"
|
|
|
|
|
output_catalog = catalog[housename + '_xmlcontent']
|
|
|
|
|
outputfolderpath = output_catalog['folderpath']
|
|
|
|
|
output_datasets = XMLDataSetCollection(housename, str(outputfolderpath))
|
|
|
|
|
output_datasets = context.catalog.load(housename + '_xmlcontent')
|
|
|
|
|
outputfolderpath = output_datasets._folderpath
|
|
|
|
|
for dataset_filenamestem, dataset in datasets.items():
|
|
|
|
|
# a manual load is required here, because
|
|
|
|
|
# the dataset **is not** registered in kedro's catalog
|
|
|
|
|
@ -43,9 +42,8 @@ def make_json_collection(datasetcol: BsXMLDataSetCollection) -> JSONDataSetColle
|
|
|
|
|
"node function entry point, performs batch processing"
|
|
|
|
|
datasets = datasetcol.datasets
|
|
|
|
|
housename = datasetcol._housename
|
|
|
|
|
output_catalog = catalog[housename + '_jsonoutput']
|
|
|
|
|
outputfolderpath = output_catalog['folderpath']
|
|
|
|
|
output_datasets = JSONDataSetCollection(housename, str(outputfolderpath))
|
|
|
|
|
output_datasets = context.catalog.load(housename + '_jsonoutput')
|
|
|
|
|
outputfolderpath = output_datasets._folderpath
|
|
|
|
|
for dataset_filenamestem, dataset in datasets.items():
|
|
|
|
|
# a manual load is required here, because
|
|
|
|
|
# the dataset **is not** registered in kedro's catalog
|
|
|
|
|
|