|
|
|
|
@ -5,9 +5,8 @@ from typing import Dict
|
|
|
|
|
from kedro.framework.session import KedroSession
|
|
|
|
|
|
|
|
|
|
from actesdataset import EtreeXMLDataSet, BsXMLDataSet, JSONDataSet
|
|
|
|
|
from actesdataset import (XMLDataSetCollection, BsXMLDataSetCollection)
|
|
|
|
|
# JSONDataSetCollection)
|
|
|
|
|
# FullJSONDataSetCollection)
|
|
|
|
|
from actesdataset import (XMLDataSetCollection, BsXMLDataSetCollection,
|
|
|
|
|
JSONDataSetCollection)
|
|
|
|
|
|
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
|
|
|
|
@ -22,7 +21,6 @@ def parse_xml_collection(datasetcol: XMLDataSetCollection) -> XMLDataSetCollecti
|
|
|
|
|
# outputfolderpath = f"data/02_intermediate/houses/{housename}/xml"
|
|
|
|
|
output_catalog = catalog[housename + '_xmlcontent']
|
|
|
|
|
outputfolderpath = output_catalog['folderpath']
|
|
|
|
|
# output_datasets = dict()
|
|
|
|
|
output_datasets = XMLDataSetCollection(housename, str(outputfolderpath))
|
|
|
|
|
for dataset_filenamestem, dataset in datasets.items():
|
|
|
|
|
# a manual load is required here, because
|
|
|
|
|
@ -41,13 +39,13 @@ def parse_xml_collection(datasetcol: XMLDataSetCollection) -> XMLDataSetCollecti
|
|
|
|
|
return output_datasets
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def make_json_collection(datasetcol: BsXMLDataSetCollection) -> Dict[str, BsXMLDataSet]:
|
|
|
|
|
def make_json_collection(datasetcol: BsXMLDataSetCollection) -> JSONDataSetCollection:
|
|
|
|
|
"node function entry point, performs batch processing"
|
|
|
|
|
datasets = datasetcol.datasets
|
|
|
|
|
housename = datasetcol._housename
|
|
|
|
|
output_catalog = catalog[housename + '_jsonoutput']
|
|
|
|
|
outputfolderpath = output_catalog['folderpath']
|
|
|
|
|
output_datasets = dict()
|
|
|
|
|
output_datasets = JSONDataSetCollection(housename, str(outputfolderpath))
|
|
|
|
|
for dataset_filenamestem, dataset in datasets.items():
|
|
|
|
|
# a manual load is required here, because
|
|
|
|
|
# the dataset **is not** registered in kedro's catalog
|
|
|
|
|
@ -55,13 +53,13 @@ def make_json_collection(datasetcol: BsXMLDataSetCollection) -> Dict[str, BsXMLD
|
|
|
|
|
output_source_doc = dataset.transform()
|
|
|
|
|
# set dataset's output filepath
|
|
|
|
|
output_filepath = outputfolderpath / Path(dataset_filenamestem).with_suffix(".json")
|
|
|
|
|
output_xmldataset = BsXMLDataSet(str(output_filepath))
|
|
|
|
|
output_xmldataset = JSONDataSet(str(output_filepath))
|
|
|
|
|
# let's create subfolders, if they don't exist
|
|
|
|
|
output_xmldataset_dir = output_filepath.parent
|
|
|
|
|
output_xmldataset_dir.mkdir(parents=True, exist_ok=True)
|
|
|
|
|
# save on file
|
|
|
|
|
output_xmldataset._save(output_source_doc)
|
|
|
|
|
output_datasets[dataset_filenamestem] = output_xmldataset
|
|
|
|
|
output_datasets.datasets[dataset_filenamestem] = output_xmldataset
|
|
|
|
|
return output_datasets
|
|
|
|
|
|
|
|
|
|
#def add_xmlcontent_tojson(jsondoc: JSONDataSetCollection, xmlcontent: XMLDataSetCollection) -> Dict[str, JSONDataSet]:
|
|
|
|
|
|