|
|
|
|
@ -6,7 +6,7 @@ from kedro.framework.session import KedroSession
|
|
|
|
|
|
|
|
|
|
from actesdataset import EtreeXMLDataSet, BsXMLDataSet, JSONDataSet
|
|
|
|
|
from actesdataset import (XMLDataSetCollection, BsXMLDataSetCollection,
|
|
|
|
|
JSONDataSetCollection)
|
|
|
|
|
JSONDataSetCollection, TextDataSetCollection)
|
|
|
|
|
|
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
|
|
|
|
@ -60,15 +60,29 @@ def make_json_collection(datasetcol: BsXMLDataSetCollection) -> JSONDataSetColle
|
|
|
|
|
output_datasets.datasets[dataset_filenamestem] = output_xmldataset
|
|
|
|
|
return output_datasets
|
|
|
|
|
|
|
|
|
|
#def add_xmlcontent_tojson(jsondoc: JSONDataSetCollection, xmlcontent: XMLDataSetCollection) -> Dict[str, JSONDataSet]:
|
|
|
|
|
def add_xmlcontent_tojson(jsondoc: JSONDataSetCollection, xmlcontent: TextDataSetCollection) -> JSONDataSetCollection:
    """Add the matching XML content to every JSON document of a collection.

    For each entry of ``jsondoc.datasets`` the JSON document is loaded, the
    content loaded from the same-named entry of ``xmlcontent.datasets`` is
    stored under its ``'xmlcontent'`` key, and the result is saved as
    ``<stem>.json`` below the folder of the ``<housename>_fulljsonoutput``
    catalog entry.

    Args:
        jsondoc: collection of JSON datasets to enrich; its ``_housename``
            selects the output catalog entry.
        xmlcontent: collection whose ``datasets`` mapping is keyed by the
            same filename stems as ``jsondoc.datasets``.

    Returns:
        The collection loaded from the catalog, with one freshly saved
        ``JSONDataSet`` registered per processed filename stem.

    Raises:
        KeyError: if ``xmlcontent`` has no dataset for one of the filename
            stems found in ``jsondoc``.
    """
    jsondatasets = jsondoc.datasets
    housename = jsondoc._housename
    # NOTE(review): `context` is not defined in this function; it is
    # expected to be available at module level -- confirm.
    output_datasets = context.catalog.load(housename + '_fulljsonoutput')
    outputfolderpath = output_datasets._folderpath
    xmldatasets = xmlcontent.datasets

    for dataset_filenamestem, dataset in jsondatasets.items():
        document = dataset._load()
        output_filepath = outputfolderpath / Path(dataset_filenamestem).with_suffix(".json")
        output_jsondataset = JSONDataSet(str(output_filepath))

        # Every JSON document must have its XML counterpart: fail loudly
        # rather than write an incomplete document.
        if dataset_filenamestem not in xmldatasets:
            raise KeyError(f"xmlcontent datasets does not have the key : {dataset_filenamestem}")
        document['xmlcontent'] = xmldatasets[dataset_filenamestem]._load()

        # The stem may contain sub-folders; create them if they don't exist.
        output_filepath.parent.mkdir(parents=True, exist_ok=True)

        # Write the enriched document, then register it in the output collection.
        output_jsondataset._save(document)
        output_datasets.datasets[dataset_filenamestem] = output_jsondataset

    return output_datasets
|
|
|
|
|
|
|
|
|
|
# logger.info("9999999999999999999999" + str(xmlcontent.datasets.keys()))
|
|
|
|
|
# json_datasets = jsondoc.datasets
|
|
|
|
|
## xmlcontent._load()
|
|
|
|
|
# logger.info(str(xmlcontent))
|
|
|
|
|
# xmlcontent = xmlcontent.datasets
|
|
|
|
|
# for dataset_filenamestem, dataset in json_datasets.items():
|
|
|
|
|
# document = dataset._load()
|
|
|
|
|
# document['xmlcontent'] = xmlcontent[dataset_filenamestem].source_doc
|
|
|
|
|
# return json_datasets
|
|
|
|
|
#
|
|
|
|
|
|