|
|
|
|
@ -2,16 +2,21 @@ import logging
|
|
|
|
|
from pathlib import Path
|
|
|
|
|
from typing import Dict
|
|
|
|
|
|
|
|
|
|
from kedro.framework.session import KedroSession
|
|
|
|
|
|
|
|
|
|
from actesdataset import EtreeXMLDataSet, XMLDataSetCollection
|
|
|
|
|
|
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
|
|
|
|
# Open a Kedro session when this module is imported and read the output
# folder from the data catalog.
# NOTE(review): this is a module-level side effect (session creation happens
# on import) -- confirm that this is intended rather than doing the lookup
# lazily inside the node function.
with KedroSession.create() as session:
    context = session.load_context()
    catalog = context.get_catalog()
    # Reads the 'folderpath' item of the 'bourbon_xmlcontent' catalog entry;
    # presumably consumed by parse_xml_collection -- verify against its body.
    outputfolderpath = catalog['bourbon_xmlcontent']['folderpath']
|
|
|
|
|
|
|
|
|
|
def parse_xml_collection(datasetcollection: XMLDataSetCollection) -> Dict[str, EtreeXMLDataSet]:
|
|
|
|
|
def parse_xml_collection(datasets: Dict[str, EtreeXMLDataSet]) -> Dict[str, EtreeXMLDataSet]:
|
|
|
|
|
"node function entry point, performs batch processing"
|
|
|
|
|
# collection mapping
|
|
|
|
|
datasets = datasetcollection.datasets
|
|
|
|
|
outputfolderpath = datasetcollection.outputfolderpath
|
|
|
|
|
output_datasets = dict()
|
|
|
|
|
for dataset_filenamestem, dataset in datasets.items():
|
|
|
|
|
# a manual load is required here, because
|
|
|
|
|
|