develop
gwen 3 years ago
parent c8a3cb16f1
commit 021dcab8fb

@ -8,17 +8,17 @@ from actesdataset import EtreeXMLDataSet, XMLDataSetCollection
logger = logging.getLogger(__name__)
#with KedroSession.create() as session:
# context = session.load_context()
# catalog = context.get_catalog()
with KedroSession.create() as session:
context = session.load_context()
catalog = context.get_catalog()
def parse_xml_collection(datasetcol: XMLDataSetCollection) -> Dict[str, EtreeXMLDataSet]:
"node function entry point, performs batch processing"
datasets = datasetcol.datasets
housename = datasetcol._housename
outputfolderpath = f"data/02_intermediate/houses/{housename}/xml"
# output_catalog = catalog[housename + '_xmlcontent']
# outputfolderpath = output_catalog['folderpath']
# outputfolderpath = f"data/02_intermediate/houses/{housename}/xml"
output_catalog = catalog[housename + '_xmlcontent']
outputfolderpath = output_catalog['folderpath']
output_datasets = dict()
for dataset_filenamestem, dataset in datasets.items():
# a manual load is required here, because

@ -10,10 +10,16 @@ def create_pipeline(**kwargs) -> Pipeline:
node(
func=parse_xml_collection,
inputs="bourbon",
outputs=None, #"bourbon_xmlcontent",
outputs="bourbon_xmlcontent",
name="bourbon_ds_collection",
),
# node(
# func=parse_json_collection,
# inputs="bourbon",
# outputs="bourbon_json",
# name="bourbon_json_ds_collection",
# ),
# node(
# func=parse_xml_collection,
# inputs="berry",
# outputs=None, #"berry_xmlcontent",

Loading…
Cancel
Save