develop
gwen 3 years ago
parent 73e8524fd1
commit c8a3cb16f1

@ -10,32 +10,32 @@ bourbon_xmlcontent:
housename: bourbon
folderpath: data/02_intermediate/houses/bourbon/xml
#bourbon_json:
# type: actesdataset.XMLDataSetCollection
# housename: bourbon
# folderpath: data/02_intermediate/houses/bourbon/json
bourbon_json:
type: actesdataset.XMLDataSetCollection
housename: bourbon
folderpath: data/02_intermediate/houses/bourbon/json
# ________________________________________________________________________
## ________________________________________________________________________
berry:
type: actesdataset.XMLDataSetCollection
housename: berry
folderpath: data/01_raw/houses/berry
#berry:
# type: actesdataset.XMLDataSetCollection
# housename: berry
# folderpath: data/01_raw/houses/berry
berry_xmlcontent:
type: actesdataset.XMLDataSetCollection
housename: berry
folderpath: data/02_intermediate/houses/berry/xml
#berry_xmlcontent:
# type: actesdataset.XMLDataSetCollection
# housename: berry
# folderpath: data/02_intermediate/houses/berry/xml
# ________________________________________________________________________
## ________________________________________________________________________
anjou:
type: actesdataset.XMLDataSetCollection
housename: anjou
folderpath: data/01_raw/houses/anjou
#anjou:
# type: actesdataset.XMLDataSetCollection
# housename: anjou
# folderpath: data/01_raw/houses/anjou
anjou_xmlcontent:
type: actesdataset.XMLDataSetCollection
housename: anjou
folderpath: data/02_intermediate/houses/anjou/xml
#anjou_xmlcontent:
# type: actesdataset.XMLDataSetCollection
# housename: anjou
# folderpath: data/02_intermediate/houses/anjou/xml

@ -8,16 +8,17 @@ from actesdataset import EtreeXMLDataSet, XMLDataSetCollection
logger = logging.getLogger(__name__)
with KedroSession.create() as session:
context = session.load_context()
catalog = context.get_catalog()
#with KedroSession.create() as session:
# context = session.load_context()
# catalog = context.get_catalog()
def parse_xml_collection(datasetcol: XMLDataSetCollection) -> Dict[str, EtreeXMLDataSet]:
"node function entry point, performs batch processing"
datasets = datasetcol.datasets
housename = datasetcol.housename
output_catalog = catalog[housename + '_xmlcontent']
outputfolderpath = output_catalog['folderpath']
housename = datasetcol._housename
outputfolderpath = f"data/02_intermediate/houses/{housename}/xml"
# output_catalog = catalog[housename + '_xmlcontent']
# outputfolderpath = output_catalog['folderpath']
output_datasets = dict()
for dataset_filenamestem, dataset in datasets.items():
# a manual load is required here, because

@ -9,22 +9,22 @@ def create_pipeline(**kwargs) -> Pipeline:
[
node(
func=parse_xml_collection,
inputs=["bourbon"],
outputs="bourbon_xmlcontent",
inputs="bourbon",
outputs=None, #"bourbon_xmlcontent",
name="bourbon_ds_collection",
),
node(
func=parse_xml_collection,
inputs=["berry"],
outputs="berry_xmlcontent",
name="berry_ds_collection",
),
node(
func=parse_xml_collection,
inputs=["anjou"],
outputs="anjou_xmlcontent",
name="anjou_ds_collection",
),
# node(
# func=parse_xml_collection,
# inputs="berry",
# outputs=None, #"berry_xmlcontent",
# name="berry_ds_collection",
# ),
# node(
# func=parse_xml_collection,
# inputs="anjou",
# outputs=None, # "anjou_xmlcontent",
# name="anjou_ds_collection",
# ),
]
)

@ -122,7 +122,7 @@ class XMLDataSetCollection(AbstractDataSet):
def __init__(self,
housename: str,
folderpath: str) -> None:
self.housename = housename
self._housename = housename
self._folderpath = Path(folderpath)
def _load(self) -> dict[str, EtreeXMLDataSet]:
@ -145,7 +145,7 @@ class XMLDataSetCollection(AbstractDataSet):
def _describe(self) -> dict[str, Any]:
"kedro's API repr()"
return dict(name=self.housename, folderpath=self._folderpath)
return dict(name=self._housename, folderpath=self._folderpath)
#class TextDataSet:
# """loads/saves data from/to a text file using an underlying filesystem

Loading…
Cancel
Save