develop
gwen 3 years ago
parent 73e8524fd1
commit c8a3cb16f1

@ -10,32 +10,32 @@ bourbon_xmlcontent:
housename: bourbon housename: bourbon
folderpath: data/02_intermediate/houses/bourbon/xml folderpath: data/02_intermediate/houses/bourbon/xml
#bourbon_json: bourbon_json:
# type: actesdataset.XMLDataSetCollection type: actesdataset.XMLDataSetCollection
# housename: bourbon housename: bourbon
# folderpath: data/02_intermediate/houses/bourbon/json folderpath: data/02_intermediate/houses/bourbon/json
# ________________________________________________________________________ ## ________________________________________________________________________
berry: #berry:
type: actesdataset.XMLDataSetCollection # type: actesdataset.XMLDataSetCollection
housename: berry # housename: berry
folderpath: data/01_raw/houses/berry # folderpath: data/01_raw/houses/berry
berry_xmlcontent: #berry_xmlcontent:
type: actesdataset.XMLDataSetCollection # type: actesdataset.XMLDataSetCollection
housename: berry # housename: berry
folderpath: data/02_intermediate/houses/berry/xml # folderpath: data/02_intermediate/houses/berry/xml
# ________________________________________________________________________ ## ________________________________________________________________________
anjou: #anjou:
type: actesdataset.XMLDataSetCollection # type: actesdataset.XMLDataSetCollection
housename: berry # housename: berry
folderpath: data/01_raw/houses/anjou # folderpath: data/01_raw/houses/anjou
anjou_xmlcontent: #anjou_xmlcontent:
type: actesdataset.XMLDataSetCollection # type: actesdataset.XMLDataSetCollection
housename: berry # housename: berry
folderpath: data/02_intermediate/houses/anjou/xml # folderpath: data/02_intermediate/houses/anjou/xml

@ -8,16 +8,17 @@ from actesdataset import EtreeXMLDataSet, XMLDataSetCollection
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
with KedroSession.create() as session: #with KedroSession.create() as session:
context = session.load_context() # context = session.load_context()
catalog = context.get_catalog() # catalog = context.get_catalog()
def parse_xml_collection(datasetcol: XMLDataSetCollection) -> Dict[str, EtreeXMLDataSet]: def parse_xml_collection(datasetcol: XMLDataSetCollection) -> Dict[str, EtreeXMLDataSet]:
"node function entry point, performs batch processing" "node function entry point, performs batch processing"
datasets = datasetcol.datasets datasets = datasetcol.datasets
housename = datasetcol.housename housename = datasetcol._housename
output_catalog = catalog[housename + '_xmlcontent'] outputfolderpath = f"data/02_intermediate/houses/{housename}/xml"
outputfolderpath = output_catalog['folderpath'] # output_catalog = catalog[housename + '_xmlcontent']
# outputfolderpath = output_catalog['folderpath']
output_datasets = dict() output_datasets = dict()
for dataset_filenamestem, dataset in datasets.items(): for dataset_filenamestem, dataset in datasets.items():
# a manual load is required here, because # a manual load is required here, because

@ -9,22 +9,22 @@ def create_pipeline(**kwargs) -> Pipeline:
[ [
node( node(
func=parse_xml_collection, func=parse_xml_collection,
inputs=["bourbon"], inputs="bourbon",
outputs="bourbon_xmlcontent", outputs=None, #"bourbon_xmlcontent",
name="bourbon_ds_collection", name="bourbon_ds_collection",
), ),
node( # node(
func=parse_xml_collection, # func=parse_xml_collection,
inputs=["berry"], # inputs="berry",
outputs="berry_xmlcontent", # outputs=None, #"berry_xmlcontent",
name="berry_ds_collection", # name="berry_ds_collection",
), # ),
node( # node(
func=parse_xml_collection, # func=parse_xml_collection,
inputs=["anjou"], # inputs="anjou",
outputs="anjou_xmlcontent", # outputs=None, # "anjou_xmlcontent",
name="anjou_ds_collection", # name="anjou_ds_collection",
), # ),
] ]
) )

@ -122,9 +122,9 @@ class XMLDataSetCollection(AbstractDataSet):
def __init__(self, def __init__(self,
housename: str, housename: str,
folderpath: str) -> None: folderpath: str) -> None:
self.housename = housename self._housename = housename
self._folderpath = Path(folderpath) self._folderpath = Path(folderpath)
def _load(self) -> dict[str, EtreeXMLDataSet]: def _load(self) -> dict[str, EtreeXMLDataSet]:
"kedro's API loader method" "kedro's API loader method"
self.datasets = dict() self.datasets = dict()
@ -133,7 +133,7 @@ class XMLDataSetCollection(AbstractDataSet):
filepath=str(filepath)) filepath=str(filepath))
# return self.datasets # return self.datasets
return self return self
def _save(self, data) -> None: def _save(self, data) -> None:
"""kedro's API saver method """kedro's API saver method
@ -145,7 +145,7 @@ class XMLDataSetCollection(AbstractDataSet):
def _describe(self) -> dict[str, Any]: def _describe(self) -> dict[str, Any]:
"kedro's API repr()" "kedro's API repr()"
return dict(name=self.housename, folderpath=self._folderpath) return dict(name=self._housename, folderpath=self._folderpath)
#class TextDataSet: #class TextDataSet:
# """loads/saves data from/to a text file using an underlying filesystem # """loads/saves data from/to a text file using an underlying filesystem

Loading…
Cancel
Save