From c8a3cb16f11705560c43c116214577ce8df48ff2 Mon Sep 17 00:00:00 2001 From: gwen Date: Thu, 6 Jul 2023 17:59:28 +0200 Subject: [PATCH] bourbon --- actes-princiers/conf/base/catalog.yml | 44 +++++++++---------- .../pipelines/xml_processing/nodes.py | 13 +++--- .../pipelines/xml_processing/pipeline.py | 28 ++++++------ actes-princiers/src/actesdataset.py | 8 ++-- 4 files changed, 47 insertions(+), 46 deletions(-) diff --git a/actes-princiers/conf/base/catalog.yml b/actes-princiers/conf/base/catalog.yml index f410e62..8d305a3 100644 --- a/actes-princiers/conf/base/catalog.yml +++ b/actes-princiers/conf/base/catalog.yml @@ -10,32 +10,32 @@ bourbon_xmlcontent: housename: bourbon folderpath: data/02_intermediate/houses/bourbon/xml -#bourbon_json: -# type: actesdataset.XMLDataSetCollection -# housename: bourbon -# folderpath: data/02_intermediate/houses/bourbon/json +bourbon_json: + type: actesdataset.XMLDataSetCollection + housename: bourbon + folderpath: data/02_intermediate/houses/bourbon/json -# ________________________________________________________________________ +## ________________________________________________________________________ -berry: - type: actesdataset.XMLDataSetCollection - housename: berry - folderpath: data/01_raw/houses/berry +#berry: +# type: actesdataset.XMLDataSetCollection +# housename: berry +# folderpath: data/01_raw/houses/berry -berry_xmlcontent: - type: actesdataset.XMLDataSetCollection - housename: berry - folderpath: data/02_intermediate/houses/berry/xml +#berry_xmlcontent: +# type: actesdataset.XMLDataSetCollection +# housename: berry +# folderpath: data/02_intermediate/houses/berry/xml -# ________________________________________________________________________ +## ________________________________________________________________________ -anjou: - type: actesdataset.XMLDataSetCollection - housename: berry - folderpath: data/01_raw/houses/anjou +#anjou: +# type: actesdataset.XMLDataSetCollection +# housename: berry +# folderpath: data/01_raw/houses/anjou -anjou_xmlcontent: - type: actesdataset.XMLDataSetCollection - housename: berry - folderpath: data/02_intermediate/houses/anjou/xml +#anjou_xmlcontent: +# type: actesdataset.XMLDataSetCollection +# housename: berry +# folderpath: data/02_intermediate/houses/anjou/xml diff --git a/actes-princiers/src/actes_princiers/pipelines/xml_processing/nodes.py b/actes-princiers/src/actes_princiers/pipelines/xml_processing/nodes.py index d1330ef..0736d03 100755 --- a/actes-princiers/src/actes_princiers/pipelines/xml_processing/nodes.py +++ b/actes-princiers/src/actes_princiers/pipelines/xml_processing/nodes.py @@ -8,16 +8,17 @@ from actesdataset import EtreeXMLDataSet, XMLDataSetCollection logger = logging.getLogger(__name__) -with KedroSession.create() as session: - context = session.load_context() - catalog = context.get_catalog() +#with KedroSession.create() as session: +# context = session.load_context() +# catalog = context.get_catalog() def parse_xml_collection(datasetcol: XMLDataSetCollection) -> Dict[str, EtreeXMLDataSet]: "node function entry point, performs batch processing" datasets = datasetcol.datasets - housename = datasetcol.housename - output_catalog = catalog[housename + '_xmlcontent'] - outputfolderpath = output_catalog['folderpath'] + housename = datasetcol._housename + outputfolderpath = f"data/02_intermediate/houses/{housename}/xml" +# output_catalog = catalog[housename + '_xmlcontent'] +# outputfolderpath = output_catalog['folderpath'] output_datasets = dict() for dataset_filenamestem, dataset in datasets.items(): # a manual load is required here, because diff --git a/actes-princiers/src/actes_princiers/pipelines/xml_processing/pipeline.py b/actes-princiers/src/actes_princiers/pipelines/xml_processing/pipeline.py index f2c3c3f..a5b8313 100755 --- a/actes-princiers/src/actes_princiers/pipelines/xml_processing/pipeline.py +++ b/actes-princiers/src/actes_princiers/pipelines/xml_processing/pipeline.py @@ -9,22 +9,22 @@ def create_pipeline(**kwargs) -> Pipeline: [ node( func=parse_xml_collection, - inputs=["bourbon"], - outputs="bourbon_xmlcontent", + inputs="bourbon", + outputs=None, #"bourbon_xmlcontent", name="bourbon_ds_collection", ), - node( - func=parse_xml_collection, - inputs=["berry"], - outputs="berry_xmlcontent", - name="berry_ds_collection", - ), - node( - func=parse_xml_collection, - inputs=["anjou"], - outputs="anjou_xmlcontent", - name="anjou_ds_collection", - ), +# node( +# func=parse_xml_collection, +# inputs="berry", +# outputs=None, #"berry_xmlcontent", +# name="berry_ds_collection", +# ), +# node( +# func=parse_xml_collection, +# inputs="anjou", +# outputs=None, # "anjou_xmlcontent", +# name="anjou_ds_collection", +# ), ] ) diff --git a/actes-princiers/src/actesdataset.py b/actes-princiers/src/actesdataset.py index 8f2d135..ce73e00 100644 --- a/actes-princiers/src/actesdataset.py +++ b/actes-princiers/src/actesdataset.py @@ -122,9 +122,9 @@ class XMLDataSetCollection(AbstractDataSet): def __init__(self, housename: str, folderpath: str) -> None: - self.housename = housename + self._housename = housename self._folderpath = Path(folderpath) - + def _load(self) -> dict[str, EtreeXMLDataSet]: "kedro's API loader method" self.datasets = dict() @@ -133,7 +133,7 @@ class XMLDataSetCollection(AbstractDataSet): filepath=str(filepath)) # return self.datasets return self - + def _save(self, data) -> None: """kedro's API saver method @@ -145,7 +145,7 @@ class XMLDataSetCollection(AbstractDataSet): def _describe(self) -> dict[str, Any]: "kedro's API repr()" - return dict(name=self.housename, folderpath=self._folderpath) + return dict(name=self._housename, folderpath=self._folderpath) #class TextDataSet: # """loads/saves data from/to a text file using an underlying filesystem