From d695c8b8f0a1ae64941d0b5bf824c93b9f4533e4 Mon Sep 17 00:00:00 2001 From: gwen Date: Sat, 8 Jul 2023 15:38:56 +0200 Subject: [PATCH] abstractmethod --- .../pipelines/xml_processing/nodes.py | 18 +++++++++++++++--- .../pipelines/xml_processing/pipeline.py | 5 +++-- actes-princiers/src/actesdataset.py | 15 +++++++++++---- 3 files changed, 29 insertions(+), 9 deletions(-) diff --git a/actes-princiers/src/actes_princiers/pipelines/xml_processing/nodes.py b/actes-princiers/src/actes_princiers/pipelines/xml_processing/nodes.py index ee01cab..934f977 100755 --- a/actes-princiers/src/actes_princiers/pipelines/xml_processing/nodes.py +++ b/actes-princiers/src/actes_princiers/pipelines/xml_processing/nodes.py @@ -4,8 +4,9 @@ from typing import Dict from kedro.framework.session import KedroSession -from actesdataset import EtreeXMLDataSet, BsXMLDataSet -from actesdataset import XMLDataSetCollection, JSONDataSetCollection +from actesdataset import EtreeXMLDataSet, BsXMLDataSet, JSONDataSet +from actesdataset import (XMLDataSetCollection, JSONDataSetCollection) +# FullJSONDataSetCollection) logger = logging.getLogger(__name__) @@ -61,4 +62,15 @@ def parse_json_collection(datasetcol: JSONDataSetCollection) -> Dict[str, BsXMLD output_datasets[dataset_filenamestem] = output_xmldataset return output_datasets -#def add_xmlcontent_tojson(jsondoc: JSONDataSetCollection, xmlcontent: XMLDataSetCollection) -> +#def add_xmlcontent_tojson(jsondoc: JSONDataSetCollection, xmlcontent: XMLDataSetCollection) -> Dict[str, JSONDataSet]: + +# logger.info("9999999999999999999999" + str(xmlcontent.datasets.keys())) +# json_datasets = jsondoc.datasets +## xmlcontent._load() +# logger.info(str(xmlcontent)) +# xmlcontent = xmlcontent.datasets +# for dataset_filenamestem, dataset in json_datasets.items(): +# document = dataset._load() +# document['xmlcontent'] = xmlcontent[dataset_filenamestem].source_doc +# return json_datasets +# diff --git a/actes-princiers/src/actes_princiers/pipelines/xml_processing/pipeline.py b/actes-princiers/src/actes_princiers/pipelines/xml_processing/pipeline.py index 04f94bc..dd76016 100755 --- a/actes-princiers/src/actes_princiers/pipelines/xml_processing/pipeline.py +++ b/actes-princiers/src/actes_princiers/pipelines/xml_processing/pipeline.py @@ -1,7 +1,8 @@ from kedro.pipeline import Pipeline, node, pipeline -from .nodes import parse_xml_collection, parse_json_collection +from .nodes import (parse_xml_collection, parse_json_collection) +# add_xmlcontent_tojson) def create_pipeline(**kwargs) -> Pipeline: @@ -21,7 +22,7 @@ def create_pipeline(**kwargs) -> Pipeline: ), # node( # func=add_xmlcontent_tojson, -# inputs=["bourbon_jsonoutput", "bourbon_xmlcontent"], +# inputs=["bourbon_json", "bourbon_xmlcontent"], # outputs="bourbon_fulljson", # name="bourbon_fulljson_ds_collection", # ), diff --git a/actes-princiers/src/actesdataset.py b/actes-princiers/src/actesdataset.py index b42163b..aba98f5 100644 --- a/actes-princiers/src/actesdataset.py +++ b/actes-princiers/src/actesdataset.py @@ -2,6 +2,8 @@ import logging import json from typing import Dict, Any from pathlib import Path +from abc import ABC, abstractmethod + from lxml import etree from bs4 import BeautifulSoup @@ -25,7 +27,7 @@ def _xslt(xsltstylesheet): xslt_transformer = _xslt(xlststylesheet) -class XMLDataSet: +class XMLDataSet(ABC): "Abstract base class for an XML dataset loader" def __init__(self, filepath: str) -> None: @@ -40,6 +42,12 @@ class XMLDataSet: "kedro's API-like repr()" return dict(filepath=self._filepath) + @abstractmethod + def _load(self): + pass + + def _save(self, data:str) -> None: + pass class EtreeXMLDataSet(XMLDataSet): "XMLDataSet loader with lxml.etree (lxml.etree._ElementTree)" @@ -159,7 +167,7 @@ class JSONDataSetCollection(DataSetCollection): return self -class JSONDataSet(AbstractDataSet): +class JSONDataSet: #(AbstractDataSet): def __init__(self, filepath: str): self._filepath = filepath @@ -179,12 +187,11 @@ class FullJSONDataSetCollection(DataSetCollection): def _load(self) -> dict[str, JSONDataSet]: "kedro's API loader method" self.datasets = dict() - for filepath in sorted(self._folderpath.glob("*.xml")): + for filepath in sorted(self._folderpath.glob("*.json")): self.datasets[filepath.stem] = JSONDataSet( filepath=str(filepath)) return self - #class TextDataSet: # """loads/saves data from/to a text file using an underlying filesystem # example usage