abstractmethod

develop
gwen 3 years ago
parent 6d0e19ae94
commit d695c8b8f0

@ -4,8 +4,9 @@ from typing import Dict
from kedro.framework.session import KedroSession from kedro.framework.session import KedroSession
from actesdataset import EtreeXMLDataSet, BsXMLDataSet from actesdataset import EtreeXMLDataSet, BsXMLDataSet, JSONDataSet
from actesdataset import XMLDataSetCollection, JSONDataSetCollection from actesdataset import (XMLDataSetCollection, JSONDataSetCollection)
# FullJSONDataSetCollection)
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -61,4 +62,15 @@ def parse_json_collection(datasetcol: JSONDataSetCollection) -> Dict[str, BsXMLD
output_datasets[dataset_filenamestem] = output_xmldataset output_datasets[dataset_filenamestem] = output_xmldataset
return output_datasets return output_datasets
#def add_xmlcontent_tojson(jsondoc: JSONDataSetCollection, xmlcontent: XMLDataSetCollection) -> #def add_xmlcontent_tojson(jsondoc: JSONDataSetCollection, xmlcontent: XMLDataSetCollection) -> Dict[str, JSONDataSet]:
# logger.info("9999999999999999999999" + str(xmlcontent.datasets.keys()))
# json_datasets = jsondoc.datasets
## xmlcontent._load()
# logger.info(str(xmlcontent))
# xmlcontent = xmlcontent.datasets
# for dataset_filenamestem, dataset in json_datasets.items():
# document = dataset._load()
# document['xmlcontent'] = xmlcontent[dataset_filenamestem].source_doc
# return json_datasets
#

@ -1,7 +1,8 @@
from kedro.pipeline import Pipeline, node, pipeline from kedro.pipeline import Pipeline, node, pipeline
from .nodes import parse_xml_collection, parse_json_collection from .nodes import (parse_xml_collection, parse_json_collection)
# add_xmlcontent_tojson)
def create_pipeline(**kwargs) -> Pipeline: def create_pipeline(**kwargs) -> Pipeline:
@ -21,7 +22,7 @@ def create_pipeline(**kwargs) -> Pipeline:
), ),
# node( # node(
# func=add_xmlcontent_tojson, # func=add_xmlcontent_tojson,
# inputs=["bourbon_jsonoutput", "bourbon_xmlcontent"], # inputs=["bourbon_json", "bourbon_xmlcontent"],
# outputs="bourbon_fulljson", # outputs="bourbon_fulljson",
# name="bourbon_fulljson_ds_collection", # name="bourbon_fulljson_ds_collection",
# ), # ),

@ -2,6 +2,8 @@ import logging
import json import json
from typing import Dict, Any from typing import Dict, Any
from pathlib import Path from pathlib import Path
from abc import ABC, abstractmethod
from lxml import etree from lxml import etree
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
@ -25,7 +27,7 @@ def _xslt(xsltstylesheet):
xslt_transformer = _xslt(xlststylesheet) xslt_transformer = _xslt(xlststylesheet)
class XMLDataSet: class XMLDataSet(ABC):
"Abstract base class for an XML dataset loader" "Abstract base class for an XML dataset loader"
def __init__(self, filepath: str) -> None: def __init__(self, filepath: str) -> None:
@ -40,6 +42,12 @@ class XMLDataSet:
"kedro's API-like repr()" "kedro's API-like repr()"
return dict(filepath=self._filepath) return dict(filepath=self._filepath)
@abstractmethod
def _load(self):
pass
def _save(self, data:str) -> None:
pass
class EtreeXMLDataSet(XMLDataSet): class EtreeXMLDataSet(XMLDataSet):
"XMLDataSet loader with lxml.etree (lxml.etree._ElementTree)" "XMLDataSet loader with lxml.etree (lxml.etree._ElementTree)"
@ -159,7 +167,7 @@ class JSONDataSetCollection(DataSetCollection):
return self return self
class JSONDataSet(AbstractDataSet): class JSONDataSet: #(AbstractDataSet):
def __init__(self, filepath: str): def __init__(self, filepath: str):
self._filepath = filepath self._filepath = filepath
@ -179,12 +187,11 @@ class FullJSONDataSetCollection(DataSetCollection):
def _load(self) -> dict[str, JSONDataSet]: def _load(self) -> dict[str, JSONDataSet]:
"kedro's API loader method" "kedro's API loader method"
self.datasets = dict() self.datasets = dict()
for filepath in sorted(self._folderpath.glob("*.xml")): for filepath in sorted(self._folderpath.glob("*.json")):
self.datasets[filepath.stem] = JSONDataSet( self.datasets[filepath.stem] = JSONDataSet(
filepath=str(filepath)) filepath=str(filepath))
return self return self
#class TextDataSet: #class TextDataSet:
# """loads/saves data from/to a text file using an underlying filesystem # """loads/saves data from/to a text file using an underlying filesystem
# example usage # example usage

Loading…
Cancel
Save