abstractmethod

develop
gwen 3 years ago
parent 6d0e19ae94
commit d695c8b8f0

@ -4,8 +4,9 @@ from typing import Dict
from kedro.framework.session import KedroSession
from actesdataset import EtreeXMLDataSet, BsXMLDataSet
from actesdataset import XMLDataSetCollection, JSONDataSetCollection
from actesdataset import EtreeXMLDataSet, BsXMLDataSet, JSONDataSet
from actesdataset import (XMLDataSetCollection, JSONDataSetCollection)
# FullJSONDataSetCollection)
logger = logging.getLogger(__name__)
@ -61,4 +62,15 @@ def parse_json_collection(datasetcol: JSONDataSetCollection) -> Dict[str, BsXMLD
output_datasets[dataset_filenamestem] = output_xmldataset
return output_datasets
#def add_xmlcontent_tojson(jsondoc: JSONDataSetCollection, xmlcontent: XMLDataSetCollection) ->
#def add_xmlcontent_tojson(jsondoc: JSONDataSetCollection, xmlcontent: XMLDataSetCollection) -> Dict[str, JSONDataSet]:
# logger.info("9999999999999999999999" + str(xmlcontent.datasets.keys()))
# json_datasets = jsondoc.datasets
## xmlcontent._load()
# logger.info(str(xmlcontent))
# xmlcontent = xmlcontent.datasets
# for dataset_filenamestem, dataset in json_datasets.items():
# document = dataset._load()
# document['xmlcontent'] = xmlcontent[dataset_filenamestem].source_doc
# return json_datasets
#

@ -1,7 +1,8 @@
from kedro.pipeline import Pipeline, node, pipeline
from .nodes import parse_xml_collection, parse_json_collection
from .nodes import (parse_xml_collection, parse_json_collection)
# add_xmlcontent_tojson)
def create_pipeline(**kwargs) -> Pipeline:
@ -21,7 +22,7 @@ def create_pipeline(**kwargs) -> Pipeline:
),
# node(
# func=add_xmlcontent_tojson,
# inputs=["bourbon_jsonoutput", "bourbon_xmlcontent"],
# inputs=["bourbon_json", "bourbon_xmlcontent"],
# outputs="bourbon_fulljson",
# name="bourbon_fulljson_ds_collection",
# ),

@ -2,6 +2,8 @@ import logging
import json
from typing import Dict, Any
from pathlib import Path
from abc import ABC, abstractmethod
from lxml import etree
from bs4 import BeautifulSoup
@ -25,7 +27,7 @@ def _xslt(xsltstylesheet):
xslt_transformer = _xslt(xlststylesheet)
class XMLDataSet:
class XMLDataSet(ABC):
"Abstract base class for an XML dataset loader"
def __init__(self, filepath: str) -> None:
@ -40,6 +42,12 @@ class XMLDataSet:
"kedro's API-like repr()"
return dict(filepath=self._filepath)
@abstractmethod
def _load(self):
pass
def _save(self, data:str) -> None:
pass
class EtreeXMLDataSet(XMLDataSet):
"XMLDataSet loader with lxml.etree (lxml.etree._ElementTree)"
@ -159,7 +167,7 @@ class JSONDataSetCollection(DataSetCollection):
return self
class JSONDataSet(AbstractDataSet):
class JSONDataSet: #(AbstractDataSet):
def __init__(self, filepath: str):
self._filepath = filepath
@ -179,12 +187,11 @@ class FullJSONDataSetCollection(DataSetCollection):
def _load(self) -> dict[str, JSONDataSet]:
"kedro's API loader method"
self.datasets = dict()
for filepath in sorted(self._folderpath.glob("*.xml")):
for filepath in sorted(self._folderpath.glob("*.json")):
self.datasets[filepath.stem] = JSONDataSet(
filepath=str(filepath))
return self
#class TextDataSet:
# """loads/saves data from/to a text file using an underlying filesystem
# example usage

Loading…
Cancel
Save