|
|
|
@ -2,6 +2,8 @@ import logging
|
|
|
|
import json
|
|
|
|
import json
|
|
|
|
from typing import Dict, Any
|
|
|
|
from typing import Dict, Any
|
|
|
|
from pathlib import Path
|
|
|
|
from pathlib import Path
|
|
|
|
|
|
|
|
from abc import ABC, abstractmethod
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
from lxml import etree
|
|
|
|
from lxml import etree
|
|
|
|
from bs4 import BeautifulSoup
|
|
|
|
from bs4 import BeautifulSoup
|
|
|
|
@ -25,7 +27,7 @@ def _xslt(xsltstylesheet):
|
|
|
|
|
|
|
|
|
|
|
|
xslt_transformer = _xslt(xlststylesheet)
|
|
|
|
xslt_transformer = _xslt(xlststylesheet)
|
|
|
|
|
|
|
|
|
|
|
|
class XMLDataSet:
|
|
|
|
class XMLDataSet(ABC):
|
|
|
|
"Abstract base class for an XML dataset loader"
|
|
|
|
"Abstract base class for an XML dataset loader"
|
|
|
|
|
|
|
|
|
|
|
|
def __init__(self, filepath: str) -> None:
|
|
|
|
def __init__(self, filepath: str) -> None:
|
|
|
|
@ -40,6 +42,12 @@ class XMLDataSet:
|
|
|
|
"kedro's API-like repr()"
|
|
|
|
"kedro's API-like repr()"
|
|
|
|
return dict(filepath=self._filepath)
|
|
|
|
return dict(filepath=self._filepath)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@abstractmethod
|
|
|
|
|
|
|
|
def _load(self):
|
|
|
|
|
|
|
|
pass
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _save(self, data:str) -> None:
|
|
|
|
|
|
|
|
pass
|
|
|
|
|
|
|
|
|
|
|
|
class EtreeXMLDataSet(XMLDataSet):
|
|
|
|
class EtreeXMLDataSet(XMLDataSet):
|
|
|
|
"XMLDataSet loader with lxml.etree (lxml.etree._ElementTree)"
|
|
|
|
"XMLDataSet loader with lxml.etree (lxml.etree._ElementTree)"
|
|
|
|
@ -159,7 +167,7 @@ class JSONDataSetCollection(DataSetCollection):
|
|
|
|
return self
|
|
|
|
return self
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class JSONDataSet(AbstractDataSet):
|
|
|
|
class JSONDataSet: #(AbstractDataSet):
|
|
|
|
def __init__(self, filepath: str):
|
|
|
|
def __init__(self, filepath: str):
|
|
|
|
self._filepath = filepath
|
|
|
|
self._filepath = filepath
|
|
|
|
|
|
|
|
|
|
|
|
@ -179,12 +187,11 @@ class FullJSONDataSetCollection(DataSetCollection):
|
|
|
|
def _load(self) -> dict[str, JSONDataSet]:
|
|
|
|
def _load(self) -> dict[str, JSONDataSet]:
|
|
|
|
"kedro's API loader method"
|
|
|
|
"kedro's API loader method"
|
|
|
|
self.datasets = dict()
|
|
|
|
self.datasets = dict()
|
|
|
|
for filepath in sorted(self._folderpath.glob("*.xml")):
|
|
|
|
for filepath in sorted(self._folderpath.glob("*.json")):
|
|
|
|
self.datasets[filepath.stem] = JSONDataSet(
|
|
|
|
self.datasets[filepath.stem] = JSONDataSet(
|
|
|
|
filepath=str(filepath))
|
|
|
|
filepath=str(filepath))
|
|
|
|
return self
|
|
|
|
return self
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#class TextDataSet:
|
|
|
|
#class TextDataSet:
|
|
|
|
# """loads/saves data from/to a text file using an underlying filesystem
|
|
|
|
# """loads/saves data from/to a text file using an underlying filesystem
|
|
|
|
# example usage
|
|
|
|
# example usage
|
|
|
|
|