tests pour traitement par lots

develop
gwen 3 years ago
parent f4d14cd68c
commit 59ce8f1f1b

@ -0,0 +1,23 @@
Coding Standards
====================
Import ordering
-------------------
1. standard-library (built-in) imports
2. third-party (pip-installed) imports
3. framework imports
4. local project imports
.. rubric:: Sample
.. code-block:: python
from typing import Dict
from pathlib import Path
from kedro.framework.context import KedroContext, load_package_context
from kedro.pipeline import Pipeline
from actes_princiers.pipeline_registry import register_pipelines

@ -5,7 +5,7 @@ Documentation technique du projet Actes princiers
:maxdepth: 1
data
coding_standards
Indices and tables
==================

@ -0,0 +1,50 @@
from typing import Dict
from kedro.framework.context import KedroContext
from kedro.pipeline import Pipeline
from actesdataset import XMLDataSet
def catalog_factory(catalog):
    """Add one input and one output ``XMLDataSet`` entry per loaded key.

    The ``load_full_xml_catalog`` entry is loaded from ``catalog``; for every
    key returned by its ``.items()``, two entries are added to ``catalog``
    (replacing any entry that already has the same name):

    * ``"load_full_xml_catalog" + key`` with file path
      ``data/01_raw/xml/Anjou/<key>.xml``
    * ``"preprocess_full_catalog_html" + key`` with file path
      ``data/02_intermediate/xml/Anjou/<key>.html``

    Both entries are ``XMLDataSet`` instances, including the ``.html`` output.

    Args:
        catalog: object exposing ``load(name)`` and
            ``add(data_set_name=..., data_set=..., replace=...)``.
            It is modified in place; nothing is returned.
    """
    partitions = catalog.load("load_full_xml_catalog")
    for partition_key, _loader in partitions.items():
        # FIXME: the directories below are hard-coded; they should be
        # retrieved from the partitioned catalog entry instead.
        entries = (
            # programmatically added input catalog entry
            (
                "load_full_xml_catalog" + partition_key,
                "data/01_raw/xml/Anjou/" + partition_key + ".xml",
            ),
            # programmatically added output catalog entry
            (
                "preprocess_full_catalog_html" + partition_key,
                "data/02_intermediate/xml/Anjou/" + partition_key + ".html",
            ),
        )
        for entry_name, entry_path in entries:
            catalog.add(
                data_set_name=entry_name,
                data_set=XMLDataSet(filepath=entry_path),
                replace=True,
            )
class ProjectContext(KedroContext):
    """Kedro context for the project that extends the catalog in code.

    On top of the catalog built by ``KedroContext``, ``_get_catalog`` adds a
    fixed test entry and then the per-key entries created by
    ``catalog_factory``.
    """

    project_name = "actes princiers"
    project_version = "0.1"
    package_name = "actes_princiers"

    def _get_pipelines(self) -> Dict[str, Pipeline]:
        """Return the result of the project's ``register_pipelines()``."""
        # Function-scope import: this module has no top-level import of
        # ``register_pipelines``, so calling it unqualified would raise
        # NameError. Importing here also keeps the pipeline registry out of
        # this module's import-time dependencies.
        from actes_princiers.pipeline_registry import register_pipelines

        return register_pipelines()

    def _get_catalog(self, *args, **kwargs):
        """Build the parent catalog, then add the programmatic entries.

        All arguments are forwarded unchanged to
        ``KedroContext._get_catalog``.

        Returns:
            The catalog returned by the parent class, with the
            ``mon_test_de_catalogue`` entry and the entries added by
            ``catalog_factory`` registered on it.
        """
        catalog = super()._get_catalog(*args, **kwargs)
        catalog.add(
            data_set_name="mon_test_de_catalogue",
            data_set=XMLDataSet(
                filepath="data/02_intermediate/xml/Anjou/test.dat",
            ),
            replace=True,
        )
        catalog_factory(catalog)
        return catalog

@ -1,15 +1,17 @@
from kedro.pipeline import Pipeline, node, pipeline
from .nodes import parse_xsl
from actesdataset import XMLDataSet
#from actesdataset import XMLDataSet
from kedro.io import PartitionedDataSet
#from kedro.io import PartitionedDataSet
#from kedro.framework.session import KedroSession
#from kedro.context import KedroContext, load_context
from kedro.framework.session import KedroSession
with KedroSession.create() as session:
context = session.load_context()
catalog = context.catalog
print("----------------------------")
print(catalog.list())
#with KedroSession.create() as session:
# context = session.load_context()
# catalog = context.catalog
#catalog.add(data_set_name="mon_test_de_catalogue",
# data_set=XMLDataSet(
@ -17,7 +19,6 @@ from kedro.io import PartitionedDataSet
# ),
# replace=True,
# )
#print(catalog.list())
#def from_dict(dico):
@ -74,21 +75,17 @@ def nodes_factory():
# adding programmatically an input catalog entry
input_catalog_name = "load_full_xml_catalog" + in_catalog_key
# FIXME: this path should be retrieved from the partitioned catalog entry
input_filepath = "data/01_raw/xml/Anjou/" + in_catalog_key + ".html"
catalog.add(data_set_name=input_catalog_name,
data_set=XMLDataSet(filepath=input_filepath),
replace=True)
# if input_catalog_name in catalog.list():
# print("OK")
# else:
# print("NOK")
# input_filepath = "data/01_raw/xml/Anjou/" + in_catalog_key + ".html"
# catalog.add(data_set_name=input_catalog_name,
# data_set=XMLDataSet(filepath=input_filepath),
# replace=True)
# adding programmatically an output catalog entry
output_catalog_name = "preprocess_full_catalog_html" + in_catalog_key
# FIXME: this path should be retrieved from the partitioned catalog entry
output_filepath = "data/02_intermediate/xml/Anjou/" + in_catalog_key + ".html"
catalog.add(data_set_name=output_catalog_name,
data_set=XMLDataSet(filepath=output_filepath),
replace=True)
# output_filepath = "data/02_intermediate/xml/Anjou/" + in_catalog_key + ".html"
# catalog.add(data_set_name=output_catalog_name,
# data_set=XMLDataSet(filepath=output_filepath),
# replace=True)
# constructing the node programmatically
nodes.append(node(
func=parse_xsl,
@ -97,22 +94,20 @@ def nodes_factory():
name=in_catalog_key,
tags="xsl",
))
# XXX
# context.catalog = catalog
return nodes
#nodes = nodes_factory()
nodes = nodes_factory()
def create_pipeline(**kwargs):
return pipeline(
[
node(
func=parse_xsl,
inputs=["load_xml", "params:xlststylesheet"],
outputs="preprocess_html",
name="preprocess_html",
tags="xsl",
),
]
)
return pipeline(nodes)
# [
# node(
# func=parse_xsl,
# inputs=["load_xml", "params:xlststylesheet"],
# outputs="preprocess_html",
# name="preprocess_html",
# tags="xsl",
# ),
# ]
# )

@ -34,7 +34,8 @@ https://kedro.readthedocs.io/en/stable/kedro_project_setup/settings.html."""
# Class that manages Kedro's library components.
# from kedro.framework.context import KedroContext
# CONTEXT_CLASS = KedroContext
from .mycontext import ProjectContext
CONTEXT_CLASS = ProjectContext
# Class that manages the Data Catalog.
# from kedro.io import DataCatalog

@ -7,7 +7,8 @@ from kedro.framework.context import KedroContext, load_package_context
from kedro.pipeline import Pipeline
from actes_princiers.pipeline_registry import register_pipelines
#bnhm.pipeline import create_pipelines
from actes_princiers.actesdataset import XMLDataSet
class ProjectContext(KedroContext):
@ -21,13 +22,21 @@ class ProjectContext(KedroContext):
def _get_catalog(self, *args, **kwargs):
catalog = super()._get_catalog(*args, **kwargs)
catalog.add(data_set_name="mon_test_de_catalogue",
data_set=XMLDataSet(
filepath="data/02_intermediate/xml/Anjou/test.dat",
),
replace=True,
)
return catalog
def run_package():
# Entry point for running a Kedro project packaged with `kedro package`
# using `python -m <project_package>.run` command.
project_context = load_package_context(
project_path=Path.cwd(), package_name=Path(__file__).resolve().parent.name
project_path=Path.cwd(),
package_name=Path(__file__).resolve().parent.name
)
project_context.run()

Loading…
Cancel
Save