|
|
|
|
@ -1,15 +1,17 @@
|
|
|
|
|
from kedro.pipeline import Pipeline, node, pipeline
|
|
|
|
|
from .nodes import parse_xsl
|
|
|
|
|
from actesdataset import XMLDataSet
|
|
|
|
|
#from actesdataset import XMLDataSet
|
|
|
|
|
|
|
|
|
|
from kedro.io import PartitionedDataSet
|
|
|
|
|
#from kedro.io import PartitionedDataSet
|
|
|
|
|
|
|
|
|
|
#from kedro.framework.session import KedroSession
|
|
|
|
|
#from kedro.context import KedroContext, load_context
|
|
|
|
|
from kedro.framework.session import KedroSession
|
|
|
|
|
|
|
|
|
|
# Import-time setup: open a Kedro session, load its context and expose the
# context's data catalog under the module-level name ``catalog``.
with KedroSession.create() as session:
    context = session.load_context()
    catalog = context.catalog
    # Debug output: a separator line followed by the result of catalog.list().
    print("----------------------------", catalog.list(), sep="\n")
|
|
|
|
|
|
|
|
|
|
#with KedroSession.create() as session:
|
|
|
|
|
# context = session.load_context()
|
|
|
|
|
# catalog = context.catalog
|
|
|
|
|
|
|
|
|
|
#catalog.add(data_set_name="mon_test_de_catalogue",
|
|
|
|
|
# data_set=XMLDataSet(
|
|
|
|
|
@ -17,7 +19,6 @@ from kedro.io import PartitionedDataSet
|
|
|
|
|
# ),
|
|
|
|
|
# replace=True,
|
|
|
|
|
# )
|
|
|
|
|
#print(catalog.list())
|
|
|
|
|
|
|
|
|
|
#def from_dict(dico):
|
|
|
|
|
|
|
|
|
|
@ -74,21 +75,17 @@ def nodes_factory():
|
|
|
|
|
# adding programmatically an input catalog entry
|
|
|
|
|
input_catalog_name = "load_full_xml_catalog" + in_catalog_key
|
|
|
|
|
# FIXME: this path should be retrieved from the Partitioned catalog
|
|
|
|
|
input_filepath = "data/01_raw/xml/Anjou/" + in_catalog_key + ".html"
|
|
|
|
|
catalog.add(data_set_name=input_catalog_name,
|
|
|
|
|
data_set=XMLDataSet(filepath=input_filepath),
|
|
|
|
|
replace=True)
|
|
|
|
|
# if input_catalog_name in catalog.list():
|
|
|
|
|
# print("OK")
|
|
|
|
|
# else:
|
|
|
|
|
# print("NOK")
|
|
|
|
|
# input_filepath = "data/01_raw/xml/Anjou/" + in_catalog_key + ".html"
|
|
|
|
|
# catalog.add(data_set_name=input_catalog_name,
|
|
|
|
|
# data_set=XMLDataSet(filepath=input_filepath),
|
|
|
|
|
# replace=True)
|
|
|
|
|
# adding programmatically an output catalog entry
|
|
|
|
|
output_catalog_name = "preprocess_full_catalog_html" + in_catalog_key
|
|
|
|
|
# FIXME: this path should be retrieved from the Partitioned catalog
|
|
|
|
|
output_filepath = "data/02_intermediate/xml/Anjou/" + in_catalog_key + ".html"
|
|
|
|
|
catalog.add(data_set_name=output_catalog_name,
|
|
|
|
|
data_set=XMLDataSet(filepath=output_filepath),
|
|
|
|
|
replace=True)
|
|
|
|
|
# output_filepath = "data/02_intermediate/xml/Anjou/" + in_catalog_key + ".html"
|
|
|
|
|
# catalog.add(data_set_name=output_catalog_name,
|
|
|
|
|
# data_set=XMLDataSet(filepath=output_filepath),
|
|
|
|
|
# replace=True)
|
|
|
|
|
# constructing the node programmatically
|
|
|
|
|
nodes.append(node(
|
|
|
|
|
func=parse_xsl,
|
|
|
|
|
@ -97,22 +94,20 @@ def nodes_factory():
|
|
|
|
|
name=in_catalog_key,
|
|
|
|
|
tags="xsl",
|
|
|
|
|
))
|
|
|
|
|
# XXX
|
|
|
|
|
# context.catalog = catalog
|
|
|
|
|
return nodes
|
|
|
|
|
|
|
|
|
|
#nodes = nodes_factory()
|
|
|
|
|
# Evaluated at import time: keep the collection returned by nodes_factory()
# under the module-level name ``nodes``.
nodes = nodes_factory()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def create_pipeline(**kwargs):
    """Build the XSL preprocessing pipeline.

    The pipeline holds a single node named ``preprocess_html`` and tagged
    ``xsl``. The node calls ``parse_xsl`` with the ``load_xml`` dataset and
    the ``params:xlststylesheet`` parameter, and writes its result to the
    ``preprocess_html`` dataset.

    Args:
        **kwargs: Accepted for compatibility with callers; not used here.

    Returns:
        The pipeline object produced by ``pipeline()`` for that single node.
    """
    xsl_node = node(
        func=parse_xsl,
        inputs=["load_xml", "params:xlststylesheet"],
        outputs="preprocess_html",
        name="preprocess_html",
        tags="xsl",
    )
    # Only the statically declared node is returned. A second
    # ``return pipeline(nodes)`` used to follow the first return and could
    # never execute; it was removed along with a commented-out copy of the
    # node definition.
    return pipeline([xsl_node])
|
|
|
|
|
|