develop
gwen 3 years ago
parent 30c76bba9a
commit 2bf13c78be

@ -19,11 +19,13 @@ A best-practice setup includes initialising git and creating a virtual environme
- install kedro `pip install kedro` - install kedro `pip install kedro`
- Install the packages and libraries `pip install -r src/requirements.txt` - Install the packages and libraries `pip install -r src/requirements.txt`
**go to `actes-princiers`'s folder**
Then open a terminal in the `actes-princiers`'s folder Then open a terminal in the `actes-princiers`'s folder
and launch jupyter : `kedro jupyter notebook` and launch jupyter : `kedro jupyter notebook`
or start the ipython prompt : `kedro ipython` or start the ipython prompt : `kedro ipython`
## Launching the pipeline ## Launching the pipelines
Open a terminal in the `actes-princiers`'s folder and launch kedro Open a terminal in the `actes-princiers`'s folder and launch kedro
@ -41,6 +43,12 @@ or a search by tags with:
`kedro viz` `kedro viz`
## Building the docs
`./build-docs.sh docs`
The built HTML documentation is in [docs/build/html/](docs/build/html/).
## Developer's rules and guidelines ## Developer's rules and guidelines
Declare any dependencies in `src/requirements.txt` for `pip` installation. Declare any dependencies in `src/requirements.txt` for `pip` installation.

@ -0,0 +1,14 @@
#!/usr/bin/env bash
# Build the Sphinx documentation, or check the links it contains.
#
# Usage: ./build-docs.sh <docs|linkcheck>
#   docs       run the Sphinx "html" builder      -> docs/build/html
#   linkcheck  run the Sphinx "linkcheck" builder -> docs/build/linkcheck
#
# Paths are relative, so the script must be run from the directory that
# contains the `docs/` folder.

# Exit as soon as a command fails.
set -e
# Exit script if you try to use an uninitialized variable.
set -o nounset

usage() {
    echo "Usage: $0 <docs|linkcheck>" >&2
}

# Default to an empty string so that a missing argument reaches the usage
# message below instead of aborting with bash's "unbound variable" error.
action=${1:-}

# sphinx-build flags used below:
#   -W  turn warnings into errors      -E  ignore the saved environment
#   -T  show full tracebacks           -a  write all output files
#   -n  nit-picky mode (linkcheck only)
case "$action" in
    linkcheck)
        sphinx-build -WETan -j auto -D language=en -b linkcheck -d docs/build/doctrees docs/source docs/build/linkcheck
        ;;
    docs)
        sphinx-build -WETa -j auto -D language=en -b html -d docs/build/doctrees docs/source docs/build/html
        ;;
    *)
        # Unknown or missing action: fail loudly rather than silently doing nothing.
        usage
        exit 2
        ;;
esac

@ -0,0 +1 @@
# Delete everything matched by data/02_intermediate/xml/* (the glob skips
# dot-files by default). The path is relative to the current directory.
# NOTE(review): presumably meant to be run from the project root -- confirm.
rm -rf data/02_intermediate/xml/*

Binary file not shown.

After

Width:  |  Height:  |  Size: 70 KiB

@ -11,7 +11,7 @@ Import ordering
.. rubric:: Sample .. rubric:: Sample
.. block-code:: python .. code-block:: python
from typing import Dict from typing import Dict
from pathlib import Path from pathlib import Path

@ -1,27 +1,9 @@
#!/usr/bin/env python3
# actes_princiers documentation build # actes_princiers documentation build
# configuration file, created by sphinx-quickstart. # configuration file, created by sphinx-quickstart.
# #
# This file is execfile()d with the current directory set to its
# containing dir.
#
# Note that not all possible configuration values are present in this
# autogenerated file.
#
# All configuration values have a default; values that are commented out # All configuration values have a default; values that are commented out
# serve to show the default. # serve to show the default.
# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
#
import re
from kedro.framework.cli.utils import find_stylesheets
#from actes_princiers import __version__ as release
release = "0.1" release = "0.1"
# -- Project information ----------------------------------------------------- # -- Project information -----------------------------------------------------
@ -30,14 +12,10 @@ project = "actes_princiers"
author = "Jean-Damien" author = "Jean-Damien"
# The short X.Y version. # The short X.Y version.
version = re.match(r"^([0-9]+\.[0-9]+).*", release).group(1) version = "0.1"
# -- General configuration --------------------------------------------------- # -- General configuration ---------------------------------------------------
# If your documentation needs a minimal Sphinx version, state it here.
#
# needs_sphinx = '1.0'
# Add any Sphinx extension module names here, as strings. They can be # Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones. # ones.
@ -55,7 +33,7 @@ extensions = [
# enable autosummary plugin (table of contents for modules/classes/class # enable autosummary plugin (table of contents for modules/classes/class
# methods) # methods)
autosummary_generate = True #autosummary_generate = True
# Add any paths that contain templates here, relative to this directory. # Add any paths that contain templates here, relative to this directory.
templates_path = ["_templates"] templates_path = ["_templates"]
@ -82,20 +60,34 @@ exclude_patterns = ["_build", "**.ipynb_checkpoints"]
# The name of the Pygments (syntax highlighting) style to use. # The name of the Pygments (syntax highlighting) style to use.
pygments_style = "sphinx" pygments_style = "sphinx"
default_role = 'code'
# -- Options for HTML output ------------------------------------------------- # -- Options for HTML output -------------------------------------------------
# The theme to use for HTML and HTML Help pages. See the documentation for # The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes. # a list of builtin themes.
# #
# html_theme = "sphinx_rtd_theme" html_theme = "sphinx_rtd_theme"
html_theme = "bizstyle"
html_title = "Actes Princiers"
html_short_title = "Actes Princiers"
html_show_sourcelink = False
html_show_sphinx = False
html_show_copyright = True
html_logo = "_static/logo.jpg"
copyright = '2020, Jean-Damien Genero'
# Theme options are theme-specific and customize the look and feel of a theme # Theme options are theme-specific and customize the look and feel of a theme
# further. For a list of options available for each theme, see the # further. For a list of options available for each theme, see the
# documentation. # documentation.
# #
html_theme_options = {"collapse_navigation": False, "style_external_links": True} html_theme_options = {
"collapse_navigation": False,
"style_external_links": True,
'display_version': False,
'logo_only': True,
# 'style_nav_header_background': 'white'
}
# Add any paths that contain custom static files (such as style sheets) here, # Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files, # relative to this directory. They are copied after the builtin static files,
@ -199,24 +191,24 @@ todo_include_todos = False
nbsphinx_kernel_name = "python3" nbsphinx_kernel_name = "python3"
def remove_arrows_in_examples(lines): #def remove_arrows_in_examples(lines):
for i, line in enumerate(lines): # for i, line in enumerate(lines):
lines[i] = line.replace(">>>", "") # lines[i] = line.replace(">>>", "")
def autodoc_process_docstring(app, what, name, obj, options, lines): #def autodoc_process_docstring(app, what, name, obj, options, lines):
remove_arrows_in_examples(lines) # remove_arrows_in_examples(lines)
def skip(app, what, name, obj, skip, options): #def skip(app, what, name, obj, skip, options):
if name == "__init__": # if name == "__init__":
return False # return False
return skip # return skip
def setup(app): #def setup(app):
app.connect("autodoc-process-docstring", autodoc_process_docstring) # app.connect("autodoc-process-docstring", autodoc_process_docstring)
app.connect("autodoc-skip-member", skip) # app.connect("autodoc-skip-member", skip)
# add Kedro stylesheets # # add Kedro stylesheets
for stylesheet in find_stylesheets(): # for stylesheet in find_stylesheets():
app.add_css_file(stylesheet) # app.add_css_file(stylesheet)

@ -1,5 +1,5 @@
Documentation technique du projet Actes princiers Projet Actes Princiers
==================================================== =========================
.. toctree:: .. toctree::
:maxdepth: 1 :maxdepth: 1
@ -7,8 +7,6 @@ Documentation technique du projet Actes princiers
data data
coding_standards coding_standards
Indices and tables .. rubric:: Index
==================
* :ref:`genindex` * :ref:`genindex`
* :ref:`search`

@ -7,9 +7,8 @@ from kedro.pipeline import Pipeline, node, pipeline
from actesdataset import XMLDataSet from actesdataset import XMLDataSet
#from actes_princiers.pipelines.xml_processing.nodes import parse_xsl
def tree(directory, relative_to=None): def tree(directory, relative_to=None):
"helper that returns a directory tree structure"
trees = dict() trees = dict()
for path in sorted(directory.rglob("*.xml")): for path in sorted(directory.rglob("*.xml")):
trees[path.stem] = str(path.relative_to(relative_to)) trees[path.stem] = str(path.relative_to(relative_to))
@ -46,97 +45,27 @@ def house_dataset_loader(catalog):
nodes_description.append(node_description) nodes_description.append(node_description)
return nodes_description return nodes_description
# TODO : next step, pipeline step
#def create_pipeline(**kwargs) -> Dict[str, Pipeline]:
# """
# :return: a mapping "pipeline_name", Pipeline() object
# """
# nodes_description = kwargs['nodes_description']
# dataset_pipeline = pipeline(nodes_factory(nodes_description))
# return {
# "__default__": Pipeline(
# dataset_pipeline
# )
# }
class ProjectContext(KedroContext): class ProjectContext(KedroContext):
project_name = "actes princiers" project_name = "actes princiers"
project_version = "0.1" project_version = "0.1"
package_name = "actes_princiers" package_name = "actes_princiers"
# def get_houses_config(self): def get_houses_config(self):
# """loading from generic configuration file """loading from generic configuration file
# (that is, the global houses `houses.yaml`)""" (that is, the global houses `houses.yaml`)"""
# houses_file = self.config_loader.get("houses*") houses_file = self.config_loader.get("houses*")
# return houses_file['houses'] # FIXME : put this in attribute in the context
return houses_file['houses']
# def houses_data_catalog_loader(self):
# "generic houses PartitionedDataSet"
# houses = self.get_houses_config()
# for house in houses:
# house_name = house['name']
# # FIXME : absolutely not necessary.
# # just retrieve the data's tree directory
## self.custom_catalog.add(house_name, PartitionedDataSet(
## # FIXME put this path in the project's configuration
## dataset=XMLDataSet,
## filename_suffix='.xml'))
# path='data/01_raw/xml/' + house_name,
# def houses_dataset_factory(self):
# "loads all the datasets corresponding to the programmatically loaded catalogs"
# houses = self.get_houses_config()
# for house in houses:
# house_name = house['name']
# self._house_dataset_loader(house_name)
# def house_dataset_loader(self):
# # FIXME : just dataset catalog, not PartitionedCatalog
# # **or** a custom DataSet Catalog that lists
# # copied on the partitionedDataSet
# #input_catalog = self.custom_catalog.load(house_name)
# #for dataset_name, in_catalog_load_func in input_catalog.items():
# # FIXME : retrieve root path from config
# # or make an autopath function helper
# data_root_path = Path.cwd() / 'data' / '01_raw' / 'xml'
# # FIXME : remove the str() function here
# for dataset_name, dataset_path in tree(str(data_root_path)).items():
## for dataset_name, in_catalog_load_func in input_catalog.items():
## in_catalog_value = in_catalog_load_func()
# # adding programmatically an input catalog entry
## dataset_name = house_name + "_" + dataset_name
# # FIXME : how to set this filename ?
## dataset_path = "data/01_raw/xml/" + house_name + "/" + dataset_name + ".xml"
#
# self.custom_catalog.add(data_set_name=dataset_name,
# data_set=XMLDataSet(filepath=dataset_path),
# replace=True)
# # adding an output catalog entry
# output_dataset_name = + "_output"
# # FIXME pas propre : faire ça avec la pathlib...
# # pas la peine de mettre une extension ".html" ici
# output_dataset_path = dataset_path.replace("01_raw", "02_intermediate")
# self.custom_catalog.add(data_set_name=output_dataset_name,
# data_set=XMLDataSet(filepath=output_dataset_path),
# replace=True)
# # usefull information for the next stage (the pipeline stage)
# self.nodes_description = dict(
# inputs=dataset_name,
# outputs=output_dataset_name,
# name=dataset_name)
def _get_catalog(self, *args, **kwargs): def _get_catalog(self, *args, **kwargs):
"catalog loader entry point" "catalog loader entry point"
# loading yaml defined catalogs # loading yaml defined catalogs
catalog = super()._get_catalog(*args, **kwargs) catalog = super()._get_catalog(*args, **kwargs)
# kedro.io.data_catalog.DataCatalog # kedro.io.data_catalog.DataCatalog
# si je veux vraiment mettre ça dans le catalog global
# adding houses generic catalog
# self.houses_data_catalog_loader()
# adding the datasets that corresponds to the generic catalogs
# self.houses_dataset_factory()
# adding data sets # adding data sets
self.nodes_description = house_dataset_loader(catalog) self.nodes_description = house_dataset_loader(catalog)
return catalog return catalog
def prepare_pipeline_creation(self):
return self.nodes_description

@ -3,15 +3,15 @@ from kedro.framework.session import KedroSession
from .nodes import parse_xsl from .nodes import parse_xsl
# we need the context here in order to access prepare_pipeline_creation()
with KedroSession.create() as session: with KedroSession.create() as session:
context = session.load_context() context = session.load_context()
# we have to access the catalog attribute,
# because doing so calls the context's _get_catalog() method
catalog = context.catalog catalog = context.catalog
# print("----------------------------")
# print(context.nodes_description)
# print(catalog.list())
def nodes_factory(nodes_description): def nodes_factory(nodes_description):
"nodes creation"
nodes = [] nodes = []
for node_description in nodes_description: for node_description in nodes_description:
nodes.append(node( nodes.append(node(
@ -23,11 +23,6 @@ def nodes_factory(nodes_description):
)) ))
return nodes return nodes
def pipeline_factory(nodes_description):
return pipeline(nodes_factory(nodes_description))
def create_pipeline(**kwargs): def create_pipeline(**kwargs):
return pipeline(pipeline_factory(context.nodes_description)) "pipeline entry point needed by the global pipeline registry"
return pipeline(nodes_factory(context.prepare_pipeline_creation()))

@ -2,6 +2,7 @@ pandas>=2.0.2
nbsphinx>=0.9.2 nbsphinx>=0.9.2
lxml>=4.6.3 lxml>=4.6.3
python-slugify>=8.0.1 python-slugify>=8.0.1
sphinx-rtd-theme>=1.2.2
black~=22.0 black~=22.0
flake8>=3.7.9, <5.0 flake8>=3.7.9, <5.0
ipython>=7.31.1, <8.0; python_version < '3.8' ipython>=7.31.1, <8.0; python_version < '3.8'

Loading…
Cancel
Save