diff --git a/README.md b/README.md index 5fb7c35..4268ca9 100644 --- a/README.md +++ b/README.md @@ -19,11 +19,13 @@ A best-practice setup includes initialising git and creating a virtual environme - install kedro `pip install kedro` - Install the packages and libraries `pip install -r src/requirements.txt` +**go to `actes-princiers`'s folder** + Then open a terminal in the `actes-princiers`'s folder and launch jupyter : `kedro jupyter notebook` or start the ipython prompt : `kedro ipython` -## Launching the pipeline +## Launching the pipelines Open a terminal in the `actes-princiers`'s folder and launch kedro @@ -41,6 +43,12 @@ or a search by tags with: `kedro viz` +## Building the docs + +`./build-docs.sh docs` + +The built HTML documentation is written to `docs/build/html/` (open `index.html`). + ## Developper's rules and guidelines Declare any dependencies in `src/requirements.txt` for `pip` installation. diff --git a/actes-princiers/build-docs.sh b/actes-princiers/build-docs.sh new file mode 100755 index 0000000..d55076e --- /dev/null +++ b/actes-princiers/build-docs.sh @@ -0,0 +1,14 @@ +#!/usr/bin/env bash + +set -e + +# Exit script if you try to use an uninitialized variable. 
+set -o nounset + +action=$1 + +if [ "$action" == "linkcheck" ]; then + sphinx-build -WETan -j auto -D language=en -b linkcheck -d docs/build/doctrees docs/source docs/build/linkcheck +elif [ "$action" == "docs" ]; then + sphinx-build -WETa -j auto -D language=en -b html -d docs/build/doctrees docs/source docs/build/html +fi diff --git a/actes-princiers/clean_intermediate_data.sh b/actes-princiers/clean_intermediate_data.sh new file mode 100755 index 0000000..9caa69c --- /dev/null +++ b/actes-princiers/clean_intermediate_data.sh @@ -0,0 +1 @@ +rm -rf data/02_intermediate/xml/* diff --git a/actes-princiers/docs/source/_static/logo.jpg b/actes-princiers/docs/source/_static/logo.jpg new file mode 100644 index 0000000..20942fd Binary files /dev/null and b/actes-princiers/docs/source/_static/logo.jpg differ diff --git a/actes-princiers/docs/source/coding_standards.rst b/actes-princiers/docs/source/coding_standards.rst index e940bc4..0c98755 100644 --- a/actes-princiers/docs/source/coding_standards.rst +++ b/actes-princiers/docs/source/coding_standards.rst @@ -11,7 +11,7 @@ Import ordering .. rubric:: Sample -.. block-code:: python +.. code-block:: python from typing import Dict from pathlib import Path diff --git a/actes-princiers/docs/source/conf.py b/actes-princiers/docs/source/conf.py index 683f842..bee65d2 100644 --- a/actes-princiers/docs/source/conf.py +++ b/actes-princiers/docs/source/conf.py @@ -1,27 +1,9 @@ -#!/usr/bin/env python3 - - # actes_princiers documentation build # configuration file, created by sphinx-quickstart. # -# This file is execfile()d with the current directory set to its -# containing dir. -# -# Note that not all possible configuration values are present in this -# autogenerated file. -# # All configuration values have a default; values that are commented out # serve to show the default. -# If extensions (or modules to document with autodoc) are in another directory, -# add these directories to sys.path here. 
If the directory is relative to the -# documentation root, use os.path.abspath to make it absolute, like shown here. -# -import re - -from kedro.framework.cli.utils import find_stylesheets - -#from actes_princiers import __version__ as release release = "0.1" # -- Project information ----------------------------------------------------- @@ -30,14 +12,10 @@ project = "actes_princiers" author = "Jean-Damien" # The short X.Y version. -version = re.match(r"^([0-9]+\.[0-9]+).*", release).group(1) +version = "0.1" # -- General configuration --------------------------------------------------- -# If your documentation needs a minimal Sphinx version, state it here. -# -# needs_sphinx = '1.0' - # Add any Sphinx extension module names here, as strings. They can be # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. @@ -55,7 +33,7 @@ extensions = [ # enable autosummary plugin (table of contents for modules/classes/class # methods) -autosummary_generate = True +#autosummary_generate = True # Add any paths that contain templates here, relative to this directory. templates_path = ["_templates"] @@ -82,20 +60,34 @@ exclude_patterns = ["_build", "**.ipynb_checkpoints"] # The name of the Pygments (syntax highlighting) style to use. pygments_style = "sphinx" +default_role = 'code' # -- Options for HTML output ------------------------------------------------- # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. # -# html_theme = "sphinx_rtd_theme" -html_theme = "bizstyle" +html_theme = "sphinx_rtd_theme" + +html_title = "Actes Princiers" +html_short_title = "Actes Princiers" +html_show_sourcelink = False +html_show_sphinx = False +html_show_copyright = True +html_logo = "_static/logo.jpg" +copyright = '2020, Jean-Damien Genero' # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the # documentation. 
# -html_theme_options = {"collapse_navigation": False, "style_external_links": True} +html_theme_options = { + "collapse_navigation": False, + "style_external_links": True, + 'display_version': False, + 'logo_only': True, +# 'style_nav_header_background': 'white' + } # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, @@ -199,24 +191,24 @@ todo_include_todos = False nbsphinx_kernel_name = "python3" -def remove_arrows_in_examples(lines): - for i, line in enumerate(lines): - lines[i] = line.replace(">>>", "") +#def remove_arrows_in_examples(lines): +# for i, line in enumerate(lines): +# lines[i] = line.replace(">>>", "") -def autodoc_process_docstring(app, what, name, obj, options, lines): - remove_arrows_in_examples(lines) +#def autodoc_process_docstring(app, what, name, obj, options, lines): +# remove_arrows_in_examples(lines) -def skip(app, what, name, obj, skip, options): - if name == "__init__": - return False - return skip +#def skip(app, what, name, obj, skip, options): +# if name == "__init__": +# return False +# return skip -def setup(app): - app.connect("autodoc-process-docstring", autodoc_process_docstring) - app.connect("autodoc-skip-member", skip) - # add Kedro stylesheets - for stylesheet in find_stylesheets(): - app.add_css_file(stylesheet) +#def setup(app): +# app.connect("autodoc-process-docstring", autodoc_process_docstring) +# app.connect("autodoc-skip-member", skip) +# # add Kedro stylesheets +# for stylesheet in find_stylesheets(): +# app.add_css_file(stylesheet) diff --git a/actes-princiers/docs/source/index.rst b/actes-princiers/docs/source/index.rst index e0632c7..b7955b3 100644 --- a/actes-princiers/docs/source/index.rst +++ b/actes-princiers/docs/source/index.rst @@ -1,5 +1,5 @@ -Documentation technique du projet Actes princiers -==================================================== +Projet Actes Princiers +========================= .. 
toctree:: :maxdepth: 1 @@ -7,8 +7,6 @@ Documentation technique du projet Actes princiers data coding_standards -Indices and tables -================== +.. rubric:: Index * :ref:`genindex` -* :ref:`search` diff --git a/actes-princiers/src/actes_princiers/customcontext.py b/actes-princiers/src/actes_princiers/customcontext.py index ca60bb0..b2c2fd4 100644 --- a/actes-princiers/src/actes_princiers/customcontext.py +++ b/actes-princiers/src/actes_princiers/customcontext.py @@ -7,9 +7,8 @@ from kedro.pipeline import Pipeline, node, pipeline from actesdataset import XMLDataSet -#from actes_princiers.pipelines.xml_processing.nodes import parse_xsl - def tree(directory, relative_to=None): + "helper that returns a directory tree structure" trees = dict() for path in sorted(directory.rglob("*.xml")): trees[path.stem] = str(path.relative_to(relative_to)) @@ -46,97 +45,27 @@ def house_dataset_loader(catalog): nodes_description.append(node_description) return nodes_description -# TODO : next step, pipeline step -#def create_pipeline(**kwargs) -> Dict[str, Pipeline]: -# """ -# :return: a mapping "pipeline_name", Pipeline() object -# """ -# nodes_description = kwargs['nodes_description'] -# dataset_pipeline = pipeline(nodes_factory(nodes_description)) -# return { -# "__default__": Pipeline( -# dataset_pipeline -# ) -# } - class ProjectContext(KedroContext): project_name = "actes princiers" project_version = "0.1" package_name = "actes_princiers" -# def get_houses_config(self): -# """loading from generic configuration file -# (that is, the global houses `houses.yaml`)""" -# houses_file = self.config_loader.get("houses*") -# return houses_file['houses'] - -# def houses_data_catalog_loader(self): -# "generic houses PartitionedDataSet" -# houses = self.get_houses_config() -# for house in houses: -# house_name = house['name'] -# # FIXME : absolutely not necessary. 
-# # just retrieve the data's tree directory -## self.custom_catalog.add(house_name, PartitionedDataSet( -## # FIXME put this path in the project's configuration -## dataset=XMLDataSet, -## filename_suffix='.xml')) -# path='data/01_raw/xml/' + house_name, - -# def houses_dataset_factory(self): -# "loads all the datasets corresponding to the programmatically loaded catalogs" -# houses = self.get_houses_config() -# for house in houses: -# house_name = house['name'] -# self._house_dataset_loader(house_name) - -# def house_dataset_loader(self): -# # FIXME : just dataset catalog, not PartitionedCatalog -# # **or** a custom DataSet Catalog that lists -# # copied on the partitionedDataSet -# #input_catalog = self.custom_catalog.load(house_name) -# #for dataset_name, in_catalog_load_func in input_catalog.items(): - -# # FIXME : retrieve root path from config -# # or make an autopath function helper -# data_root_path = Path.cwd() / 'data' / '01_raw' / 'xml' -# # FIXME : remove the str() function here -# for dataset_name, dataset_path in tree(str(data_root_path)).items(): -## for dataset_name, in_catalog_load_func in input_catalog.items(): -## in_catalog_value = in_catalog_load_func() -# # adding programmatically an input catalog entry -## dataset_name = house_name + "_" + dataset_name -# # FIXME : how to set this filename ? -## dataset_path = "data/01_raw/xml/" + house_name + "/" + dataset_name + ".xml" -# -# self.custom_catalog.add(data_set_name=dataset_name, -# data_set=XMLDataSet(filepath=dataset_path), -# replace=True) -# # adding an output catalog entry -# output_dataset_name = + "_output" -# # FIXME pas propre : faire ça avec la pathlib... 
-# # pas la peine de mettre une extension ".html" ici -# output_dataset_path = dataset_path.replace("01_raw", "02_intermediate") -# self.custom_catalog.add(data_set_name=output_dataset_name, -# data_set=XMLDataSet(filepath=output_dataset_path), -# replace=True) -# # usefull information for the next stage (the pipeline stage) -# self.nodes_description = dict( -# inputs=dataset_name, -# outputs=output_dataset_name, -# name=dataset_name) + def get_houses_config(self): + """loading from generic configuration file + (that is, the global houses `houses.yaml`)""" + houses_file = self.config_loader.get("houses*") + # FIXME : store this as an attribute of the context + return houses_file['houses'] def _get_catalog(self, *args, **kwargs): "catalog loader entry point" # loading yaml defined catalogs catalog = super()._get_catalog(*args, **kwargs) # kedro.io.data_catalog.DataCatalog - # si je veux vraiment mettre ça dans le catalog global - # adding houses generic catalog -# self.houses_data_catalog_loader() - # adding the datasets that corresponds to the generic catalogs -# self.houses_dataset_factory() # adding data sets self.nodes_description = house_dataset_loader(catalog) return catalog + def prepare_pipeline_creation(self): + return self.nodes_description + diff --git a/actes-princiers/src/actes_princiers/pipelines/xml_processing/pipeline.py b/actes-princiers/src/actes_princiers/pipelines/xml_processing/pipeline.py index 0c95d30..b48a16b 100755 --- a/actes-princiers/src/actes_princiers/pipelines/xml_processing/pipeline.py +++ b/actes-princiers/src/actes_princiers/pipelines/xml_processing/pipeline.py @@ -3,15 +3,15 @@ from kedro.framework.session import KedroSession from .nodes import parse_xsl +# we need the context here in order to call prepare_pipeline_creation() with KedroSession.create() as session: context = session.load_context() + # we have to access the catalog attribute, + # because doing so calls the context's _get_catalog() method, which sets nodes_description catalog = 
context.catalog -# print("----------------------------") -# print(context.nodes_description) -# print(catalog.list()) - def nodes_factory(nodes_description): + "nodes creation" nodes = [] for node_description in nodes_description: nodes.append(node( @@ -23,11 +23,6 @@ def nodes_factory(nodes_description): )) return nodes -def pipeline_factory(nodes_description): - return pipeline(nodes_factory(nodes_description)) - - def create_pipeline(**kwargs): - return pipeline(pipeline_factory(context.nodes_description)) - - + "pipeline entry point needed by the global pipeline registry" + return pipeline(nodes_factory(context.prepare_pipeline_creation())) diff --git a/actes-princiers/src/requirements.txt b/actes-princiers/src/requirements.txt index 127ecc8..921783a 100644 --- a/actes-princiers/src/requirements.txt +++ b/actes-princiers/src/requirements.txt @@ -2,6 +2,7 @@ pandas>=2.0.2 nbsphinx>=0.9.2 lxml>=4.6.3 python-slugify>=8.0.1 +sphinx-rtd-theme>=1.2.2 black~=22.0 flake8>=3.7.9, <5.0 ipython>=7.31.1, <8.0; python_version < '3.8'