docs

3 years ago · 2bf13c78be
parent 30c76bba9a
commit 2bf13c78be
10 changed files with 79 additions and 141 deletions
--- a/README.md
+++ b/README.md
@ -19,11 +19,13 @@ A best-practice setup includes initialising git and creating a virtual environme
 - install kedro `pip install kedro`
 - Install the packages and libraries `pip install -r src/requirements.txt`
 **go to `actes-princiers`'s folder**
 Then open a terminal in the `actes-princiers`'s folder 
 and launch jupyter : `kedro jupyter notebook` 
 or start the ipython prompt : `kedro ipython`
-## Launching the pipeline
+## Launching the pipelines
 Open a terminal in the `actes-princiers`'s folder and launch kedro
@ -41,6 +43,12 @@ or a search by tags with:
 `kedro viz`
 ## Building the docs
 `./build-docs.sh docs` 
 The built HTML documentation is [here](docs/build/html/).
 ## Developer's rules and guidelines
 Declare any dependencies in `src/requirements.txt` for `pip` installation.
--- a/actes-princiers/build-docs.sh
+++ b/actes-princiers/build-docs.sh
@ -0,0 +1,14 @@
 #!/usr/bin/env bash
 # Build the Sphinx documentation or check its links.
 # Usage: ./build-docs.sh docs        (HTML build into docs/build/html)
 #        ./build-docs.sh linkcheck   (link check into docs/build/linkcheck)
 # Any other argument matches no branch below, so the script does nothing.
 set -e
 # Exit script if you try to use an uninitialized variable.
 # This also aborts the script when it is called without an argument ($1 unset).
 set -o nounset
 action=$1
 # sphinx-build flags used below: -W turns warnings into errors, -E ignores the
 # saved environment, -T prints full tracebacks, -a rewrites all output files,
 # -n (linkcheck only) enables nit-picky mode, -j auto builds in parallel,
 # -D language=en overrides the `language` setting, -d sets the doctree cache dir.
 if [ "$action" == "linkcheck" ]; then
  sphinx-build -WETan -j auto -D language=en -b linkcheck -d docs/build/doctrees docs/source docs/build/linkcheck
 elif [ "$action" == "docs" ]; then
  sphinx-build -WETa -j auto -D language=en -b html -d docs/build/doctrees docs/source docs/build/html
 fi
--- a/actes-princiers/clean_intermediate_data.sh
+++ b/actes-princiers/clean_intermediate_data.sh
@ -0,0 +1 @@
 rm -rf data/02_intermediate/xml/*
--- a/actes-princiers/docs/source/_static/logo.jpg
+++ b/actes-princiers/docs/source/_static/logo.jpg
--- a/actes-princiers/docs/source/coding_standards.rst
+++ b/actes-princiers/docs/source/coding_standards.rst
@ -11,7 +11,7 @@ Import ordering
 .. rubric:: Sample
-.. block-code:: python
+.. code-block:: python
    from typing import Dict
    from pathlib import Path
--- a/actes-princiers/docs/source/conf.py
+++ b/actes-princiers/docs/source/conf.py
@ -1,27 +1,9 @@
 #!/usr/bin/env python3
 # actes_princiers documentation build
 # configuration file, created by sphinx-quickstart.
 #
 # This file is execfile()d with the current directory set to its
 # containing dir.
 #
 # Note that not all possible configuration values are present in this
 # autogenerated file.
 #
 # All configuration values have a default; values that are commented out
 # serve to show the default.
 # If extensions (or modules to document with autodoc) are in another directory,
 # add these directories to sys.path here. If the directory is relative to the
 # documentation root, use os.path.abspath to make it absolute, like shown here.
 #
 import re
 from kedro.framework.cli.utils import find_stylesheets
 #from actes_princiers import __version__ as release
 release = "0.1"
 # -- Project information -----------------------------------------------------
@ -30,14 +12,10 @@ project = "actes_princiers"
 author = "Jean-Damien"
 # The short X.Y version.
-version = re.match(r"^([0-9]+\.[0-9]+).*", release).group(1)
+version = "0.1"
 # -- General configuration ---------------------------------------------------
 # If your documentation needs a minimal Sphinx version, state it here.
 #
 # needs_sphinx = '1.0'
 # Add any Sphinx extension module names here, as strings. They can be
 # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
 # ones.
@ -55,7 +33,7 @@ extensions = [
 # enable autosummary plugin (table of contents for modules/classes/class
 # methods)
-autosummary_generate = True
+#autosummary_generate = True
 # Add any paths that contain templates here, relative to this directory.
 templates_path = ["_templates"]
@ -82,20 +60,34 @@ exclude_patterns = ["_build", "**.ipynb_checkpoints"]
 # The name of the Pygments (syntax highlighting) style to use.
 pygments_style = "sphinx"
 default_role = 'code'
 # -- Options for HTML output -------------------------------------------------
 # The theme to use for HTML and HTML Help pages.  See the documentation for
 # a list of builtin themes.
 #
-# html_theme = "sphinx_rtd_theme"
+html_theme = "sphinx_rtd_theme"
-html_theme = "bizstyle"
+
 html_title = "Actes Princiers"
 html_short_title = "Actes Princiers"
 html_show_sourcelink = False
 html_show_sphinx = False
 html_show_copyright = True
 html_logo = "_static/logo.jpg"
 copyright = '2020, Jean-Damien Genero'
 # Theme options are theme-specific and customize the look and feel of a theme
 # further.  For a list of options available for each theme, see the
 # documentation.
 #
-html_theme_options = {"collapse_navigation": False, "style_external_links": True}
+html_theme_options = {
    "collapse_navigation": False, 
    "style_external_links": True,
    'display_version': False,
    'logo_only': True,
 #    'style_nav_header_background': 'white'
    }
 # Add any paths that contain custom static files (such as style sheets) here,
 # relative to this directory. They are copied after the builtin static files,
@ -199,24 +191,24 @@ todo_include_todos = False
 nbsphinx_kernel_name = "python3"
-def remove_arrows_in_examples(lines):
+#def remove_arrows_in_examples(lines):
-    for i, line in enumerate(lines):
+#    for i, line in enumerate(lines):
-        lines[i] = line.replace(">>>", "")
+#        lines[i] = line.replace(">>>", "")
-def autodoc_process_docstring(app, what, name, obj, options, lines):
+#def autodoc_process_docstring(app, what, name, obj, options, lines):
-    remove_arrows_in_examples(lines)
+#    remove_arrows_in_examples(lines)
-def skip(app, what, name, obj, skip, options):
+#def skip(app, what, name, obj, skip, options):
-    if name == "__init__":
+#    if name == "__init__":
-        return False
+#        return False
-    return skip
+#    return skip
-def setup(app):
+#def setup(app):
-    app.connect("autodoc-process-docstring", autodoc_process_docstring)
+#    app.connect("autodoc-process-docstring", autodoc_process_docstring)
-    app.connect("autodoc-skip-member", skip)
+#    app.connect("autodoc-skip-member", skip)
-    # add Kedro stylesheets
+#    # add Kedro stylesheets
-    for stylesheet in find_stylesheets():
+#    for stylesheet in find_stylesheets():
-        app.add_css_file(stylesheet)
+#        app.add_css_file(stylesheet)
--- a/actes-princiers/docs/source/index.rst
+++ b/actes-princiers/docs/source/index.rst
@ -1,5 +1,5 @@
-Documentation technique du projet Actes princiers
+Projet Actes Princiers
-====================================================
+=========================
 .. toctree::
   :maxdepth: 1
@ -7,8 +7,6 @@ Documentation technique du projet Actes princiers
   data
   coding_standards
-Indices and tables
+.. rubric:: Index
 ==================
 * :ref:`genindex`
 * :ref:`search`
--- a/actes-princiers/src/actes_princiers/customcontext.py
+++ b/actes-princiers/src/actes_princiers/customcontext.py
@ -7,9 +7,8 @@ from kedro.pipeline import Pipeline, node, pipeline
 from actesdataset import XMLDataSet
 #from actes_princiers.pipelines.xml_processing.nodes import parse_xsl
 def tree(directory, relative_to=None):
    "helper that returns a directory tree structure" 
    trees = dict()
    for path in sorted(directory.rglob("*.xml")):
        trees[path.stem] = str(path.relative_to(relative_to))
@ -46,97 +45,27 @@ def house_dataset_loader(catalog):
        nodes_description.append(node_description)
    return nodes_description
 # TODO : next step, pipeline step
 #def create_pipeline(**kwargs) -> Dict[str, Pipeline]:
 #    """
 #    :return: a mapping "pipeline_name", Pipeline() object
 #    """
 #    nodes_description = kwargs['nodes_description']
 #    dataset_pipeline = pipeline(nodes_factory(nodes_description))
 #    return  {
 #    "__default__": Pipeline(
 #        dataset_pipeline
 #        )
 #    }
 class ProjectContext(KedroContext):
    project_name = "actes princiers"
    project_version = "0.1"
    package_name = "actes_princiers"
-#    def get_houses_config(self):
+    def get_houses_config(self):
-#        """loading from generic configuration file 
+        """loading from generic configuration file 
-#        (that is, the global houses `houses.yaml`)"""
+        (that is, the global houses `houses.yaml`)"""
-#        houses_file = self.config_loader.get("houses*")
+        houses_file = self.config_loader.get("houses*")
-#        return houses_file['houses']
+        # FIXME: store this as an attribute of the context
-
+        return houses_file['houses']
 #    def houses_data_catalog_loader(self):
 #        "generic houses PartitionedDataSet"
 #        houses = self.get_houses_config()
 #        for house in houses:
 #            house_name = house['name']
 #            # FIXME : absolutely not necessary. 
 #            # just retrieve the data's tree directory
 ##            self.custom_catalog.add(house_name, PartitionedDataSet(
 ##                # FIXME put this path in the project's configuration
 ##                dataset=XMLDataSet,
 ##                filename_suffix='.xml'))
 #            path='data/01_raw/xml/' + house_name, 
 #    def houses_dataset_factory(self):
 #        "loads all the datasets corresponding to the programmatically loaded catalogs"
 #        houses = self.get_houses_config()
 #        for house in houses:
 #            house_name = house['name']
 #            self._house_dataset_loader(house_name)
 #    def house_dataset_loader(self):
 #        # FIXME : just dataset catalog, not PartitionedCatalog
 #        # **or** a custom DataSet Catalog that lists
 #        # copied on the partitionedDataSet 
 #        #input_catalog = self.custom_catalog.load(house_name)
 #        #for dataset_name, in_catalog_load_func in input_catalog.items():
 #        # FIXME : retrieve root path from config
 #        # or make an autopath function helper
 #        data_root_path = Path.cwd() / 'data' / '01_raw' / 'xml'
 #        # FIXME : remove the str() function here
 #        for dataset_name, dataset_path in tree(str(data_root_path)).items():         
 ##        for dataset_name, in_catalog_load_func in input_catalog.items():
 ##            in_catalog_value = in_catalog_load_func()
 #            # adding programmatically an input catalog entry
 ##            dataset_name = house_name + "_" + dataset_name
 #            # FIXME : how to set this filename ? 
 ##            dataset_path = "data/01_raw/xml/" +  house_name + "/" + dataset_name + ".xml"
 #            
 #            self.custom_catalog.add(data_set_name=dataset_name, 
 #                data_set=XMLDataSet(filepath=dataset_path),
 #                replace=True)
 #            # adding an output catalog entry
 #            output_dataset_name =  + "_output"
 #            # FIXME not clean: do this with pathlib...
 #            # no need to add a ".html" extension here
 #            output_dataset_path = dataset_path.replace("01_raw", "02_intermediate") 
 #            self.custom_catalog.add(data_set_name=output_dataset_name, 
 #                data_set=XMLDataSet(filepath=output_dataset_path),
 #                replace=True)
 #        # useful information for the next stage (the pipeline stage)
 #        self.nodes_description = dict(
 #                inputs=dataset_name,
 #                outputs=output_dataset_name,
 #                name=dataset_name)
    def _get_catalog(self, *args, **kwargs):
        """Catalog loader entry point.

        Builds the catalog through the parent class, then hands it to
        `house_dataset_loader()` and keeps the returned nodes description
        on `self.nodes_description` so that `prepare_pipeline_creation()`
        can return it later.

        :return: the catalog object built by the parent `_get_catalog()`
        """
        # loading the YAML-defined catalogs
        catalog = super()._get_catalog(*args, **kwargs)
        # presumably a kedro.io.data_catalog.DataCatalog -- TODO confirm
        # (note kept in case we really want to put this in the global catalog)
        # adding houses generic catalog
 #        self.houses_data_catalog_loader()
        # adding the datasets that correspond to the generic catalogs
 #        self.houses_dataset_factory()
        # adding data sets; the helper's return value is the nodes description
        self.nodes_description = house_dataset_loader(catalog)
        return catalog 
    def prepare_pipeline_creation(self):
        """Return the nodes description stored on this context.

        In the code visible here, `self.nodes_description` is assigned inside
        `_get_catalog()`, so the catalog has to be loaded before calling this.

        :return: the value of `self.nodes_description`
        """
        return self.nodes_description
--- a/actes-princiers/src/actes_princiers/pipelines/xml_processing/pipeline.py
+++ b/actes-princiers/src/actes_princiers/pipelines/xml_processing/pipeline.py
@ -3,15 +3,15 @@ from kedro.framework.session import KedroSession
 from .nodes import parse_xsl
 # we need the context here in order to access prepare_pipeline_creation()
 with KedroSession.create() as session:
    context = session.load_context()
    # we have to access the catalog attribute,
    # because doing so triggers the context's _get_catalog() method
    catalog = context.catalog
 #    print("----------------------------")
 #    print(context.nodes_description)
 #    print(catalog.list())
 def nodes_factory(nodes_description):
    "nodes creation"
    nodes = []
    for node_description in nodes_description:
        nodes.append(node(
@ -23,11 +23,6 @@ def nodes_factory(nodes_description):
            ))
    return nodes
 def pipeline_factory(nodes_description):
    """Build a pipeline from a collection of node descriptions.

    :param nodes_description: node descriptions, forwarded unchanged
        to `nodes_factory()`
    :return: the result of `pipeline()` applied to the created nodes
    """
    return pipeline(nodes_factory(nodes_description))
 def create_pipeline(**kwargs):
-    return pipeline(pipeline_factory(context.nodes_description))
+    "pipeline entry point needed by the global pipeline registry"
-
+    return pipeline(nodes_factory(context.prepare_pipeline_creation()))
--- a/actes-princiers/src/requirements.txt
+++ b/actes-princiers/src/requirements.txt
@ -2,6 +2,7 @@ pandas>=2.0.2
 nbsphinx>=0.9.2
 lxml>=4.6.3
 python-slugify>=8.0.1
 sphinx-rtd-theme>=1.2.2
 black~=22.0
 flake8>=3.7.9, <5.0
 ipython>=7.31.1, <8.0; python_version < '3.8'