From a18ef017c62df2c9cc7eaea6569f3da28a393315 Mon Sep 17 00:00:00 2001 From: gwen Date: Tue, 31 Mar 2026 14:13:08 +0200 Subject: [PATCH] kedro --- kedro/concatenation.md | 119 +++++++++++++ kedro/kedro_new/essai.py | 45 +++++ .../kedro_from_scratch/myproject/.gitignore | 162 ++++++++++++++++++ .../kedro_from_scratch/myproject/README.md | 101 +++++++++++ .../myproject/conf/README.md | 26 +++ .../myproject/conf/base/catalog.yml | 17 ++ .../myproject/conf/base/catalog.yml.ori | 4 + .../myproject/conf/base/parameters.yml | 0 .../conf/base/parameters_mon_pipeline.yml | 5 + .../myproject/conf/local/.gitkeep | 0 .../myproject/notebooks/.gitkeep | 0 .../kedro_from_scratch/myproject/notes.txt | 1 + .../myproject/pyproject.toml | 33 ++++ .../myproject/requirements.txt | 7 + .../myproject/src/myproject/__init__.py | 4 + .../myproject/src/myproject/__main__.py | 24 +++ .../src/myproject/pipeline_registry.py | 21 +++ .../src/myproject/pipelines/__init__.py | 0 .../pipelines/mon_pipeline/__init__.py | 10 ++ .../myproject/pipelines/mon_pipeline/nodes.py | 4 + .../pipelines/mon_pipeline/pipeline.py | 10 ++ .../pipelines/process_data/__init__.py | 0 .../myproject/pipelines/process_data/node.py | 14 ++ .../pipelines/process_data/pipeline.py | 13 ++ .../myproject/src/myproject/settings.py | 46 +++++ .../tests/pipelines/mon_pipeline/__init__.py | 0 .../pipelines/mon_pipeline/test_pipeline.py | 9 + 27 files changed, 675 insertions(+) create mode 100644 kedro/concatenation.md create mode 100644 kedro/kedro_new/essai.py create mode 100644 kedro/kedro_new/kedro_from_scratch/myproject/.gitignore create mode 100644 kedro/kedro_new/kedro_from_scratch/myproject/README.md create mode 100644 kedro/kedro_new/kedro_from_scratch/myproject/conf/README.md create mode 100644 kedro/kedro_new/kedro_from_scratch/myproject/conf/base/catalog.yml create mode 100644 kedro/kedro_new/kedro_from_scratch/myproject/conf/base/catalog.yml.ori create mode 100644 
kedro/kedro_new/kedro_from_scratch/myproject/conf/base/parameters.yml create mode 100644 kedro/kedro_new/kedro_from_scratch/myproject/conf/base/parameters_mon_pipeline.yml create mode 100644 kedro/kedro_new/kedro_from_scratch/myproject/conf/local/.gitkeep create mode 100644 kedro/kedro_new/kedro_from_scratch/myproject/notebooks/.gitkeep create mode 100644 kedro/kedro_new/kedro_from_scratch/myproject/notes.txt create mode 100644 kedro/kedro_new/kedro_from_scratch/myproject/pyproject.toml create mode 100644 kedro/kedro_new/kedro_from_scratch/myproject/requirements.txt create mode 100644 kedro/kedro_new/kedro_from_scratch/myproject/src/myproject/__init__.py create mode 100644 kedro/kedro_new/kedro_from_scratch/myproject/src/myproject/__main__.py create mode 100644 kedro/kedro_new/kedro_from_scratch/myproject/src/myproject/pipeline_registry.py create mode 100644 kedro/kedro_new/kedro_from_scratch/myproject/src/myproject/pipelines/__init__.py create mode 100644 kedro/kedro_new/kedro_from_scratch/myproject/src/myproject/pipelines/mon_pipeline/__init__.py create mode 100644 kedro/kedro_new/kedro_from_scratch/myproject/src/myproject/pipelines/mon_pipeline/nodes.py create mode 100644 kedro/kedro_new/kedro_from_scratch/myproject/src/myproject/pipelines/mon_pipeline/pipeline.py create mode 100644 kedro/kedro_new/kedro_from_scratch/myproject/src/myproject/pipelines/process_data/__init__.py create mode 100644 kedro/kedro_new/kedro_from_scratch/myproject/src/myproject/pipelines/process_data/node.py create mode 100644 kedro/kedro_new/kedro_from_scratch/myproject/src/myproject/pipelines/process_data/pipeline.py create mode 100644 kedro/kedro_new/kedro_from_scratch/myproject/src/myproject/settings.py create mode 100644 kedro/kedro_new/kedro_from_scratch/myproject/tests/pipelines/mon_pipeline/__init__.py create mode 100644 kedro/kedro_new/kedro_from_scratch/myproject/tests/pipelines/mon_pipeline/test_pipeline.py diff --git a/kedro/concatenation.md b/kedro/concatenation.md new file 
mode 100644 index 0000000..a827d77 --- /dev/null +++ b/kedro/concatenation.md @@ -0,0 +1,119 @@ +Concaténation +================== + + +Voici un exemple simple de pipeline Kedro qui charge plusieurs fichiers CSV, les concatène et écrit le résultat dans un nouveau fichier CSV. + +### Structure du projet + +Voici une structure de projet Kedro typique pour cet exemple : + +``` +my_kedro_project/ +│ +├── conf/ +│ ├── base/ +│ │ ├── catalog.yml +│ │ └── parameters.yml +│ └── local/ +│ ├── catalog.yml +│ └── parameters.yml +│ +├── src/ +│ └── my_kedro_project/ +│ ├── __init__.py +│ ├── pipeline_registry.py +│ ├── nodes.py +│ └── pipeline.py +│ +├── data/ +│ ├── 01_raw/ +│ │ ├── file1.csv +│ │ ├── file2.csv +│ │ └── file3.csv +│ └── 02_intermediate/ +│ └── concatenated.csv +│ +└── pyproject.toml +``` + +### Configuration du catalogue de données + +Dans `conf/base/catalog.yml`, configurez les datasets : + +```yaml +file1: + type: pandas.CSVDataset + filepath: data/01_raw/file1.csv + +file2: + type: pandas.CSVDataset + filepath: data/01_raw/file2.csv + +file3: + type: pandas.CSVDataset + filepath: data/01_raw/file3.csv + +concatenated: + type: pandas.CSVDataset + filepath: data/02_intermediate/concatenated.csv +``` + +### Définition des nœuds + +Dans `src/my_kedro_project/nodes.py`, définissez les nœuds pour concaténer les fichiers CSV : + +```python +import pandas as pd +from kedro.pipeline import node + +def concatenate_csvs(file1: pd.DataFrame, file2: pd.DataFrame, file3: pd.DataFrame) -> pd.DataFrame: + return pd.concat([file1, file2, file3]) + +node_concatenate = node( + func=concatenate_csvs, + inputs=["file1", "file2", "file3"], + outputs="concatenated", + name="concatenate_csvs_node" +) +``` + +### Définition du pipeline + +Dans `src/my_kedro_project/pipeline.py`, créez le pipeline : + +```python +from kedro.pipeline import Pipeline +from my_kedro_project.nodes import node_concatenate + +def create_pipeline(**kwargs) -> Pipeline: + return 
Pipeline([node_concatenate]) +``` + +### Registre de pipeline + +Dans `src/my_kedro_project/pipeline_registry.py`, enregistrez le pipeline : + +```python +from kedro.pipeline import Pipeline +from my_kedro_project.pipeline import create_pipeline + +def register_pipelines() -> dict[str, Pipeline]: + return { + "__default__": create_pipeline(), + } +``` + +### Exécution du pipeline + +Pour exécuter le pipeline, utilisez la commande suivante dans le répertoire racine de votre projet : + +```bash +kedro run +``` + +Cela chargera les fichiers CSV `file1.csv`, `file2.csv`, et `file3.csv` depuis le répertoire `data/01_raw/`, les concaténera et écrira le résultat dans `data/02_intermediate/concatenated.csv`. + +### Conclusion + +Cet exemple montre comment configurer un pipeline Kedro pour charger plusieurs fichiers CSV, les concaténer et écrire le résultat dans un nouveau fichier CSV. Vous pouvez adapter cette structure pour des workflows plus complexes en ajoutant d'autres nœuds et pipelines selon vos besoins. diff --git a/kedro/kedro_new/essai.py b/kedro/kedro_new/essai.py new file mode 100644 index 0000000..eab4251 --- /dev/null +++ b/kedro/kedro_new/essai.py @@ -0,0 +1,45 @@ +from kedro.framework.session import KedroSession +from kedro.framework.startup import bootstrap_project + +# Bootstrap le projet Kedro +project_path = "."
from kedro.pipeline import Pipeline, node
from kedro.runner import SequentialRunner

# Load the project's settings and pipeline registry before creating a session.
bootstrap_project(project_path)


def process_data(data):
    """Return ``data`` without the bookkeeping columns.

    Args:
        data: The ``fake_data`` catalog entry; a pandas DataFrame, since the
            catalog declares it as ``pandas.CSVDataset``.

    Returns:
        A new DataFrame without the ``flag``, ``k``, ``index`` and ``rate``
        columns. The input frame is not modified.

    Raises:
        KeyError: If one of those columns is missing (pandas' default for
            ``DataFrame.drop``).
    """
    return data.drop(columns=["flag", "k", "index", "rate"])


# Single-node, ad-hoc pipeline: fake_data -> process_data -> processed_data.
mynode = node(
    func=process_data,
    inputs="fake_data",
    outputs="processed_data",
    name="process_data_node",
)

pipeline = Pipeline([mynode])

# Run while the session is still open: the catalog comes from the session's
# context, so it is used inside the ``with`` block rather than after the
# session has been closed.
with KedroSession.create(project_path=project_path) as session:
    context = session.load_context()
    catalog = context.catalog

    runner = SequentialRunner()
    runner.run(pipeline, catalog)
+.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +.hypothesis/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +.static_storage/ +.media/ +local_settings.py + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +.ipython/profile_default/history.sqlite +.ipython/profile_default/startup/README + +# pyenv +.python-version + +# celery beat schedule file +celerybeat-schedule + +# SageMath parsed files +*.sage.py + +# Environments +.env +.envrc +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ + +# mlflow local runs +mlruns/* diff --git a/kedro/kedro_new/kedro_from_scratch/myproject/README.md b/kedro/kedro_new/kedro_from_scratch/myproject/README.md new file mode 100644 index 0000000..a57b444 --- /dev/null +++ b/kedro/kedro_new/kedro_from_scratch/myproject/README.md @@ -0,0 +1,101 @@ +# myproject + +[![Powered by Kedro](https://img.shields.io/badge/powered_by-kedro-ffc900?logo=kedro)](https://kedro.org) + +## Overview + +This is your new Kedro project, which was generated using `kedro 0.19.11`. + +Take a look at the [Kedro documentation](https://docs.kedro.org) to get started. 
+ +## Rules and guidelines + +In order to get the best out of the template: + +* Don't remove any lines from the `.gitignore` file we provide +* Make sure your results can be reproduced by following a data engineering convention +* Don't commit data to your repository +* Don't commit any credentials or your local configuration to your repository. Keep all your credentials and local configuration in `conf/local/` + +## How to install dependencies + +Declare any dependencies in `requirements.txt` for `pip` installation. + +To install them, run: + +``` +pip install -r requirements.txt +``` + +## How to run your Kedro pipeline + +You can run your Kedro project with: + +``` +kedro run +``` + +## How to test your Kedro project + +Have a look at the file `tests/pipelines/mon_pipeline/test_pipeline.py` for instructions on how to write your tests. You can run your tests as follows: + +``` +pytest +``` + +You can configure the coverage threshold in your project's `pyproject.toml` file under the `[tool.coverage.report]` section. + + +## Project dependencies + +To see and update the dependency requirements for your project use `requirements.txt`. You can install the project requirements with `pip install -r requirements.txt`. + +[Further information about project dependencies](https://docs.kedro.org/en/stable/kedro_project_setup/dependencies.html#project-specific-dependencies) + +## How to work with Kedro and notebooks + +> Note: Using `kedro jupyter` or `kedro ipython` to run your notebook provides these variables in scope: `context`, `session`, `catalog`, and `pipelines`. +> +> Jupyter, JupyterLab, and IPython are already included in the project requirements by default, so once you have run `pip install -r requirements.txt` you will not need to take any extra steps before you use them. 
+ +### Jupyter +To use Jupyter notebooks in your Kedro project, you need to install Jupyter: + +``` +pip install jupyter +``` + +After installing Jupyter, you can start a local notebook server: + +``` +kedro jupyter notebook +``` + +### JupyterLab +To use JupyterLab, you need to install it: + +``` +pip install jupyterlab +``` + +You can also start JupyterLab: + +``` +kedro jupyter lab +``` + +### IPython +And if you want to run an IPython session: + +``` +kedro ipython +``` + +### How to ignore notebook output cells in `git` +To automatically strip out all output cell contents before committing to `git`, you can use tools like [`nbstripout`](https://github.com/kynan/nbstripout). For example, you can add a hook in `.git/config` with `nbstripout --install`. This will run `nbstripout` before anything is committed to `git`. + +> *Note:* Your output cells will be retained locally. + +## Package your Kedro project + +[Further information about building project documentation and packaging your project](https://docs.kedro.org/en/stable/tutorial/package_a_project.html) diff --git a/kedro/kedro_new/kedro_from_scratch/myproject/conf/README.md b/kedro/kedro_new/kedro_from_scratch/myproject/conf/README.md new file mode 100644 index 0000000..4379b1e --- /dev/null +++ b/kedro/kedro_new/kedro_from_scratch/myproject/conf/README.md @@ -0,0 +1,26 @@ +# What is this for? + +This folder should be used to store configuration files used by Kedro or by separate tools. + +This file can be used to provide users with instructions for how to reproduce local configuration with their own credentials. You can edit the file however you like, but you may wish to retain the information below and add your own section in the [Instructions](#Instructions) section. + +## Local configuration + +The `local` folder should be used for configuration that is either user-specific (e.g. IDE configuration) or protected (e.g. security keys). 
+ +> *Note:* Please do not check in any local configuration to version control. + +## Base configuration + +The `base` folder is for shared configuration, such as non-sensitive and project-related configuration that may be shared across team members. + +WARNING: Please do not put access credentials in the base configuration folder. + +## Instructions + + + + +## Need help? + +[Find out more about configuration from the Kedro documentation](https://docs.kedro.org/en/stable/kedro_project_setup/configuration.html). diff --git a/kedro/kedro_new/kedro_from_scratch/myproject/conf/base/catalog.yml b/kedro/kedro_new/kedro_from_scratch/myproject/conf/base/catalog.yml new file mode 100644 index 0000000..45e1689 --- /dev/null +++ b/kedro/kedro_new/kedro_from_scratch/myproject/conf/base/catalog.yml @@ -0,0 +1,17 @@ +fake_data: + type: pandas.CSVDataset + filepath: data/01_raw/fake_data.csv + load_args: + sep: ',' + header: 0 + save_args: + index: False + +processed_data: + type: pandas.CSVDataset + filepath: data/02_intermediate/fake_data2.csv + load_args: + sep: ',' + header: 0 + save_args: + index: False diff --git a/kedro/kedro_new/kedro_from_scratch/myproject/conf/base/catalog.yml.ori b/kedro/kedro_new/kedro_from_scratch/myproject/conf/base/catalog.yml.ori new file mode 100644 index 0000000..789fc96 --- /dev/null +++ b/kedro/kedro_new/kedro_from_scratch/myproject/conf/base/catalog.yml.ori @@ -0,0 +1,4 @@ +# Here you can define all your datasets by using simple YAML syntax. 
+# +# Documentation for this file format can be found in "The Data Catalog" +# Link: https://docs.kedro.org/en/stable/data/data_catalog.html diff --git a/kedro/kedro_new/kedro_from_scratch/myproject/conf/base/parameters.yml b/kedro/kedro_new/kedro_from_scratch/myproject/conf/base/parameters.yml new file mode 100644 index 0000000..e69de29 diff --git a/kedro/kedro_new/kedro_from_scratch/myproject/conf/base/parameters_mon_pipeline.yml b/kedro/kedro_new/kedro_from_scratch/myproject/conf/base/parameters_mon_pipeline.yml new file mode 100644 index 0000000..352f433 --- /dev/null +++ b/kedro/kedro_new/kedro_from_scratch/myproject/conf/base/parameters_mon_pipeline.yml @@ -0,0 +1,5 @@ +# This is a boilerplate parameters config generated for pipeline 'mon_pipeline' +# using Kedro 0.19.11. +# +# Documentation for this file format can be found in "Parameters" +# Link: https://docs.kedro.org/en/0.19.11/configuration/parameters.html diff --git a/kedro/kedro_new/kedro_from_scratch/myproject/conf/local/.gitkeep b/kedro/kedro_new/kedro_from_scratch/myproject/conf/local/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/kedro/kedro_new/kedro_from_scratch/myproject/notebooks/.gitkeep b/kedro/kedro_new/kedro_from_scratch/myproject/notebooks/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/kedro/kedro_new/kedro_from_scratch/myproject/notes.txt b/kedro/kedro_new/kedro_from_scratch/myproject/notes.txt new file mode 100644 index 0000000..b7da5ee --- /dev/null +++ b/kedro/kedro_new/kedro_from_scratch/myproject/notes.txt @@ -0,0 +1 @@ +kedro pipeline create mypipeline diff --git a/kedro/kedro_new/kedro_from_scratch/myproject/pyproject.toml b/kedro/kedro_new/kedro_from_scratch/myproject/pyproject.toml new file mode 100644 index 0000000..ec79061 --- /dev/null +++ b/kedro/kedro_new/kedro_from_scratch/myproject/pyproject.toml @@ -0,0 +1,33 @@ +[build-system] +requires = [ "setuptools",] +build-backend = "setuptools.build_meta" + +[project] +requires-python = 
">=3.9" +name = "myproject" +readme = "README.md" +dynamic = [ "version",] +dependencies = [ "ipython>=8.10", "jupyterlab>=3.0", "notebook", "kedro~=0.19.11",] + +[project.scripts] +myproject = "myproject.__main__:main" + +[tool.kedro] +package_name = "myproject" +project_name = "myproject" +kedro_init_version = "0.19.11" +tools = "['None']" +example_pipeline = "False" +source_dir = "src" + +[project.entry-points."kedro.hooks"] + +[tool.setuptools.dynamic.version] +attr = "myproject.__version__" + +[tool.setuptools.packages.find] +where = [ "src",] +namespaces = false + +[tool.kedro_telemetry] +project_id = "75d7fbdcd62f438baccd916558ab8048" diff --git a/kedro/kedro_new/kedro_from_scratch/myproject/requirements.txt b/kedro/kedro_new/kedro_from_scratch/myproject/requirements.txt new file mode 100644 index 0000000..1821bf1 --- /dev/null +++ b/kedro/kedro_new/kedro_from_scratch/myproject/requirements.txt @@ -0,0 +1,7 @@ +ipython>=8.10 +jupyterlab>=3.0 +kedro~=0.19.11 +notebook +pandas +kedro-datasets[pandas.CSVDataSet] + diff --git a/kedro/kedro_new/kedro_from_scratch/myproject/src/myproject/__init__.py b/kedro/kedro_new/kedro_from_scratch/myproject/src/myproject/__init__.py new file mode 100644 index 0000000..4c3a178 --- /dev/null +++ b/kedro/kedro_new/kedro_from_scratch/myproject/src/myproject/__init__.py @@ -0,0 +1,4 @@ +"""myproject +""" + +__version__ = "0.1" diff --git a/kedro/kedro_new/kedro_from_scratch/myproject/src/myproject/__main__.py b/kedro/kedro_new/kedro_from_scratch/myproject/src/myproject/__main__.py new file mode 100644 index 0000000..76ff69c --- /dev/null +++ b/kedro/kedro_new/kedro_from_scratch/myproject/src/myproject/__main__.py @@ -0,0 +1,24 @@ +"""myproject file for ensuring the package is executable +as `myproject` and `python -m myproject` +""" +import sys +from pathlib import Path +from typing import Any + +from kedro.framework.cli.utils import find_run_command +from kedro.framework.project import configure_project + + +def main(*args, 
def register_pipelines() -> dict[str, Pipeline]:
    """Build the registry of pipelines exposed by this project.

    Pipelines are registered by hand rather than discovered with
    ``find_pipelines()``, so only ``process_data`` is available. The same
    pipeline is also published under ``"__default__"``, the entry Kedro uses
    when no pipeline name is given.

    Returns:
        A mapping from pipeline names to ``Pipeline`` objects.
    """
    registry = {"process_data": process_data_pipeline}
    # Alias, not a copy: both names refer to the same Pipeline object.
    registry["__default__"] = registry["process_data"]
    return registry
def process_data(
    data: pd.DataFrame,
    columns: tuple[str, ...] = ("flag", "k", "index", "rate"),
) -> pd.DataFrame:
    """Remove unwanted columns from the input data.

    Args:
        data: The input DataFrame.
        columns: Labels of the columns to remove. Any iterable of labels is
            accepted; the default is the set of columns this pipeline has
            always dropped, so existing single-input nodes keep working.

    Returns:
        A new DataFrame without ``columns``. ``data`` itself is left
        unchanged because ``DataFrame.drop`` is not called in place.

    Raises:
        KeyError: If any label in ``columns`` is not a column of ``data``
            (pandas' default ``errors="raise"`` behaviour).
    """
    # list() lets callers pass any iterable of labels (tuple, list, ...).
    return data.drop(columns=list(columns))
"""Project settings. There is no need to edit this file unless you want to change values
from the Kedro defaults. For further information, including these default values, see
https://docs.kedro.org/en/stable/kedro_project_setup/settings.html."""

# Optional settings, all left at their Kedro defaults (uncomment to override):
#
#   Project hooks, executed in Last-In-First-Out (LIFO) order:
#       from myproject.hooks import ProjectHooks
#       HOOKS = (ProjectHooks(),)
#   Plugins whose hook auto-registration should be disabled:
#       DISABLE_HOOKS_FOR_PLUGINS = ("kedro-viz",)
#   Session store class and its constructor keyword arguments:
#       from kedro.framework.session.store import BaseSessionStore
#       SESSION_STORE_CLASS = BaseSessionStore
#       SESSION_STORE_ARGS = {"path": "./sessions"}
#   Directory that holds configuration:
#       CONF_SOURCE = "conf"
#   Configuration loader class:
#       from kedro.config import OmegaConfigLoader
#       CONFIG_LOADER_CLASS = OmegaConfigLoader
#   Context class:
#       from kedro.framework.context import KedroContext
#       CONTEXT_CLASS = KedroContext
#   Data Catalog class:
#       from kedro.io import DataCatalog
#       DATA_CATALOG_CLASS = DataCatalog

# Keyword arguments passed to the `CONFIG_LOADER_CLASS` constructor.
# A "config_patterns" entry may be added as well, for example:
#     "config_patterns": {
#         "spark": ["spark*/"],
#         "parameters": ["parameters*", "parameters*/**", "**/parameters*"],
#     }
CONFIG_LOADER_ARGS = dict(
    base_env="base",
    default_run_env="local",
)