diff --git a/README.md b/README.md index 9a264d6..32ef2fa 100644 --- a/README.md +++ b/README.md @@ -19,12 +19,14 @@ A best-practice setup includes initialising git and creating a virtual environme - install kedro `pip install kedro` - Install the packages and libraries `pip install -r src/requirements.txt` -Then open a terminal in the `actes-princiers` folder +Then open a terminal in the `actes-princiers`'s folder and launch jupyter : `kedro jupyter notebook` or start the ipython prompt : `kedro ipython` ## Launching the pipeline +Open a terminal in the `actes-princiers`'s folder and launch kedro + `kedro run` ## Visualizing the pipelines diff --git a/actes-princiers/src/actes_princiers/pipelines/data_processing/nodes.py b/actes-princiers/src/actes_princiers/pipelines/data_processing/nodes.py index c300a92..b03a03f 100755 --- a/actes-princiers/src/actes_princiers/pipelines/data_processing/nodes.py +++ b/actes-princiers/src/actes_princiers/pipelines/data_processing/nodes.py @@ -1,16 +1,17 @@ import pandas as pd import numpy as np -def _is_true(x: pd.Series) -> pd.Series: - return x == "t" +#def _is_true(x: pd.Series) -> pd.Series: +# return x == "t" -def _parse_percentage(x: pd.Series) -> pd.Series: - x = x.str.replace("%", "") - x = x.astype(float) / 100 - return x +#def _parse_percentage(x: pd.Series) -> pd.Series: +# x = x.str.replace("%", "") +# x = x.astype(float) / 100 +# return x def preprocess_actors(actors: pd.DataFrame) -> pd.DataFrame: actors.replace("XXXX", np.NaN) return actors +#def parse_xsl( diff --git a/actes-princiers/src/actes_princiers/pipelines/data_processing/pipeline.py b/actes-princiers/src/actes_princiers/pipelines/data_processing/pipeline.py index 2faa9dc..ee51626 100755 --- a/actes-princiers/src/actes_princiers/pipelines/data_processing/pipeline.py +++ b/actes-princiers/src/actes_princiers/pipelines/data_processing/pipeline.py @@ -12,5 +12,11 @@ def create_pipeline(**kwargs) -> Pipeline: outputs="preprocessed_actors", name="preprocess_actors_node", ), +# node( +# func=parse_xsl, +# inputs="actors", +# outputs="preprocessed_actors", +# name="preprocess_actors_node", +# ), ] )