cleaning and configure logger
parent
9535597e91
commit
e6164dc134
@ -1,3 +0,0 @@
|
||||
"Data Processing pipeline"
|
||||
|
||||
from .pipeline import create_pipeline # NOQA
|
||||
@ -1,17 +0,0 @@
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
|
||||
#def _is_true(x: pd.Series) -> pd.Series:
|
||||
# return x == "t"
|
||||
|
||||
#def _parse_percentage(x: pd.Series) -> pd.Series:
|
||||
# x = x.str.replace("%", "")
|
||||
# x = x.astype(float) / 100
|
||||
# return x
|
||||
|
||||
def preprocess_actors(actors: pd.DataFrame) -> pd.DataFrame:
    """Replace the ``"XXXX"`` placeholder values in the raw actors data.

    Args:
        actors: Raw actors table; placeholder cells contain the string
            ``"XXXX"``.

    Returns:
        A new DataFrame with every ``"XXXX"`` cell replaced by ``np.nan``.
    """
    # Bug fix: DataFrame.replace returns a NEW frame; the original code
    # discarded that result and returned the input unchanged.
    # (np.nan instead of np.NaN: the NaN alias was removed in NumPy 2.0.)
    return actors.replace("XXXX", np.nan)
|
||||
|
||||
#def parse_xsl(
|
||||
|
||||
@ -1,22 +0,0 @@
|
||||
from kedro.pipeline import Pipeline, node, pipeline
|
||||
|
||||
from .nodes import preprocess_actors
|
||||
|
||||
|
||||
def create_pipeline(**kwargs) -> Pipeline:
    """Create the actors data-processing pipeline.

    Returns:
        A kedro ``Pipeline`` with a single node that maps the ``actors``
        dataset to ``preprocessed_actors`` via :func:`preprocess_actors`.
    """
    # Removed a commented-out duplicate node (parse_xsl) that reused the
    # same outputs/name and would have conflicted if ever re-enabled.
    return pipeline(
        [
            node(
                func=preprocess_actors,
                inputs="actors",
                outputs="preprocessed_actors",
                name="preprocess_actors_node",
            ),
        ]
    )
|
||||
@ -1,3 +0,0 @@
|
||||
"Data Processing pipeline"
|
||||
|
||||
from .pipeline import create_pipeline # NOQA
|
||||
@ -1,16 +0,0 @@
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
|
||||
#def _is_true(x: pd.Series) -> pd.Series:
|
||||
# return x == "t"
|
||||
|
||||
#def _parse_percentage(x: pd.Series) -> pd.Series:
|
||||
# x = x.str.replace("%", "")
|
||||
# x = x.astype(float) / 100
|
||||
# return x
|
||||
|
||||
def test_dataset(actors: pd.DataFrame) -> pd.DataFrame:
|
||||
actors.replace("XXXX", np.NaN)
|
||||
# print(actors.head())
|
||||
return actors
|
||||
|
||||
@ -1,16 +0,0 @@
|
||||
from kedro.pipeline import Pipeline, node, pipeline
|
||||
|
||||
from .nodes import test_dataset
|
||||
|
||||
|
||||
def create_pipeline(**kwargs) -> Pipeline:
    """Build the test-dataset processing pipeline.

    Returns:
        A kedro ``Pipeline`` whose single node maps ``dataset_test`` to
        ``preprocessed_dataset_test`` via :func:`test_dataset`.
    """
    process_node = node(
        func=test_dataset,
        inputs="dataset_test",
        outputs="preprocessed_dataset_test",
        name="process_test_dataset_node",
    )
    return pipeline([process_node])
|
||||
@ -1,24 +0,0 @@
|
||||
from pathlib import Path, PurePosixPath
|
||||
import pandas as pd
|
||||
from kedro.io import AbstractDataSet
|
||||
|
||||
class MyOwnDataSet(AbstractDataSet[pd.DataFrame, pd.DataFrame]):
    """Custom kedro dataset that reads and writes a CSV file with pandas.

    Args:
        filepath: Location of the CSV file.
        load_args: Optional keyword arguments forwarded to ``pd.read_csv``.
        save_args: Optional keyword arguments forwarded to
            ``DataFrame.to_csv``.
    """

    def __init__(self, filepath, load_args=None, save_args=None):
        self._filepath = PurePosixPath(filepath)
        # Bug fix: load_args/save_args were accepted but silently ignored;
        # store them so _load/_save actually honour the catalog config.
        self._load_args = dict(load_args) if load_args else {}
        self._save_args = dict(save_args) if save_args else {}

    def _load(self) -> pd.DataFrame:
        """Read the CSV file into a DataFrame."""
        return pd.read_csv(self._filepath, **self._load_args)

    def _save(self, df: pd.DataFrame) -> None:
        """Write *df* to the CSV file (overwrites any existing file)."""
        df.to_csv(str(self._filepath), **self._save_args)

    def _exists(self) -> bool:
        """Return True if the underlying file exists on the local filesystem."""
        return Path(self._filepath.as_posix()).exists()

    def _describe(self):
        """Return a description dict used by kedro for logging/repr."""
        # Extra keys are backward-compatible and make catalog logs useful.
        return dict(name="my own dataset", filepath=str(self._filepath))
|
||||
Loading…
Reference in New Issue