cleaning and configure logger
parent
9535597e91
commit
e6164dc134
@ -1,3 +0,0 @@
|
|||||||
"Data Processing pipeline"
|
|
||||||
|
|
||||||
from .pipeline import create_pipeline # NOQA
|
|
||||||
@ -1,17 +0,0 @@
|
|||||||
import pandas as pd
|
|
||||||
import numpy as np
|
|
||||||
|
|
||||||
#def _is_true(x: pd.Series) -> pd.Series:
|
|
||||||
# return x == "t"
|
|
||||||
|
|
||||||
#def _parse_percentage(x: pd.Series) -> pd.Series:
|
|
||||||
# x = x.str.replace("%", "")
|
|
||||||
# x = x.astype(float) / 100
|
|
||||||
# return x
|
|
||||||
|
|
||||||
def preprocess_actors(actors: pd.DataFrame) -> pd.DataFrame:
    """Clean the raw actors table.

    Replaces the "XXXX" placeholder values with NaN so downstream nodes
    can treat them as missing data.

    Args:
        actors: Raw actors DataFrame; "XXXX" marks unknown values.

    Returns:
        A DataFrame with "XXXX" placeholders replaced by NaN.
    """
    # BUG FIX: DataFrame.replace is not in-place by default — the original
    # call discarded its result and returned the unmodified frame.
    # np.nan (lowercase) is used because np.NaN was removed in NumPy 2.0.
    actors = actors.replace("XXXX", np.nan)
    return actors
|
|
||||||
|
|
||||||
#def parse_xsl(
|
|
||||||
|
|
||||||
@ -1,22 +0,0 @@
|
|||||||
from kedro.pipeline import Pipeline, node, pipeline
|
|
||||||
|
|
||||||
from .nodes import preprocess_actors
|
|
||||||
|
|
||||||
|
|
||||||
def create_pipeline(**kwargs) -> Pipeline:
    """Build the actors data-processing pipeline.

    The pipeline contains a single node that runs ``preprocess_actors``
    on the ``actors`` dataset and writes ``preprocessed_actors``.
    """
    preprocess_node = node(
        func=preprocess_actors,
        inputs="actors",
        outputs="preprocessed_actors",
        name="preprocess_actors_node",
    )
    return pipeline([preprocess_node])
|
|
||||||
@ -1,3 +0,0 @@
|
|||||||
"Data Processing pipeline"
|
|
||||||
|
|
||||||
from .pipeline import create_pipeline # NOQA
|
|
||||||
@ -1,16 +0,0 @@
|
|||||||
import pandas as pd
|
|
||||||
import numpy as np
|
|
||||||
|
|
||||||
#def _is_true(x: pd.Series) -> pd.Series:
|
|
||||||
# return x == "t"
|
|
||||||
|
|
||||||
#def _parse_percentage(x: pd.Series) -> pd.Series:
|
|
||||||
# x = x.str.replace("%", "")
|
|
||||||
# x = x.astype(float) / 100
|
|
||||||
# return x
|
|
||||||
|
|
||||||
def test_dataset(actors: pd.DataFrame) -> pd.DataFrame:
|
|
||||||
actors.replace("XXXX", np.NaN)
|
|
||||||
# print(actors.head())
|
|
||||||
return actors
|
|
||||||
|
|
||||||
@ -1,16 +0,0 @@
|
|||||||
from kedro.pipeline import Pipeline, node, pipeline
|
|
||||||
|
|
||||||
from .nodes import test_dataset
|
|
||||||
|
|
||||||
|
|
||||||
def create_pipeline(**kwargs) -> Pipeline:
    """Build the test-dataset processing pipeline.

    A single node applies ``test_dataset`` to ``dataset_test`` and
    produces ``preprocessed_dataset_test``.
    """
    process_node = node(
        func=test_dataset,
        inputs="dataset_test",
        outputs="preprocessed_dataset_test",
        name="process_test_dataset_node",
    )
    return pipeline([process_node])
|
|
||||||
@ -1,24 +0,0 @@
|
|||||||
from pathlib import Path, PurePosixPath
|
|
||||||
import pandas as pd
|
|
||||||
from kedro.io import AbstractDataSet
|
|
||||||
|
|
||||||
class MyOwnDataSet(AbstractDataSet[pd.DataFrame, pd.DataFrame]):
    """Custom Kedro dataset backed by a single CSV file.

    Loads and saves ``pandas.DataFrame`` objects via ``read_csv`` /
    ``to_csv`` on a local path.
    """

    def __init__(self, filepath, load_args=None, save_args=None):
        """Create the dataset.

        Args:
            filepath: Path to the CSV file backing this dataset.
            load_args: Accepted for catalog compatibility; currently unused.
            save_args: Accepted for catalog compatibility; currently unused.
        """
        # _filepath: location of the backing CSV file.
        self._filepath = PurePosixPath(filepath)

    def _load(self) -> pd.DataFrame:
        """Read the CSV file into a DataFrame."""
        return pd.read_csv(self._filepath)

    def _save(self, df: pd.DataFrame) -> None:
        """Write *df* to the CSV file.

        BUG FIX: index=False keeps the save/load round trip stable — the
        original call wrote the DataFrame index as an extra unnamed
        column, which _load would then read back as data.
        """
        df.to_csv(str(self._filepath), index=False)

    def _exists(self) -> bool:
        """Return True if the backing file exists on the local filesystem."""
        return Path(self._filepath.as_posix()).exists()

    def _describe(self):
        """Return a human-readable description used by Kedro logging."""
        return dict(name="my own dataset")
|
|
||||||
Loading…
Reference in New Issue