|
|
# _________________________________________________________________________
|
|
|
# loading some data catalogs
|
|
|
|
|
|
# Actors table from the raw layer: a CSV file whose fields are separated
# by semicolons, handled by the pandas.CSVDataSet type.
actors:
  type: pandas.CSVDataSet
  filepath: data/01_raw/csv/actors.csv
  # Options applied when the file is loaded.
  load_args:
    sep: ";"
|
|
|
|
|
|
# "corpus-agnes-bourgogne" CSV from the raw layer; semicolon-separated,
# handled by the pandas.CSVDataSet type.
corpus-agnes-bourgogne:
  type: pandas.CSVDataSet
  filepath: data/01_raw/csv/corpus-agnes-bourgogne.csv
  # Options applied when the file is loaded.
  load_args:
    sep: ";"
|
|
|
|
|
|
# "corpus-charles-i" CSV from the raw layer; semicolon-separated,
# handled by the pandas.CSVDataSet type.
corpus-charles-i:
  type: pandas.CSVDataSet
  filepath: data/01_raw/csv/corpus-charles-i.csv
  # Options applied when the file is loaded.
  load_args:
    sep: ";"
|
|
|
|
|
|
|
|
|
# _________________________________________________________________________
|
|
|
# custom csv dataset test sample
|
|
|
|
|
|
# Test entry for the project's custom dataset type
# (myowndataset.MyOwnDataSet). It points at the same actors CSV as the
# `actors` entry and uses the same semicolon separator on load.
dataset_test:
  type: myowndataset.MyOwnDataSet
  filepath: data/01_raw/csv/actors.csv
  # Options applied when the file is loaded.
  load_args:
    sep: ";"
|
|
|
|
|
|
# Intermediate-layer output for the custom dataset test
# (myowndataset.MyOwnDataSet), saved with a semicolon separator.
preprocessed_dataset_test:
  type: myowndataset.MyOwnDataSet
  filepath: data/02_intermediate/csv/preprocessed_test_dataset.csv
  # Options applied when the file is saved.
  save_args:
    sep: ";"
|
|
|
|
|
|
# _________________________________________________________________________
|
|
|
# custom xml dataset sample
|
|
|
|
|
|
# Single raw XML file handled by the project's custom
# actesdataset.XMLDataSet type.
load_xml:
  type: actesdataset.XMLDataSet
  filepath: data/01_raw/xml/anjou/anj_is_i_1441_08_05a.xml
|
|
|
|
|
|
# Intermediate-layer HTML counterpart of the file declared in `load_xml`.
# NOTE(review): the path ends in .html but the type is
# actesdataset.XMLDataSet - confirm that this dataset class is meant to
# handle HTML files as well.
preprocess_html:
  type: actesdataset.XMLDataSet
  filepath: data/02_intermediate/xml/anjou/anj_is_i_1441_08_05a.html
|
|
|
|
|
|
# _________________________________________________________________________
|
|
|
# same test with kedro.io.PartitionedDataSet
|
|
|
|
|
|
# Warning: this kind of YAML data is generated programmatically
# in the generic data loader.
|
|
|
|
|
|
# load_full_xml_catalog:
#   type: PartitionedDataSet
#   path: data/01_raw/xml/anjou/
#   dataset:
#     type: actesdataset.XMLDataSet
#   filename_suffix: '.xml'
|
|
|
|
|
|
# preprocess_full_catalog_html:
#   type: PartitionedDataSet
#   path: data/02_intermediate/xml/anjou/
#   dataset:
#     type: actesdataset.XMLDataSet
#   filename_suffix: '.html'
|
|
|
|
|
|
# _________________________________________________________________________
|
|
|
|
|
|
|
|
|
# Intermediate-layer actors table: a CSV saved with a semicolon
# separator through the pandas.CSVDataSet type.
preprocessed_actors:
  type: pandas.CSVDataSet
  filepath: data/02_intermediate/csv/preprocessed_actors.csv
  # Options applied when the file is saved.
  save_args:
    sep: ";"
|
|
|
|
|
|
|