# You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

# 76 lines
# 1.8 KiB
# YAML

# This file contains invisible Unicode characters!

# This file contains invisible Unicode characters that may be processed differently from what appears below. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to reveal hidden characters.

# _________________________________________________________________________
# raw CSV datasets (semicolon-separated)
# Raw actors table, read from a semicolon-separated CSV file.
actors:
  type: pandas.CSVDataSet
  filepath: data/01_raw/csv/actors.csv
  # Options applied when the dataset is loaded; the file uses ";" as delimiter.
  load_args:
    sep: ";"
# Raw "corpus-agnes-bourgogne" table, read from a semicolon-separated CSV file.
corpus-agnes-bourgogne:
  type: pandas.CSVDataSet
  filepath: data/01_raw/csv/corpus-agnes-bourgogne.csv
  # Options applied when the dataset is loaded; the file uses ";" as delimiter.
  load_args:
    sep: ";"
# Raw "corpus-charles-i" table, read from a semicolon-separated CSV file.
corpus-charles-i:
  type: pandas.CSVDataSet
  filepath: data/01_raw/csv/corpus-charles-i.csv
  # Options applied when the dataset is loaded; the file uses ";" as delimiter.
  load_args:
    sep: ";"
# _________________________________________________________________________
# custom csv dataset test sample
# Test entry for the project-defined MyOwnDataSet class, pointed at the same
# raw actors CSV as the `actors` entry.
# NOTE(review): how MyOwnDataSet consumes `load_args` is not visible here;
# confirm it accepts a `sep` option.
dataset_test:
  type: myowndataset.MyOwnDataSet
  filepath: data/01_raw/csv/actors.csv
  load_args:
    sep: ";"
# Intermediate-layer output of the custom-dataset test, written through the
# project-defined MyOwnDataSet class.
# NOTE(review): how MyOwnDataSet consumes `save_args` is not visible here;
# confirm it accepts a `sep` option.
preprocessed_dataset_test:
  type: myowndataset.MyOwnDataSet
  filepath: data/02_intermediate/csv/preprocessed_test_dataset.csv
  save_args:
    sep: ";"
# _________________________________________________________________________
# custom xml dataset sample
# Single raw XML document, handled by the project-defined XMLDataSet class.
load_xml:
  type: actesdataset.XMLDataSet
  filepath: data/01_raw/xml/Anjou/anj_is_i_1441_08_05a.xml
# Intermediate-layer HTML counterpart of the single XML document above.
# It is declared with the same project-defined XMLDataSet class even though
# the target file has an .html extension.
preprocess_html:
  type: actesdataset.XMLDataSet
  filepath: data/02_intermediate/xml/Anjou/anj_is_i_1441_08_05a.html
# _________________________________________________________________________
# same test with kedro.io.PartitionedDataSet
# Every raw XML file under the Anjou folder, exposed as one partitioned
# dataset; each partition is handled by the project-defined XMLDataSet class.
load_full_xml_catalog:
  type: PartitionedDataSet
  path: data/01_raw/xml/Anjou/
  dataset:
    type: actesdataset.XMLDataSet
  # `filename_suffix` is an option of PartitionedDataSet itself (it selects
  # which files count as partitions), so it sits beside `dataset`, not in it.
  filename_suffix: '.xml'
# Intermediate-layer HTML files under the Anjou folder, exposed as one
# partitioned dataset; each partition is handled by the project-defined
# XMLDataSet class.
preprocess_full_catalog_html:
  type: PartitionedDataSet
  path: data/02_intermediate/xml/Anjou/
  dataset:
    type: actesdataset.XMLDataSet
  # `filename_suffix` is an option of PartitionedDataSet itself (it selects
  # which files count as partitions), so it sits beside `dataset`, not in it.
  filename_suffix: '.html'
# _________________________________________________________________________
# Intermediate-layer actors table, written as a semicolon-separated CSV file.
preprocessed_actors:
  type: pandas.CSVDataSet
  filepath: data/02_intermediate/csv/preprocessed_actors.csv
  # Options applied when the dataset is saved; the file uses ";" as delimiter.
  save_args:
    sep: ";"