début de traitement par lots

develop
gwen 3 years ago
parent 226a65ce69
commit 57ba65e606

@ -1,3 +1,8 @@
xmlreflector:
type: actesdataset.XMLHousesReflector
housename: bourbon
folderpath: data/01_raw/houses/bourbon
actors:
type: pandas.CSVDataSet
filepath: data/01_raw/csv/actors.csv

@ -1 +0,0 @@
houses_datapath: data/01_raw/houses

@ -1,10 +1,11 @@
raw_datapath: data/01_raw
houses:
bourbon:
name: Bourbon
path: houses/bourbon
berry:
name: Berry
path: houses/Berry
path: houses/berry
anjou:
name: Anjou
path: houses/anjou

@ -12,25 +12,169 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 1,
"id": "ae9bc24c",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"font-weight: bold\">&lt;</span><span style=\"color: #ff00ff; text-decoration-color: #ff00ff; font-weight: bold\">kedro.io.data_catalog.DataCatalog</span><span style=\"color: #000000; text-decoration-color: #000000\"> object at </span><span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">0x7f2d5be6e740</span><span style=\"font-weight: bold\">&gt;</span>\n",
"</pre>\n"
],
"text/plain": [
"<kedro.io.data_catalog.DataCatalog at 0x7fbafd365970>"
"\u001b[1m<\u001b[0m\u001b[1;95mkedro.io.data_catalog.DataCatalog\u001b[0m\u001b[39m object at \u001b[0m\u001b[1;36m0x7f2d5be6e740\u001b[0m\u001b[1m>\u001b[0m\n"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
"output_type": "display_data"
}
],
"source": [
"catalog"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "40417f25",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"color: #7fbfbf; text-decoration-color: #7fbfbf\">[06/30/23 17:50:49] </span><span style=\"color: #000080; text-decoration-color: #000080\">INFO </span> Loading data from <span style=\"color: #008000; text-decoration-color: #008000\">'xmlreflector'</span> <span style=\"font-weight: bold\">(</span>XMLHousesReflector<span style=\"font-weight: bold\">)</span><span style=\"color: #808000; text-decoration-color: #808000\">...</span> <a href=\"file:///home/gwen/.local/lib/python3.10/site-packages/kedro/io/data_catalog.py\" target=\"_blank\"><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">data_catalog.py</span></a><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">:</span><a href=\"file:///home/gwen/.local/lib/python3.10/site-packages/kedro/io/data_catalog.py#345\" target=\"_blank\"><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">345</span></a>\n",
"</pre>\n"
],
"text/plain": [
"\u001b[2;36m[06/30/23 17:50:49]\u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m Loading data from \u001b[32m'xmlreflector'\u001b[0m \u001b[1m(\u001b[0mXMLHousesReflector\u001b[1m)\u001b[0m\u001b[33m...\u001b[0m \u001b]8;id=287074;file:///home/gwen/.local/lib/python3.10/site-packages/kedro/io/data_catalog.py\u001b\\\u001b[2mdata_catalog.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=134334;file:///home/gwen/.local/lib/python3.10/site-packages/kedro/io/data_catalog.py#345\u001b\\\u001b[2m345\u001b[0m\u001b]8;;\u001b\\\n"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"color: #800000; text-decoration-color: #800000\">╭─────────────────────────────── </span><span style=\"color: #800000; text-decoration-color: #800000; font-weight: bold\">Traceback </span><span style=\"color: #bf7f7f; text-decoration-color: #bf7f7f; font-weight: bold\">(most recent call last)</span><span style=\"color: #800000; text-decoration-color: #800000\"> ────────────────────────────────╮</span>\n",
"<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #bfbf7f; text-decoration-color: #bfbf7f\">/home/gwen/.local/lib/python3.10/site-packages/kedro/io/</span><span style=\"color: #808000; text-decoration-color: #808000; font-weight: bold\">core.py</span>:<span style=\"color: #0000ff; text-decoration-color: #0000ff\">187</span> in <span style=\"color: #00ff00; text-decoration-color: #00ff00\">load</span> <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
"<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
"<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">184 │ │ </span><span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>._logger.debug(<span style=\"color: #808000; text-decoration-color: #808000\">\"Loading %s\"</span>, <span style=\"color: #00ffff; text-decoration-color: #00ffff\">str</span>(<span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>)) <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
"<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">185 │ │ </span> <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
"<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">186 │ │ </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">try</span>: <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
"<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">❱ </span>187 <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">│ │ │ </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">return</span> <span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>._load() <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
"<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">188 │ │ </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">except</span> DataSetError: <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
"<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">189 │ │ │ </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">raise</span> <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
"<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">190 │ │ </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">except</span> <span style=\"color: #00ffff; text-decoration-color: #00ffff\">Exception</span> <span style=\"color: #0000ff; text-decoration-color: #0000ff\">as</span> exc: <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
"<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
"<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #bfbf7f; text-decoration-color: #bfbf7f\">/media/gwen/maxtor/gwen/entrepot/cnrs/nicolas/depot/datascience/actes-princiers/src/</span><span style=\"color: #808000; text-decoration-color: #808000; font-weight: bold\">actesdataset</span> <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
"<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #808000; text-decoration-color: #808000; font-weight: bold\">.py</span>:<span style=\"color: #0000ff; text-decoration-color: #0000ff\">62</span> in <span style=\"color: #00ff00; text-decoration-color: #00ff00\">_load</span> <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
"<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
"<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> 59 │ │ </span><span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>.filepath = filepath <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
"<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> 60 │ </span> <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
"<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> 61 │ </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">def</span> <span style=\"color: #00ff00; text-decoration-color: #00ff00\">_load</span>(<span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>): <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
"<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">❱ </span> 62 <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">│ │ </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">raise</span> <span style=\"color: #808000; text-decoration-color: #808000\">\"C'est chargé!\"</span> <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
"<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> 63 │ </span> <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
"<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> 64 │ </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">def</span> <span style=\"color: #00ff00; text-decoration-color: #00ff00\">_save</span>(<span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>): <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
"<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> 65 │ │ </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">raise</span> <span style=\"color: #00ffff; text-decoration-color: #00ffff\">NotImplementedError</span>(<span style=\"color: #808000; text-decoration-color: #808000\">\"Attention : dataset en lecture seule !\"</span>) <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
"<span style=\"color: #800000; text-decoration-color: #800000\">╰──────────────────────────────────────────────────────────────────────────────────────────────────╯</span>\n",
"<span style=\"color: #ff0000; text-decoration-color: #ff0000; font-weight: bold\">TypeError: </span>exceptions must derive from BaseException\n",
"\n",
"<span style=\"font-style: italic\">The above exception was the direct cause of the following exception:</span>\n",
"\n",
"<span style=\"color: #800000; text-decoration-color: #800000\">╭─────────────────────────────── </span><span style=\"color: #800000; text-decoration-color: #800000; font-weight: bold\">Traceback </span><span style=\"color: #bf7f7f; text-decoration-color: #bf7f7f; font-weight: bold\">(most recent call last)</span><span style=\"color: #800000; text-decoration-color: #800000\"> ────────────────────────────────╮</span>\n",
"<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #bfbf7f; text-decoration-color: #bfbf7f\">/tmp/ipykernel_28884/</span><span style=\"color: #808000; text-decoration-color: #808000; font-weight: bold\">4226322454.py</span>:<span style=\"color: #0000ff; text-decoration-color: #0000ff\">1</span> in <span style=\"color: #00ff00; text-decoration-color: #00ff00\">&lt;module&gt;</span> <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
"<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
"<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000; font-style: italic\">[Errno 2] No such file or directory: '/tmp/ipykernel_28884/4226322454.py'</span> <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
"<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
"<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #bfbf7f; text-decoration-color: #bfbf7f\">/home/gwen/.local/lib/python3.10/site-packages/kedro/io/</span><span style=\"color: #808000; text-decoration-color: #808000; font-weight: bold\">data_catalog.py</span>:<span style=\"color: #0000ff; text-decoration-color: #0000ff\">349</span> in <span style=\"color: #00ff00; text-decoration-color: #00ff00\">load</span> <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
"<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
"<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">346 │ │ │ </span><span style=\"color: #808000; text-decoration-color: #808000\">\"Loading data from '%s' (%s)...\"</span>, name, <span style=\"color: #00ffff; text-decoration-color: #00ffff\">type</span>(dataset).<span style=\"color: #ff0000; text-decoration-color: #ff0000\">__name__</span> <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
"<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">347 │ │ </span>) <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
"<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">348 │ │ </span> <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
"<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">❱ </span>349 <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">│ │ </span>result = dataset.load() <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
"<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">350 │ │ </span> <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
"<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">351 │ │ </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">return</span> result <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
"<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">352 </span> <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
"<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
"<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #bfbf7f; text-decoration-color: #bfbf7f\">/home/gwen/.local/lib/python3.10/site-packages/kedro/io/</span><span style=\"color: #808000; text-decoration-color: #808000; font-weight: bold\">core.py</span>:<span style=\"color: #0000ff; text-decoration-color: #0000ff\">196</span> in <span style=\"color: #00ff00; text-decoration-color: #00ff00\">load</span> <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
"<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
"<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">193 │ │ │ </span>message = ( <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
"<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">194 │ │ │ │ </span><span style=\"color: #808000; text-decoration-color: #808000\">f\"Failed while loading data from data set {</span><span style=\"color: #00ffff; text-decoration-color: #00ffff\">str</span>(<span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>)<span style=\"color: #808000; text-decoration-color: #808000\">}.\\n{</span><span style=\"color: #00ffff; text-decoration-color: #00ffff\">str</span>(exc)<span style=\"color: #808000; text-decoration-color: #808000\">}\"</span> <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
"<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">195 │ │ │ </span>) <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
"<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">❱ </span>196 <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">│ │ │ </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">raise</span> DataSetError(message) <span style=\"color: #0000ff; text-decoration-color: #0000ff\">from</span> <span style=\"color: #00ffff; text-decoration-color: #00ffff; text-decoration: underline\">exc</span> <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
"<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">197 │ </span> <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
"<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">198 │ </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">def</span> <span style=\"color: #00ff00; text-decoration-color: #00ff00\">save</span>(<span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>, data: _DI) -&gt; <span style=\"color: #0000ff; text-decoration-color: #0000ff\">None</span>: <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
"<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">199 │ │ </span><span style=\"color: #808000; text-decoration-color: #808000\">\"\"\"Saves data by delegation to the provided save method.</span> <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
"<span style=\"color: #800000; text-decoration-color: #800000\">╰──────────────────────────────────────────────────────────────────────────────────────────────────╯</span>\n",
"<span style=\"color: #ff0000; text-decoration-color: #ff0000; font-weight: bold\">DataSetError: </span>Failed while loading data from data set <span style=\"color: #800080; text-decoration-color: #800080; font-weight: bold\">XMLHousesReflector</span><span style=\"font-weight: bold\">(</span><span style=\"color: #808000; text-decoration-color: #808000\">name</span>=<span style=\"color: #800080; text-decoration-color: #800080\">my</span> own dataset<span style=\"font-weight: bold\">)</span>.\n",
"exceptions must derive from BaseException\n",
"</pre>\n"
],
"text/plain": [
"\u001b[31m╭─\u001b[0m\u001b[31m────────────────────────────── \u001b[0m\u001b[1;31mTraceback \u001b[0m\u001b[1;2;31m(most recent call last)\u001b[0m\u001b[31m ───────────────────────────────\u001b[0m\u001b[31m─╮\u001b[0m\n",
"\u001b[31m│\u001b[0m \u001b[2;33m/home/gwen/.local/lib/python3.10/site-packages/kedro/io/\u001b[0m\u001b[1;33mcore.py\u001b[0m:\u001b[94m187\u001b[0m in \u001b[92mload\u001b[0m \u001b[31m│\u001b[0m\n",
"\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n",
"\u001b[31m│\u001b[0m \u001b[2m184 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[96mself\u001b[0m._logger.debug(\u001b[33m\"\u001b[0m\u001b[33mLoading \u001b[0m\u001b[33m%s\u001b[0m\u001b[33m\"\u001b[0m, \u001b[96mstr\u001b[0m(\u001b[96mself\u001b[0m)) \u001b[31m│\u001b[0m\n",
"\u001b[31m│\u001b[0m \u001b[2m185 \u001b[0m\u001b[2m│ │ \u001b[0m \u001b[31m│\u001b[0m\n",
"\u001b[31m│\u001b[0m \u001b[2m186 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94mtry\u001b[0m: \u001b[31m│\u001b[0m\n",
"\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m187 \u001b[2m│ │ │ \u001b[0m\u001b[94mreturn\u001b[0m \u001b[96mself\u001b[0m._load() \u001b[31m│\u001b[0m\n",
"\u001b[31m│\u001b[0m \u001b[2m188 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94mexcept\u001b[0m DataSetError: \u001b[31m│\u001b[0m\n",
"\u001b[31m│\u001b[0m \u001b[2m189 \u001b[0m\u001b[2m│ │ │ \u001b[0m\u001b[94mraise\u001b[0m \u001b[31m│\u001b[0m\n",
"\u001b[31m│\u001b[0m \u001b[2m190 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94mexcept\u001b[0m \u001b[96mException\u001b[0m \u001b[94mas\u001b[0m exc: \u001b[31m│\u001b[0m\n",
"\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n",
"\u001b[31m│\u001b[0m \u001b[2;33m/media/gwen/maxtor/gwen/entrepot/cnrs/nicolas/depot/datascience/actes-princiers/src/\u001b[0m\u001b[1;33mactesdataset\u001b[0m \u001b[31m│\u001b[0m\n",
"\u001b[31m│\u001b[0m \u001b[1;33m.py\u001b[0m:\u001b[94m62\u001b[0m in \u001b[92m_load\u001b[0m \u001b[31m│\u001b[0m\n",
"\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n",
"\u001b[31m│\u001b[0m \u001b[2m 59 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[96mself\u001b[0m.filepath = filepath \u001b[31m│\u001b[0m\n",
"\u001b[31m│\u001b[0m \u001b[2m 60 \u001b[0m\u001b[2m│ \u001b[0m \u001b[31m│\u001b[0m\n",
"\u001b[31m│\u001b[0m \u001b[2m 61 \u001b[0m\u001b[2m│ \u001b[0m\u001b[94mdef\u001b[0m \u001b[92m_load\u001b[0m(\u001b[96mself\u001b[0m): \u001b[31m│\u001b[0m\n",
"\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m 62 \u001b[2m│ │ \u001b[0m\u001b[94mraise\u001b[0m \u001b[33m\"\u001b[0m\u001b[33mC\u001b[0m\u001b[33m'\u001b[0m\u001b[33mest chargé!\u001b[0m\u001b[33m\"\u001b[0m \u001b[31m│\u001b[0m\n",
"\u001b[31m│\u001b[0m \u001b[2m 63 \u001b[0m\u001b[2m│ \u001b[0m \u001b[31m│\u001b[0m\n",
"\u001b[31m│\u001b[0m \u001b[2m 64 \u001b[0m\u001b[2m│ \u001b[0m\u001b[94mdef\u001b[0m \u001b[92m_save\u001b[0m(\u001b[96mself\u001b[0m): \u001b[31m│\u001b[0m\n",
"\u001b[31m│\u001b[0m \u001b[2m 65 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94mraise\u001b[0m \u001b[96mNotImplementedError\u001b[0m(\u001b[33m\"\u001b[0m\u001b[33mAttention : dataset en lecture seule !\u001b[0m\u001b[33m\"\u001b[0m) \u001b[31m│\u001b[0m\n",
"\u001b[31m╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m\n",
"\u001b[1;91mTypeError: \u001b[0mexceptions must derive from BaseException\n",
"\n",
"\u001b[3mThe above exception was the direct cause of the following exception:\u001b[0m\n",
"\n",
"\u001b[31m╭─\u001b[0m\u001b[31m────────────────────────────── \u001b[0m\u001b[1;31mTraceback \u001b[0m\u001b[1;2;31m(most recent call last)\u001b[0m\u001b[31m ───────────────────────────────\u001b[0m\u001b[31m─╮\u001b[0m\n",
"\u001b[31m│\u001b[0m \u001b[2;33m/tmp/ipykernel_28884/\u001b[0m\u001b[1;33m4226322454.py\u001b[0m:\u001b[94m1\u001b[0m in \u001b[92m<module>\u001b[0m \u001b[31m│\u001b[0m\n",
"\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n",
"\u001b[31m│\u001b[0m \u001b[3;31m[Errno 2] No such file or directory: '/tmp/ipykernel_28884/4226322454.py'\u001b[0m \u001b[31m│\u001b[0m\n",
"\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n",
"\u001b[31m│\u001b[0m \u001b[2;33m/home/gwen/.local/lib/python3.10/site-packages/kedro/io/\u001b[0m\u001b[1;33mdata_catalog.py\u001b[0m:\u001b[94m349\u001b[0m in \u001b[92mload\u001b[0m \u001b[31m│\u001b[0m\n",
"\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n",
"\u001b[31m│\u001b[0m \u001b[2m346 \u001b[0m\u001b[2m│ │ │ \u001b[0m\u001b[33m\"\u001b[0m\u001b[33mLoading data from \u001b[0m\u001b[33m'\u001b[0m\u001b[33m%s\u001b[0m\u001b[33m'\u001b[0m\u001b[33m (\u001b[0m\u001b[33m%s\u001b[0m\u001b[33m)...\u001b[0m\u001b[33m\"\u001b[0m, name, \u001b[96mtype\u001b[0m(dataset).\u001b[91m__name__\u001b[0m \u001b[31m│\u001b[0m\n",
"\u001b[31m│\u001b[0m \u001b[2m347 \u001b[0m\u001b[2m│ │ \u001b[0m) \u001b[31m│\u001b[0m\n",
"\u001b[31m│\u001b[0m \u001b[2m348 \u001b[0m\u001b[2m│ │ \u001b[0m \u001b[31m│\u001b[0m\n",
"\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m349 \u001b[2m│ │ \u001b[0mresult = dataset.load() \u001b[31m│\u001b[0m\n",
"\u001b[31m│\u001b[0m \u001b[2m350 \u001b[0m\u001b[2m│ │ \u001b[0m \u001b[31m│\u001b[0m\n",
"\u001b[31m│\u001b[0m \u001b[2m351 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94mreturn\u001b[0m result \u001b[31m│\u001b[0m\n",
"\u001b[31m│\u001b[0m \u001b[2m352 \u001b[0m \u001b[31m│\u001b[0m\n",
"\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n",
"\u001b[31m│\u001b[0m \u001b[2;33m/home/gwen/.local/lib/python3.10/site-packages/kedro/io/\u001b[0m\u001b[1;33mcore.py\u001b[0m:\u001b[94m196\u001b[0m in \u001b[92mload\u001b[0m \u001b[31m│\u001b[0m\n",
"\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n",
"\u001b[31m│\u001b[0m \u001b[2m193 \u001b[0m\u001b[2m│ │ │ \u001b[0mmessage = ( \u001b[31m│\u001b[0m\n",
"\u001b[31m│\u001b[0m \u001b[2m194 \u001b[0m\u001b[2m│ │ │ │ \u001b[0m\u001b[33mf\u001b[0m\u001b[33m\"\u001b[0m\u001b[33mFailed while loading data from data set \u001b[0m\u001b[33m{\u001b[0m\u001b[96mstr\u001b[0m(\u001b[96mself\u001b[0m)\u001b[33m}\u001b[0m\u001b[33m.\u001b[0m\u001b[33m\\n\u001b[0m\u001b[33m{\u001b[0m\u001b[96mstr\u001b[0m(exc)\u001b[33m}\u001b[0m\u001b[33m\"\u001b[0m \u001b[31m│\u001b[0m\n",
"\u001b[31m│\u001b[0m \u001b[2m195 \u001b[0m\u001b[2m│ │ │ \u001b[0m) \u001b[31m│\u001b[0m\n",
"\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m196 \u001b[2m│ │ │ \u001b[0m\u001b[94mraise\u001b[0m DataSetError(message) \u001b[94mfrom\u001b[0m \u001b[4;96mexc\u001b[0m \u001b[31m│\u001b[0m\n",
"\u001b[31m│\u001b[0m \u001b[2m197 \u001b[0m\u001b[2m│ \u001b[0m \u001b[31m│\u001b[0m\n",
"\u001b[31m│\u001b[0m \u001b[2m198 \u001b[0m\u001b[2m│ \u001b[0m\u001b[94mdef\u001b[0m \u001b[92msave\u001b[0m(\u001b[96mself\u001b[0m, data: _DI) -> \u001b[94mNone\u001b[0m: \u001b[31m│\u001b[0m\n",
"\u001b[31m│\u001b[0m \u001b[2m199 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[33m\"\"\"Saves data by delegation to the provided save method.\u001b[0m \u001b[31m│\u001b[0m\n",
"\u001b[31m╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m\n",
"\u001b[1;91mDataSetError: \u001b[0mFailed while loading data from data set \u001b[1;35mXMLHousesReflector\u001b[0m\u001b[1m(\u001b[0m\u001b[33mname\u001b[0m=\u001b[35mmy\u001b[0m own dataset\u001b[1m)\u001b[0m.\n",
"exceptions must derive from BaseException\n"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"catalog.load(\"xmlreflector\")"
]
},
{
"cell_type": "code",
"execution_count": 38,
@ -453,7 +597,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.2"
"version": "3.10.6"
}
},
"nbformat": 4,

@ -6,6 +6,8 @@ from kedro.framework.context import KedroContext
from kedro.pipeline import Pipeline, node, pipeline
from actesdataset import XMLDataSet
# FIXME: move into a utils.py module
def tree(directory, relative_to=None):
"helper that returns a directory tree structure"
trees = dict()
@ -22,14 +24,13 @@ class ProjectContext(KedroContext):
def get_houses(self):
"""loading from generic configuration file
(that is, the global houses `houses.yaml`)"""
houses_file = self.config_loader.get("houses*")
return houses_file['houses']
houses = self.config_loader.get("houses*")
return houses['houses']
def get_houses_datapath(self):
"""loading from generic configuration file"""
config = self.config_loader.get("config*")
return config['houses_datapath']
houses = self.config_loader.get("houses*")
return houses['raw_datapath']
def _get_catalog(self, *args, **kwargs):
"catalog loader entry point"

@ -5,6 +5,8 @@ from lxml import etree
from kedro.io import AbstractDataSet, DataSetError
# FIXME: remove the inheritance
class XMLDataSet(AbstractDataSet):
"lxml.etree._ElementTree loader"
# FIXME set the typing signature !!!!
@ -23,24 +25,113 @@ class XMLDataSet(AbstractDataSet):
return source_doc
def _save(self, data:str) -> None:
# raise NotImplementedError("pas encore implemente !!!!")
with open(self._filepath, 'w') as fhandle:
fhandle.write(data)
def _describe(self) -> Dict[str, Any]:
return dict(filepath=self._filepath)
class JSONDataSet(AbstractDataSet):
def __init__(self, filepath: str):
self._filepath = filepath
class XMLHousesReflector(AbstractDataSet):
"""``XMLHousesReflector`` stores instances of ``XMLDataSet``
implementations to provide ``load`` and ``save`` capabilities
anywhere in the program. To use an ``XMLHousesReflector``, you need to
instantiate it with a file system folder path: it "reflects"
this file system of XML files.
It loads a dictionary of XML data sets.
Args:
data_sets: A dictionary of data set names and data set instances.
def _load(self) -> Dict:
with open(self._filepath, 'r') as f:
return json.load(f)
Example::
def _save(self, data: Dict) -> None:
with open(self._filepath, 'w') as f:
json.dump(data, f)
>>> from actesdataset import XMLDataSet, XMLHousesReflector
>>>
>>> cars = XMLDataSet(filepath="cars.xml")
>>> io = XMLHousesReflector(housename='bourbon', folderpath='/tmp/mydir', data_sets={'cars': cars})
# filepath, load_args=None, save_args=None):
"""
def __init__(self,
housename: str,
folderpath: str,
data_sets: dict[str, XMLDataSet] = None):
self._housename = housename
self._folderpath = folderpath
self._datasets = data_sets
# self.filepath = filepath
def _describe(self) -> Dict[str, Any]:
return dict(filepath=self._filepath)
def _load(self):
return "C'est chargé!"
def _save(self):
raise NotImplementedError("Attention : dataset en lecture seule !")
def _exists(self) -> bool:
return True
def _describe(self):
return dict(name="my own dataset")
# def load(self, name: str) -> Any:
# """Loads a registered data set.
# Args:
# name: A data set to be loaded.
# version: Optional argument for concrete data version to be loaded.
# Works only with versioned datasets.
# Returns:
# The loaded data as configured.
# """
# return result
#
## def save(self, name: str, data: Any) -> None:
## """Save data to a registered data set.
## Args:
## name: A data set to be saved to.
## data: A data object to be saved as configured in the registered
## data set.
## Raises:
## DatasetNotFoundError: When a data set with the given name
## has not yet been registered.
## Example:
## ::
## >>> import pandas as pd
## >>>
## >>> from kedro.extras.datasets.pandas import CSVDataSet
## >>>
## >>> cars = CSVDataSet(filepath="cars.csv",
## >>> load_args=None,
## >>> save_args={"index": False})
## >>> io = DataCatalog(data_sets={'cars': cars})
## >>>
## >>> df = pd.DataFrame({'col1': [1, 2],
## >>> 'col2': [4, 5],
## >>> 'col3': [5, 6]})
## >>> io.save("cars", df)
## """
## dataset = self._get_dataset(name)
### self._logger.info("Saving data to '%s' (%s)...", name, type(dataset).__name__)
## dataset.save(data)
# def _describe(self) -> Dict[str, Any]:
# return dict(filepath=self._housename)
#class JSONDataSet(AbstractDataSet):
# def __init__(self, filepath: str):
# self._filepath = filepath
# def _load(self) -> Dict:
# with open(self._filepath, 'r') as fp:
# return json.load(fp)
# def _save(self, data: Dict) -> None:
# with open(self._filepath, 'w') as fp:
# json.dump(data, fp, sort_keys=True, indent=4)
# def _describe(self) -> Dict[str, Any]:
# return dict(filepath=self._filepath)

Loading…
Cancel
Save