{ "cells": [ { "cell_type": "markdown", "id": "aeacd24e", "metadata": {}, "source": [ "# Catalogs\n", "\n", "## Chargement des actors" ] }, { "cell_type": "code", "execution_count": 2, "id": "ae9bc24c", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
╭─────────────────────────────── Traceback (most recent call last) ────────────────────────────────╮\n",
       " in <module>:1                                                                                    \n",
       "                                                                                                  \n",
       " 1 catalog                                                                                      \n",
       "   2 dir(catalog)                                                                                 \n",
       "   3                                                                                              \n",
       "╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n",
       "NameError: name 'catalog' is not defined\n",
       "
\n" ], "text/plain": [ "\u001b[31m╭─\u001b[0m\u001b[31m──────────────────────────────\u001b[0m\u001b[31m \u001b[0m\u001b[1;31mTraceback \u001b[0m\u001b[1;2;31m(most recent call last)\u001b[0m\u001b[31m \u001b[0m\u001b[31m───────────────────────────────\u001b[0m\u001b[31m─╮\u001b[0m\n", "\u001b[31m│\u001b[0m in \u001b[92m\u001b[0m:\u001b[94m1\u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m1 catalog \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m2 \u001b[0m\u001b[96mdir\u001b[0m(catalog) \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m3 \u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m\n", "\u001b[1;91mNameError: \u001b[0mname \u001b[32m'catalog'\u001b[0m is not defined\n" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "catalog\n", "dir(catalog)" ] }, { "cell_type": "code", "execution_count": 1, "id": "40417f25", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
[06/30/23 17:50:49] INFO     Loading data from 'xmlreflector' (XMLHousesReflector)...           data_catalog.py:345\n",
       "
\n" ], "text/plain": [ "\u001b[2;36m[06/30/23 17:50:49]\u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m Loading data from \u001b[32m'xmlreflector'\u001b[0m \u001b[1m(\u001b[0mXMLHousesReflector\u001b[1m)\u001b[0m\u001b[33m...\u001b[0m \u001b]8;id=287074;file:///home/gwen/.local/lib/python3.10/site-packages/kedro/io/data_catalog.py\u001b\\\u001b[2mdata_catalog.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=134334;file:///home/gwen/.local/lib/python3.10/site-packages/kedro/io/data_catalog.py#345\u001b\\\u001b[2m345\u001b[0m\u001b]8;;\u001b\\\n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
╭─────────────────────────────── Traceback (most recent call last) ────────────────────────────────╮\n",
       " /home/gwen/.local/lib/python3.10/site-packages/kedro/io/core.py:187 in load                      \n",
       "                                                                                                  \n",
       "   184 │   │   self._logger.debug(\"Loading %s\", str(self))                                        \n",
       "   185 │   │                                                                                      \n",
       "   186 │   │   try:                                                                               \n",
       " 187 │   │   │   return self._load()                                                            \n",
       "   188 │   │   except DataSetError:                                                               \n",
       "   189 │   │   │   raise                                                                          \n",
       "   190 │   │   except Exception as exc:                                                           \n",
       "                                                                                                  \n",
       " /media/gwen/maxtor/gwen/entrepot/cnrs/nicolas/depot/datascience/actes-princiers/src/actesdataset \n",
       " .py:62 in _load                                                                                  \n",
       "                                                                                                  \n",
       "    59 │   │   self.filepath = filepath                                                           \n",
       "    60 │                                                                                          \n",
       "    61 │   def _load(self):                                                                       \n",
       "  62 │   │   raise \"C'est chargé!\"                                                              \n",
       "    63 │                                                                                          \n",
       "    64 │   def _save(self):                                                                       \n",
       "    65 │   │   raise NotImplementedError(\"Attention : dataset en lecture seule !\")                \n",
       "╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n",
       "TypeError: exceptions must derive from BaseException\n",
       "\n",
       "The above exception was the direct cause of the following exception:\n",
       "\n",
       "╭─────────────────────────────── Traceback (most recent call last) ────────────────────────────────╮\n",
       " /tmp/ipykernel_28884/4226322454.py:1 in <module>                                                 \n",
       "                                                                                                  \n",
       " [Errno 2] No such file or directory: '/tmp/ipykernel_28884/4226322454.py'                        \n",
       "                                                                                                  \n",
       " /home/gwen/.local/lib/python3.10/site-packages/kedro/io/data_catalog.py:349 in load              \n",
       "                                                                                                  \n",
       "   346 │   │   │   \"Loading data from '%s' (%s)...\", name, type(dataset).__name__                 \n",
       "   347 │   │   )                                                                                  \n",
       "   348 │   │                                                                                      \n",
       " 349 │   │   result = dataset.load()                                                            \n",
       "   350 │   │                                                                                      \n",
       "   351 │   │   return result                                                                      \n",
       "   352                                                                                            \n",
       "                                                                                                  \n",
       " /home/gwen/.local/lib/python3.10/site-packages/kedro/io/core.py:196 in load                      \n",
       "                                                                                                  \n",
       "   193 │   │   │   message = (                                                                    \n",
       "   194 │   │   │   │   f\"Failed while loading data from data set {str(self)}.\\n{str(exc)}\"        \n",
       "   195 │   │   │   )                                                                              \n",
       " 196 │   │   │   raise DataSetError(message) from exc                                           \n",
       "   197 │                                                                                          \n",
       "   198 │   def save(self, data: _DI) -> None:                                                     \n",
       "   199 │   │   \"\"\"Saves data by delegation to the provided save method.                           \n",
       "╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n",
       "DataSetError: Failed while loading data from data set XMLHousesReflector(name=my own dataset).\n",
       "exceptions must derive from BaseException\n",
       "
\n" ], "text/plain": [ "\u001b[31m╭─\u001b[0m\u001b[31m────────────────────────────── \u001b[0m\u001b[1;31mTraceback \u001b[0m\u001b[1;2;31m(most recent call last)\u001b[0m\u001b[31m ───────────────────────────────\u001b[0m\u001b[31m─╮\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2;33m/home/gwen/.local/lib/python3.10/site-packages/kedro/io/\u001b[0m\u001b[1;33mcore.py\u001b[0m:\u001b[94m187\u001b[0m in \u001b[92mload\u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m184 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[96mself\u001b[0m._logger.debug(\u001b[33m\"\u001b[0m\u001b[33mLoading \u001b[0m\u001b[33m%s\u001b[0m\u001b[33m\"\u001b[0m, \u001b[96mstr\u001b[0m(\u001b[96mself\u001b[0m)) \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m185 \u001b[0m\u001b[2m│ │ \u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m186 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94mtry\u001b[0m: \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m187 \u001b[2m│ │ │ \u001b[0m\u001b[94mreturn\u001b[0m \u001b[96mself\u001b[0m._load() \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m188 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94mexcept\u001b[0m DataSetError: \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m189 \u001b[0m\u001b[2m│ │ │ \u001b[0m\u001b[94mraise\u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m190 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94mexcept\u001b[0m \u001b[96mException\u001b[0m \u001b[94mas\u001b[0m exc: \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2;33m/media/gwen/maxtor/gwen/entrepot/cnrs/nicolas/depot/datascience/actes-princiers/src/\u001b[0m\u001b[1;33mactesdataset\u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[1;33m.py\u001b[0m:\u001b[94m62\u001b[0m in \u001b[92m_load\u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m 59 
\u001b[0m\u001b[2m│ │ \u001b[0m\u001b[96mself\u001b[0m.filepath = filepath \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m 60 \u001b[0m\u001b[2m│ \u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m 61 \u001b[0m\u001b[2m│ \u001b[0m\u001b[94mdef\u001b[0m \u001b[92m_load\u001b[0m(\u001b[96mself\u001b[0m): \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m 62 \u001b[2m│ │ \u001b[0m\u001b[94mraise\u001b[0m \u001b[33m\"\u001b[0m\u001b[33mC\u001b[0m\u001b[33m'\u001b[0m\u001b[33mest chargé!\u001b[0m\u001b[33m\"\u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m 63 \u001b[0m\u001b[2m│ \u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m 64 \u001b[0m\u001b[2m│ \u001b[0m\u001b[94mdef\u001b[0m \u001b[92m_save\u001b[0m(\u001b[96mself\u001b[0m): \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m 65 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94mraise\u001b[0m \u001b[96mNotImplementedError\u001b[0m(\u001b[33m\"\u001b[0m\u001b[33mAttention : dataset en lecture seule !\u001b[0m\u001b[33m\"\u001b[0m) \u001b[31m│\u001b[0m\n", "\u001b[31m╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m\n", "\u001b[1;91mTypeError: \u001b[0mexceptions must derive from BaseException\n", "\n", "\u001b[3mThe above exception was the direct cause of the following exception:\u001b[0m\n", "\n", "\u001b[31m╭─\u001b[0m\u001b[31m────────────────────────────── \u001b[0m\u001b[1;31mTraceback \u001b[0m\u001b[1;2;31m(most recent call last)\u001b[0m\u001b[31m ───────────────────────────────\u001b[0m\u001b[31m─╮\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2;33m/tmp/ipykernel_28884/\u001b[0m\u001b[1;33m4226322454.py\u001b[0m:\u001b[94m1\u001b[0m in \u001b[92m\u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[3;31m[Errno 2] No such file or directory: '/tmp/ipykernel_28884/4226322454.py'\u001b[0m \u001b[31m│\u001b[0m\n", 
"\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2;33m/home/gwen/.local/lib/python3.10/site-packages/kedro/io/\u001b[0m\u001b[1;33mdata_catalog.py\u001b[0m:\u001b[94m349\u001b[0m in \u001b[92mload\u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m346 \u001b[0m\u001b[2m│ │ │ \u001b[0m\u001b[33m\"\u001b[0m\u001b[33mLoading data from \u001b[0m\u001b[33m'\u001b[0m\u001b[33m%s\u001b[0m\u001b[33m'\u001b[0m\u001b[33m (\u001b[0m\u001b[33m%s\u001b[0m\u001b[33m)...\u001b[0m\u001b[33m\"\u001b[0m, name, \u001b[96mtype\u001b[0m(dataset).\u001b[91m__name__\u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m347 \u001b[0m\u001b[2m│ │ \u001b[0m) \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m348 \u001b[0m\u001b[2m│ │ \u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m349 \u001b[2m│ │ \u001b[0mresult = dataset.load() \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m350 \u001b[0m\u001b[2m│ │ \u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m351 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94mreturn\u001b[0m result \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m352 \u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2;33m/home/gwen/.local/lib/python3.10/site-packages/kedro/io/\u001b[0m\u001b[1;33mcore.py\u001b[0m:\u001b[94m196\u001b[0m in \u001b[92mload\u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m193 \u001b[0m\u001b[2m│ │ │ \u001b[0mmessage = ( \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m194 \u001b[0m\u001b[2m│ │ │ │ \u001b[0m\u001b[33mf\u001b[0m\u001b[33m\"\u001b[0m\u001b[33mFailed while loading data from data set 
\u001b[0m\u001b[33m{\u001b[0m\u001b[96mstr\u001b[0m(\u001b[96mself\u001b[0m)\u001b[33m}\u001b[0m\u001b[33m.\u001b[0m\u001b[33m\\n\u001b[0m\u001b[33m{\u001b[0m\u001b[96mstr\u001b[0m(exc)\u001b[33m}\u001b[0m\u001b[33m\"\u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m195 \u001b[0m\u001b[2m│ │ │ \u001b[0m) \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m196 \u001b[2m│ │ │ \u001b[0m\u001b[94mraise\u001b[0m DataSetError(message) \u001b[94mfrom\u001b[0m \u001b[4;96mexc\u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m197 \u001b[0m\u001b[2m│ \u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m198 \u001b[0m\u001b[2m│ \u001b[0m\u001b[94mdef\u001b[0m \u001b[92msave\u001b[0m(\u001b[96mself\u001b[0m, data: _DI) -> \u001b[94mNone\u001b[0m: \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m199 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[33m\"\"\"Saves data by delegation to the provided save method.\u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m\n", "\u001b[1;91mDataSetError: \u001b[0mFailed while loading data from data set \u001b[1;35mXMLHousesReflector\u001b[0m\u001b[1m(\u001b[0m\u001b[33mname\u001b[0m=\u001b[35mmy\u001b[0m own dataset\u001b[1m)\u001b[0m.\n", "exceptions must derive from BaseException\n" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "catalog.load(\"xmlreflector\")" ] }, { "cell_type": "code", "execution_count": 38, "id": "dc290e93", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
[06/16/23 15:56:44] INFO     Loading data from 'actors' (CSVDataSet)...                         data_catalog.py:345\n",
       "
\n" ], "text/plain": [ "\u001b[2;36m[06/16/23 15:56:44]\u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m Loading data from \u001b[32m'actors'\u001b[0m \u001b[1m(\u001b[0mCSVDataSet\u001b[1m)\u001b[0m\u001b[33m...\u001b[0m \u001b]8;id=858812;file:///media/gwen/maxtor/gwen/entrepot/cnrs/nicolas/depot/datascience/.venv/lib/python3.9/site-packages/kedro/io/data_catalog.py\u001b\\\u001b[2mdata_catalog.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=44255;file:///media/gwen/maxtor/gwen/entrepot/cnrs/nicolas/depot/datascience/.venv/lib/python3.9/site-packages/kedro/io/data_catalog.py#345\u001b\\\u001b[2m345\u001b[0m\u001b]8;;\u001b\\\n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
NAMEROLEHOUSEDATE1DATE2DATE3
0Charles Ier de BourbonprinceBourbon14001434.01456.0
1Gort, ÉtiennesecretBourbon14251440.0NaN
2ErartsecretBerry14041405.0NaN
3Jean de BerryprinceBerry13371360.01416.0
4Agnès de BourgogneprinceBourbon14071434.01476.0
\n", "
" ], "text/plain": [ " NAME ROLE HOUSE DATE1 DATE2 DATE3\n", "0 Charles Ier de Bourbon prince Bourbon 1400 1434.0 1456.0\n", "1 Gort, Étienne secret Bourbon 1425 1440.0 NaN\n", "2 Erart secret Berry 1404 1405.0 NaN\n", "3 Jean de Berry prince Berry 1337 1360.0 1416.0\n", "4 Agnès de Bourgogne prince Bourbon 1407 1434.0 1476.0" ] }, "execution_count": 38, "metadata": {}, "output_type": "execute_result" } ], "source": [ "catalog.load(\"actors\").head()" ] }, { "cell_type": "code", "execution_count": 5, "id": "eedbc7fb", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['actors', 'corpus-agnes-bourgogne', 'corpus-charles-i', 'parameters']" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "catalog.list()" ] }, { "cell_type": "code", "execution_count": 20, "id": "3168935f", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
[06/16/23 14:58:30] INFO     Loading data from 'actors' (CSVDataSet)...                         data_catalog.py:345\n",
       "
\n" ], "text/plain": [ "\u001b[2;36m[06/16/23 14:58:30]\u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m Loading data from \u001b[32m'actors'\u001b[0m \u001b[1m(\u001b[0mCSVDataSet\u001b[1m)\u001b[0m\u001b[33m...\u001b[0m \u001b]8;id=659228;file:///media/gwen/maxtor/gwen/entrepot/cnrs/nicolas/depot/datascience/.venv/lib/python3.9/site-packages/kedro/io/data_catalog.py\u001b\\\u001b[2mdata_catalog.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=160900;file:///media/gwen/maxtor/gwen/entrepot/cnrs/nicolas/depot/datascience/.venv/lib/python3.9/site-packages/kedro/io/data_catalog.py#345\u001b\\\u001b[2m345\u001b[0m\u001b]8;;\u001b\\\n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/plain": [ "['NAME', 'ROLE', 'HOUSE', 'DATE1', 'DATE2', 'DATE3']" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "actors = catalog.load(\"actors\")\n", "actors.columns.tolist()" ] }, { "cell_type": "markdown", "id": "902dd387", "metadata": {}, "source": [ "## Nettoyage des valeurs non renseignées\n", "\n", "Ligne d'origine (ligne 9) : \n", "`\"René d'Anjou\";\"prince\";\"Anjou\";\"XXXX\";\"XXXX\";\"XXXX\"`\n" ] }, { "cell_type": "code", "execution_count": 37, "id": "24fc62ce", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "NAME Bernard d'Armagnac\n", "ROLE prince\n", "HOUSE Armagnac\n", "DATE1 NaN\n", "DATE2 NaN\n", "DATE3 NaN\n", "Name: 9, dtype: object" ] }, "execution_count": 37, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import numpy as np\n", "\n", "# \"XXXX\" is the placeholder found in the source CSV for values that were not filled in;\n", "# replace it with a real missing value. `actors` is left untouched and a new frame is built.\n", "# np.nan is used because the np.NaN alias was removed in NumPy 2.0.\n", "cleaned_actors = actors.replace(\"XXXX\", np.nan)\n", "\n", "# Inspect positional row 9 to check that the placeholders became NaN.\n", "cleaned_actors.iloc[9]" ] }, { "cell_type": "markdown", "id": "ee287f62", "metadata": {}, "source": [ "## Autres catalogues" ] }, { "cell_type": "code", "execution_count": 1, "id": "053ed17c", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['actors',\n", " 'corpus-agnes-bourgogne',\n", " 
'corpus-charles-i',\n", " 'dataset_test',\n", " 'preprocessed_dataset_test',\n", " 'load_xml',\n", " 'preprocess_html',\n", " 'load_full_xml_catalog',\n", " 'preprocess_full_catalog_html',\n", " 'preprocessed_actors',\n", " 'parameters',\n", " 'params:xlststylesheet']" ] }, "execution_count": 1, "metadata": {}, "output_type": "execute_result" } ], "source": [ "catalog.list()" ] }, { "cell_type": "code", "execution_count": 2, "id": "660b898c", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
[06/20/23 16:44:19] INFO     Loading data from 'load_xml' (XMLDataSet)...                       data_catalog.py:345\n",
       "
\n" ], "text/plain": [ "\u001b[2;36m[06/20/23 16:44:19]\u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m Loading data from \u001b[32m'load_xml'\u001b[0m \u001b[1m(\u001b[0mXMLDataSet\u001b[1m)\u001b[0m\u001b[33m...\u001b[0m \u001b]8;id=813727;file:///media/gwen/maxtor/gwen/entrepot/cnrs/nicolas/depot/datascience/.venv/lib/python3.9/site-packages/kedro/io/data_catalog.py\u001b\\\u001b[2mdata_catalog.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=696103;file:///media/gwen/maxtor/gwen/entrepot/cnrs/nicolas/depot/datascience/.venv/lib/python3.9/site-packages/kedro/io/data_catalog.py#345\u001b\\\u001b[2m345\u001b[0m\u001b]8;;\u001b\\\n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/plain": [ "" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "catalog.load(\"load_xml\")" ] }, { "cell_type": "markdown", "id": "a46ddef9", "metadata": {}, "source": [ "## PartitionedDataset catalogs" ] }, { "cell_type": "code", "execution_count": 1, "id": "96a60999", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
[06/22/23 15:01:39] INFO     Loading data from 'load_full_xml_catalog' (PartitionedDataSet)...  data_catalog.py:345\n",
       "
\n" ], "text/plain": [ "\u001b[2;36m[06/22/23 15:01:39]\u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m Loading data from \u001b[32m'load_full_xml_catalog'\u001b[0m \u001b[1m(\u001b[0mPartitionedDataSet\u001b[1m)\u001b[0m\u001b[33m...\u001b[0m \u001b]8;id=663642;file:///media/gwen/maxtor/gwen/entrepot/cnrs/nicolas/depot/datascience/.venv/lib/python3.9/site-packages/kedro/io/data_catalog.py\u001b\\\u001b[2mdata_catalog.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=709654;file:///media/gwen/maxtor/gwen/entrepot/cnrs/nicolas/depot/datascience/.venv/lib/python3.9/site-packages/kedro/io/data_catalog.py#345\u001b\\\u001b[2m345\u001b[0m\u001b]8;;\u001b\\\n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
                    INFO     Loading data from 'load_full_xml_catalog' (PartitionedDataSet)...  data_catalog.py:345\n",
       "
\n" ], "text/plain": [ "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m Loading data from \u001b[32m'load_full_xml_catalog'\u001b[0m \u001b[1m(\u001b[0mPartitionedDataSet\u001b[1m)\u001b[0m\u001b[33m...\u001b[0m \u001b]8;id=916916;file:///media/gwen/maxtor/gwen/entrepot/cnrs/nicolas/depot/datascience/.venv/lib/python3.9/site-packages/kedro/io/data_catalog.py\u001b\\\u001b[2mdata_catalog.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=129179;file:///media/gwen/maxtor/gwen/entrepot/cnrs/nicolas/depot/datascience/.venv/lib/python3.9/site-packages/kedro/io/data_catalog.py#345\u001b\\\u001b[2m345\u001b[0m\u001b]8;;\u001b\\\n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/plain": [ "{'anj_is_i_1441_08_05a': >,\n", " 'anj_lo_i_1360_08a': >,\n", " 'anj_lo_i_1371_07_08a': >,\n", " 'anj_lo_ii_1401_04_28a': >,\n", " 'anj_lo_ii_1402_11_07a': >,\n", " 'anj_lo_ii_1405_05_02a': >,\n", " 'anj_lo_ii_1406_01_26a': >,\n", " 'anj_lo_ii_1406_04_15a': >,\n", " 'anj_lo_ii_1409_08_07a': >,\n", " 'anj_lo_ii_1409_12_12a': >,\n", " 'anj_lo_ii_1413_03_01a': >,\n", " 'anj_lo_iii_1420_11_04a': >,\n", " 'anj_lo_iii_1422_02_09a': >,\n", " 'anj_lo_iii_1424_03_31a': >,\n", " 'anj_lo_iii_1424_03_31b': >,\n", " 'anj_lo_iii_1428_06_07a': >,\n", " 'anj_lo_iii_1428_06_07b': >,\n", " 'anj_lo_iii_1432_10_27a': >,\n", " 'anj_ma_i_1370_12_10a': >,\n", " 'anj_re_i_1437_09_16a': >,\n", " 'anj_re_i_1439_11_22a': >,\n", " 'anj_re_i_1440_01_20a': >,\n", " 'anj_re_i_1445a': >,\n", " 'anj_re_i_1450_11_07a': >,\n", " 'anj_re_i_1454_01_14a': >,\n", " 'anj_re_i_1454_02_09a': >,\n", " 'anj_re_i_1454_06_17a': >,\n", " 'anj_re_i_1454_09_01a': >,\n", " 'anj_re_i_1455_11_13a': >,\n", " 'anj_re_i_1456_11_29a': >,\n", " 'anj_re_i_1457_01_04a': >,\n", " 'anj_re_i_1459_03_17a': >,\n", " 'anj_re_i_1459_04_16a': >,\n", " 'anj_re_i_1463_07_21a': >,\n", " 'anj_re_i_1466_12_16a': >,\n", " 'anj_re_i_1474_02_01a': >,\n", " 'anj_re_i_1475_05_26a': >,\n", " 
'anj_yo_i_1418_12_20a': >,\n", " 'anj_yo_i_1421_06_28a': >,\n", " 'anj_yo_i_1442_02_24a': >}" ] }, "execution_count": 1, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Load the partitioned dataset once and keep the returned mapping for the cells below.\n", "partitions = catalog.load('load_full_xml_catalog')\n", "# Display the mapping that was just loaded instead of loading the dataset a second time.\n", "partitions" ] }, { "cell_type": "code", "execution_count": 4, "id": "bdc37079", "metadata": {}, "outputs": [ { "data": { "text/plain": [ ">" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "partitions['anj_is_i_1441_08_05a']" ] } ], "metadata": { "kernelspec": { "display_name": "Kedro (actes_princiers)", "language": "python", "name": "kedro_actes_princiers" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.2" } }, "nbformat": 4, "nbformat_minor": 5 }