{ "cells": [ { "cell_type": "markdown", "id": "aeacd24e", "metadata": {}, "source": [ "# Catalogs\n", "\n", "## Chargement des actors" ] }, { "cell_type": "code", "execution_count": 4, "id": "ae9bc24c", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "catalog" ] }, { "cell_type": "code", "execution_count": 38, "id": "dc290e93", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
[06/16/23 15:56:44] INFO     Loading data from 'actors' (CSVDataSet)...                         data_catalog.py:345\n",
       "
\n" ], "text/plain": [ "\u001b[2;36m[06/16/23 15:56:44]\u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m Loading data from \u001b[32m'actors'\u001b[0m \u001b[1m(\u001b[0mCSVDataSet\u001b[1m)\u001b[0m\u001b[33m...\u001b[0m \u001b]8;id=858812;file:///media/gwen/maxtor/gwen/entrepot/cnrs/nicolas/depot/datascience/.venv/lib/python3.9/site-packages/kedro/io/data_catalog.py\u001b\\\u001b[2mdata_catalog.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=44255;file:///media/gwen/maxtor/gwen/entrepot/cnrs/nicolas/depot/datascience/.venv/lib/python3.9/site-packages/kedro/io/data_catalog.py#345\u001b\\\u001b[2m345\u001b[0m\u001b]8;;\u001b\\\n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
NAMEROLEHOUSEDATE1DATE2DATE3
0Charles Ier de BourbonprinceBourbon14001434.01456.0
1Gort, ÉtiennesecretBourbon14251440.0NaN
2ErartsecretBerry14041405.0NaN
3Jean de BerryprinceBerry13371360.01416.0
4Agnès de BourgogneprinceBourbon14071434.01476.0
\n", "
" ], "text/plain": [ " NAME ROLE HOUSE DATE1 DATE2 DATE3\n", "0 Charles Ier de Bourbon prince Bourbon 1400 1434.0 1456.0\n", "1 Gort, Étienne secret Bourbon 1425 1440.0 NaN\n", "2 Erart secret Berry 1404 1405.0 NaN\n", "3 Jean de Berry prince Berry 1337 1360.0 1416.0\n", "4 Agnès de Bourgogne prince Bourbon 1407 1434.0 1476.0" ] }, "execution_count": 38, "metadata": {}, "output_type": "execute_result" } ], "source": [ "catalog.load(\"actors\").head()" ] }, { "cell_type": "code", "execution_count": 5, "id": "eedbc7fb", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['actors', 'corpus-agnes-bourgogne', 'corpus-charles-i', 'parameters']" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "catalog.list()" ] }, { "cell_type": "code", "execution_count": 20, "id": "3168935f", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
[06/16/23 14:58:30] INFO     Loading data from 'actors' (CSVDataSet)...                         data_catalog.py:345\n",
       "
\n" ], "text/plain": [ "\u001b[2;36m[06/16/23 14:58:30]\u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m Loading data from \u001b[32m'actors'\u001b[0m \u001b[1m(\u001b[0mCSVDataSet\u001b[1m)\u001b[0m\u001b[33m...\u001b[0m \u001b]8;id=659228;file:///media/gwen/maxtor/gwen/entrepot/cnrs/nicolas/depot/datascience/.venv/lib/python3.9/site-packages/kedro/io/data_catalog.py\u001b\\\u001b[2mdata_catalog.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=160900;file:///media/gwen/maxtor/gwen/entrepot/cnrs/nicolas/depot/datascience/.venv/lib/python3.9/site-packages/kedro/io/data_catalog.py#345\u001b\\\u001b[2m345\u001b[0m\u001b]8;;\u001b\\\n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/plain": [ "['NAME', 'ROLE', 'HOUSE', 'DATE1', 'DATE2', 'DATE3']" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "actors = catalog.load(\"actors\")\n", "actors.columns.tolist()" ] }, { "cell_type": "markdown", "id": "902dd387", "metadata": {}, "source": [ "## Nettoyage des valeurs non renseignées\n", "\n", "Ligne d'origine (ligne 9) : \n", "`\"René d'Anjou\";\"prince\";\"Anjou\";\"XXXX\";\"XXXX\";\"XXXX\"`\n" ] }, { "cell_type": "code", "execution_count": 37, "id": "24fc62ce", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "NAME Bernard d'Armagnac\n", "ROLE prince\n", "HOUSE Armagnac\n", "DATE1 NaN\n", "DATE2 NaN\n", "DATE3 NaN\n", "Name: 9, dtype: object" ] }, "execution_count": 37, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import numpy as np\n", "\n", "# \"XXXX\" is the placeholder for an unknown value in the actors data (see the sample\n", "# row quoted in the markdown cell above); replace it with NaN. The result goes to a\n", "# new frame, so `actors` itself is left untouched.\n", "# np.nan is used because the np.NaN alias was removed in NumPy 2.0.\n", "cleaned_actors = actors.replace(\"XXXX\", np.nan)\n", "\n", "# Spot-check one cleaned row by position.\n", "# NOTE(review): the markdown above quotes René d'Anjou as \"ligne 9\", while the saved\n", "# output for iloc[9] is Bernard d'Armagnac; confirm which row is meant.\n", "cleaned_actors.iloc[9]" ] } ], "metadata": { "kernelspec": { "display_name": "Kedro (actes_princiers)", "language": "python", "name": "kedro_actes_princiers" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", 
"nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.2" } }, "nbformat": 4, "nbformat_minor": 5 }