You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

272 lines
9.7 KiB
Plaintext

{
"cells": [
{
"cell_type": "markdown",
"id": "951f178d",
"metadata": {},
"source": [
"# Catalogs\n",
"\n",
"## Chargement des actors"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "ae9bc24c",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<kedro.io.data_catalog.DataCatalog at 0x7fbafd365970>"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"catalog"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "dc290e93",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"color: #7fbfbf; text-decoration-color: #7fbfbf\">[06/16/23 14:56:53] </span><span style=\"color: #000080; text-decoration-color: #000080\">INFO </span> Loading data from <span style=\"color: #008000; text-decoration-color: #008000\">'actors'</span> <span style=\"font-weight: bold\">(</span>CSVDataSet<span style=\"font-weight: bold\">)</span><span style=\"color: #808000; text-decoration-color: #808000\">...</span> <a href=\"file:///media/gwen/maxtor/gwen/entrepot/cnrs/nicolas/depot/datascience/.venv/lib/python3.9/site-packages/kedro/io/data_catalog.py\" target=\"_blank\"><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">data_catalog.py</span></a><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">:</span><a href=\"file:///media/gwen/maxtor/gwen/entrepot/cnrs/nicolas/depot/datascience/.venv/lib/python3.9/site-packages/kedro/io/data_catalog.py#345\" target=\"_blank\"><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">345</span></a>\n",
"</pre>\n"
],
"text/plain": [
"\u001b[2;36m[06/16/23 14:56:53]\u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m Loading data from \u001b[32m'actors'\u001b[0m \u001b[1m(\u001b[0mCSVDataSet\u001b[1m)\u001b[0m\u001b[33m...\u001b[0m \u001b]8;id=755052;file:///media/gwen/maxtor/gwen/entrepot/cnrs/nicolas/depot/datascience/.venv/lib/python3.9/site-packages/kedro/io/data_catalog.py\u001b\\\u001b[2mdata_catalog.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=546933;file:///media/gwen/maxtor/gwen/entrepot/cnrs/nicolas/depot/datascience/.venv/lib/python3.9/site-packages/kedro/io/data_catalog.py#345\u001b\\\u001b[2m345\u001b[0m\u001b]8;;\u001b\\\n"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>NAME</th>\n",
" <th>ROLE</th>\n",
" <th>HOUSE</th>\n",
" <th>DATE1</th>\n",
" <th>DATE2</th>\n",
" <th>DATE3</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Charles Ier de Bourbon</td>\n",
" <td>prince</td>\n",
" <td>Bourbon</td>\n",
" <td>1400</td>\n",
" <td>1434</td>\n",
" <td>1456</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Gort, Étienne</td>\n",
" <td>secret</td>\n",
" <td>Bourbon</td>\n",
" <td>1425</td>\n",
" <td>1440</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Erart</td>\n",
" <td>secret</td>\n",
" <td>Berry</td>\n",
" <td>1404</td>\n",
" <td>1405</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Jean de Berry</td>\n",
" <td>prince</td>\n",
" <td>Berry</td>\n",
" <td>1337</td>\n",
" <td>1360</td>\n",
" <td>1416</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Agnès de Bourgogne</td>\n",
" <td>prince</td>\n",
" <td>Bourbon</td>\n",
" <td>1407</td>\n",
" <td>1434</td>\n",
" <td>1476</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" NAME ROLE HOUSE DATE1 DATE2 DATE3\n",
"0 Charles Ier de Bourbon prince Bourbon 1400 1434 1456\n",
"1 Gort, Étienne secret Bourbon 1425 1440 NaN\n",
"2 Erart secret Berry 1404 1405 NaN\n",
"3 Jean de Berry prince Berry 1337 1360 1416\n",
"4 Agnès de Bourgogne prince Bourbon 1407 1434 1476"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"catalog.load(\"actors\").head()"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "fbccaa41",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['actors', 'corpus-agnes-bourgogne', 'corpus-charles-i', 'parameters']"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"catalog.list()"
]
},
{
"cell_type": "code",
"execution_count": 20,
"id": "530a8932",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"color: #7fbfbf; text-decoration-color: #7fbfbf\">[06/16/23 14:58:30] </span><span style=\"color: #000080; text-decoration-color: #000080\">INFO </span> Loading data from <span style=\"color: #008000; text-decoration-color: #008000\">'actors'</span> <span style=\"font-weight: bold\">(</span>CSVDataSet<span style=\"font-weight: bold\">)</span><span style=\"color: #808000; text-decoration-color: #808000\">...</span> <a href=\"file:///media/gwen/maxtor/gwen/entrepot/cnrs/nicolas/depot/datascience/.venv/lib/python3.9/site-packages/kedro/io/data_catalog.py\" target=\"_blank\"><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">data_catalog.py</span></a><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">:</span><a href=\"file:///media/gwen/maxtor/gwen/entrepot/cnrs/nicolas/depot/datascience/.venv/lib/python3.9/site-packages/kedro/io/data_catalog.py#345\" target=\"_blank\"><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">345</span></a>\n",
"</pre>\n"
],
"text/plain": [
"\u001b[2;36m[06/16/23 14:58:30]\u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m Loading data from \u001b[32m'actors'\u001b[0m \u001b[1m(\u001b[0mCSVDataSet\u001b[1m)\u001b[0m\u001b[33m...\u001b[0m \u001b]8;id=659228;file:///media/gwen/maxtor/gwen/entrepot/cnrs/nicolas/depot/datascience/.venv/lib/python3.9/site-packages/kedro/io/data_catalog.py\u001b\\\u001b[2mdata_catalog.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=160900;file:///media/gwen/maxtor/gwen/entrepot/cnrs/nicolas/depot/datascience/.venv/lib/python3.9/site-packages/kedro/io/data_catalog.py#345\u001b\\\u001b[2m345\u001b[0m\u001b]8;;\u001b\\\n"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"['NAME', 'ROLE', 'HOUSE', 'DATE1', 'DATE2', 'DATE3']"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"actors = catalog.load(\"actors\")\n",
"actors.columns.tolist()"
]
},
{
"cell_type": "markdown",
"id": "7f10c2c3",
"metadata": {},
"source": [
"## Nettoyage des valeurs non renseignées\n",
"\n",
"Ligne d'origine (ligne 9) : \n",
"`\"René d'Anjou\";\"prince\";\"Anjou\";\"XXXX\";\"XXXX\";\"XXXX\"`\n"
]
},
{
"cell_type": "code",
"execution_count": 37,
"id": "ea0451df",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"NAME Bernard d'Armagnac\n",
"ROLE prince\n",
"HOUSE Armagnac\n",
"DATE1 NaN\n",
"DATE2 NaN\n",
"DATE3 NaN\n",
"Name: 9, dtype: object"
]
},
"execution_count": 37,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#actors.values\n",
"import numpy as np\n",
"cleaned_actors = actors.replace(\"XXXX\", np.NaN)\n",
"actors.head()\n",
"#actors.values\n",
"cleaned_actors.iloc[9]"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Kedro (actes_princiers)",
"language": "python",
"name": "kedro_actes_princiers"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.2"
}
},
"nbformat": 4,
"nbformat_minor": 5
}