"""Load JSON datasets and insert them into a MongoDB collection."""
import logging
from pathlib import Path
from typing import Dict
from kedro . framework . session import KedroSession
from actesdataset import JSONDataSetCollection
import pymongo
# Module-level logger, named after this module through __name__.
logger = logging . getLogger ( __name__ )
def populate_mongo(
    jsondoc: JSONDataSetCollection,
    storage_ip: str,
    db_name: str,
    db_collection_name: str,
) -> None:
    """Insert every JSON document held by ``jsondoc`` into MongoDB.

    Each dataset found in ``jsondoc.datasets`` is loaded and inserted as one
    document whose ``_id`` is set to the document's own ``"filename"`` value.

    Args:
        jsondoc: Collection exposing a ``datasets`` mapping whose values are
            dataset objects providing a ``_load()`` method.
        storage_ip: Host name or IP address of the MongoDB server; the
            connection is made on port 27017.
        db_name: Name of the target database.
        db_collection_name: Name of the target collection in that database.

    Raises:
        KeyError: If a loaded document has no ``"filename"`` entry
            (presumably documents are dicts -- confirm against the dataset
            implementation).
    """
    mongodb_url = f"mongodb://{storage_ip}:27017/"
    logger.info("connection to the mongodb server: %s", mongodb_url)

    # The context manager closes the client (and its connection pool) even
    # when an insert fails part-way through the loop.
    with pymongo.MongoClient(mongodb_url) as client:
        actes_collection = client[db_name][db_collection_name]

        # TODO: use a single insert_many call instead?
        for dataset in jsondoc.datasets.values():
            # A manual load is required here, because the dataset
            # **is not** registered in kedro's catalog.
            document = dataset._load()
            # FIXME: what should be used as the id? The filename?
            # NOTE(review): insert_one raises DuplicateKeyError when a
            # document with this _id already exists, so re-running on the
            # same files fails -- confirm whether an upsert is wanted.
            document["_id"] = document["filename"]
            result = actes_collection.insert_one(document)
            logger.info(result.inserted_id)