|
|
|
|
@ -15,11 +15,27 @@ from actesdataset import JSONDataSetCollection
|
|
|
|
|
|
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Database schemas
|
|
|
|
|
class House(Document):
    """Database schema for a house entry; ``_id`` is the name.

    Presumably a MongoEngine ``Document`` (the import is not visible
    here -- confirm). All fields are mandatory strings with an upper
    bound on their length.
    """

    # Explicit identifier: load_houses copies the ``name`` value into it.
    _id = StringField(required=True, max_length=100)
    name = StringField(required=True, max_length=100)
    # At most three characters.
    trigram = StringField(required=True, max_length=3)
    particle = StringField(required=True, max_length=150)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class Acte(Document):
    """Database schema for an acte entry; ``_id`` is the filename.

    Presumably a MongoEngine ``Document`` (the import is not visible
    here -- confirm). Every field is a mandatory string.
    """

    # NOTE(review): populate_mongo copies ``filename`` into ``_id``, yet
    # ``_id`` allows 150 characters while ``filename`` allows 100 --
    # confirm that the difference is intended.
    _id = StringField(required=True, max_length=150)
    analysis = StringField(required=True, max_length=3000)
    date = StringField(required=True, max_length=250)
    # FIXME: make this a real date object? Or not.
    date_time = StringField(required=True, max_length=15)
    filename = StringField(required=True, max_length=100)
    ref_acte = StringField(required=True, max_length=100)
    xmlcontent = StringField(required=True)  # no max_length
|
|
|
|
|
|
|
|
|
|
# pipeline functions
|
|
|
|
|
def populate_mongo(jsondoc: JSONDataSetCollection, storage_ip: str, db_name: str, db_collection_name: str, mongodb_admin: str, mongodb_password: str) -> None:
|
|
|
|
|
|
|
|
|
|
@ -33,27 +49,27 @@ def populate_mongo(jsondoc: JSONDataSetCollection, storage_ip: str, db_name: str
|
|
|
|
|
password = urllib.parse.quote_plus(mongodb_password)
|
|
|
|
|
mongodb_url = f"mongodb://{username}:{password}@{storage_ip}:27017/"
|
|
|
|
|
#mongodb_url = "mongodb://{}:27017/".format(storage_ip)
|
|
|
|
|
logger.info("connection to the mongodb server: " + mongodb_url)
|
|
|
|
|
logger.info("connection to the mongodb server")
|
|
|
|
|
|
|
|
|
|
# pymongo settings
|
|
|
|
|
myclient = pymongo.MongoClient(mongodb_url)
|
|
|
|
|
# myclient = pymongo.MongoClient(mongodb_url)
|
|
|
|
|
myclient = connect(db=db_name, host=mongodb_url, authentication_source='admin', alias="default")
|
|
|
|
|
|
|
|
|
|
actesdb = myclient[db_name]
|
|
|
|
|
actes_collection = actesdb[db_collection_name]
|
|
|
|
|
#actesdb = myclient[db_name]
|
|
|
|
|
#actes_collection = actesdb[db_collection_name]
|
|
|
|
|
|
|
|
|
|
# TODO: do a single insert_many directly?
|
|
|
|
|
for dataset_filenamestem, dataset in jsondatasets.items():
|
|
|
|
|
# a manual load is required here, because
|
|
|
|
|
# the dataset **is not** registered in kedro's catalog
|
|
|
|
|
document = dataset._load()
|
|
|
|
|
# FIXME: what should be used as the id? The filename?
|
|
|
|
|
document["_id"] = document["filename"]
|
|
|
|
|
#logger.info(str(document))
|
|
|
|
|
res = actes_collection.insert_one(document)
|
|
|
|
|
logger.info(res.inserted_id)
|
|
|
|
|
json_document = dataset._load()
|
|
|
|
|
json_document["_id"] = json_document["filename"]
|
|
|
|
|
acte_entry = Acte(**json_document)
|
|
|
|
|
logger.info("adding entry: " + json_document["filename"])
|
|
|
|
|
acte_entry.save()
|
|
|
|
|
#res = actes_collection.insert_one(document)
|
|
|
|
|
#logger.info(res.inserted_id)
|
|
|
|
|
# properly closes the db connection
|
|
|
|
|
# FIXME with MongoClient() as client
|
|
|
|
|
myclient.close()
|
|
|
|
|
# myclient.close()
|
|
|
|
|
return
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@ -67,6 +83,7 @@ def load_houses(yamldoc: YAMLDataSet, storage_ip: str, db_name: str, mongodb_adm
|
|
|
|
|
myclient = connect(db=db_name, host=mongodb_url, authentication_source='admin', alias="default")
|
|
|
|
|
|
|
|
|
|
for house_dict in yamldoc['houses'].values():
|
|
|
|
|
house_dict['_id'] = house_dict['name']
|
|
|
|
|
logger.info("inserting: " + str(house_dict))
|
|
|
|
|
#houses_col.insert_one(house_dict)
|
|
|
|
|
house_entry = House(**house_dict)
|
|
|
|
|
|