houses trigram

develop
gwen 2 years ago
parent 4c0f113bd7
commit 9bd5f29198

@ -15,11 +15,27 @@ from actesdataset import JSONDataSetCollection
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
# Database schemas # Database schemas
class House(Document): class House(Document):
"_id is the name"
_id = StringField(required=True, max_length=100)
name = StringField(required=True, max_length=100) name = StringField(required=True, max_length=100)
trigram = StringField(required=True, max_length=3)
particle = StringField(required=True, max_length=150) particle = StringField(required=True, max_length=150)
class Acte(Document):
"_id is the filename"
_id = StringField(required=True, max_length=150)
analysis = StringField(required=True, max_length=3000)
date = StringField(required=True, max_length=250)
# FIXME make a real date object ? or not.
date_time = StringField(required=True, max_length=15)
filename = StringField(required=True, max_length=100)
ref_acte = StringField(required=True, max_length=100)
xmlcontent = StringField(required=True) # no max_length
# pipeline functions # pipeline functions
def populate_mongo(jsondoc: JSONDataSetCollection, storage_ip: str, db_name: str, db_collection_name: str, mongodb_admin: str, mongodb_password: str) -> None: def populate_mongo(jsondoc: JSONDataSetCollection, storage_ip: str, db_name: str, db_collection_name: str, mongodb_admin: str, mongodb_password: str) -> None:
@ -33,27 +49,27 @@ def populate_mongo(jsondoc: JSONDataSetCollection, storage_ip: str, db_name: str
password = urllib.parse.quote_plus(mongodb_password) password = urllib.parse.quote_plus(mongodb_password)
mongodb_url = f"mongodb://{username}:{password}@{storage_ip}:27017/" mongodb_url = f"mongodb://{username}:{password}@{storage_ip}:27017/"
#mongodb_url = "mongodb://{}:27017/".format(storage_ip) #mongodb_url = "mongodb://{}:27017/".format(storage_ip)
logger.info("connection to the mongodb server: " + mongodb_url) logger.info("connection to the mongodb server")
# pymongo settings # pymongo settings
myclient = pymongo.MongoClient(mongodb_url) # myclient = pymongo.MongoClient(mongodb_url)
myclient = connect(db=db_name, host=mongodb_url, authentication_source='admin', alias="default")
actesdb = myclient[db_name] #actesdb = myclient[db_name]
actes_collection = actesdb[db_collection_name] #actes_collection = actesdb[db_collection_name]
# TODO faire un insert_many directement ?
for dataset_filenamestem, dataset in jsondatasets.items(): for dataset_filenamestem, dataset in jsondatasets.items():
# a manual load is required here, because # a manual load is required here, because
# the dataset **is not** registered in kedro's catalog # the dataset **is not** registered in kedro's catalog
document = dataset._load() json_document = dataset._load()
# FIXME que mettre comme id ? le filename ? json_document["_id"] = json_document["filename"]
document["_id"] = document["filename"] acte_entry = Acte(**json_document)
#logger.info(str(document)) logger.info("adding entry: " + json_document["filename"])
res = actes_collection.insert_one(document) acte_entry.save()
logger.info(res.inserted_id) #res = actes_collection.insert_one(document)
#logger.info(res.inserted_id)
# properly closes the db connection # properly closes the db connection
# FIXME with MongoClient() as client # myclient.close()
myclient.close()
return return
@ -67,6 +83,7 @@ def load_houses(yamldoc: YAMLDataSet, storage_ip: str, db_name: str, mongodb_adm
myclient = connect(db=db_name, host=mongodb_url, authentication_source='admin', alias="default") myclient = connect(db=db_name, host=mongodb_url, authentication_source='admin', alias="default")
for house_dict in yamldoc['houses'].values(): for house_dict in yamldoc['houses'].values():
house_dict['_id'] = house_dict['name']
logger.info("inserting: " + str(house_dict)) logger.info("inserting: " + str(house_dict))
#houses_col.insert_one(house_dict) #houses_col.insert_one(house_dict)
house_entry = House(**house_dict) house_entry = House(**house_dict)

@ -12,7 +12,7 @@ def create_pipeline(**kwargs) -> Pipeline:
"params:db_collection_name", "params:mongodb_admin", "params:db_collection_name", "params:mongodb_admin",
"params:mongodb_password"], "params:mongodb_password"],
outputs=None, outputs=None,
name="populate_mongo", name="populate_actes",
tags="populate_database", tags="populate_database",
), ),
node( node(
@ -20,8 +20,8 @@ def create_pipeline(**kwargs) -> Pipeline:
inputs=["houses", "params:storage_ip", "params:db_name", inputs=["houses", "params:storage_ip", "params:db_name",
"params:mongodb_admin", "params:mongodb_password"], "params:mongodb_admin", "params:mongodb_password"],
outputs=None, outputs=None,
name="load_houses", name="polulate_houses",
tags="load_houses", tags="populate_database",
) )
] ]

Loading…
Cancel
Save