You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

99 lines
3.5 KiB
Python

This file contains invisible Unicode characters!

This file contains invisible Unicode characters that may be processed differently from what appears below. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to reveal hidden characters.

"""
Mongodb storage init
- collections creation
- helper queries
TODO : maybe put the mongodb connector into the flask app object
with the pymongo library.
"""
import urllib.parse
from unidecode import unidecode # to remove accents in house and prince names in the urls
from pymongo import MongoClient
from .config import dbadmin, dbpassword, server_ip
# ______________________________________________________________________________
# database connexion
def database_connexion(username, password, server_ip, port=27017):
    """Open a MongoDB client and return a handle on the ``actesdb`` database.

    Args:
        username: database user name (raw, not URL-escaped).
        password: database password (raw, not URL-escaped).
        server_ip: host name or IP address of the MongoDB server.
        port: TCP port of the MongoDB server.

    Returns:
        The ``actesdb`` database object of the newly created client.
    """
    # Use the credentials that were passed in: the previous version ignored
    # both parameters and always read the module-level config values.
    # They are percent-escaped because they are embedded in the connection URI.
    quoted_user = urllib.parse.quote_plus(username)
    quoted_password = urllib.parse.quote_plus(password)
    dbclient = MongoClient(
        f'mongodb://{quoted_user}:{quoted_password}@{server_ip}:{port}'
    )
    # database
    return dbclient["actesdb"]
# Module-wide database handle, created once when this module is imported.
actesdb = database_connexion(
    username=dbadmin,
    password=dbpassword,
    server_ip=server_ip,
)

# Collection handles, looked up in the order house, acte, helpers.
# The acte collection is the most important one.
housecol, actecol, helpers = (
    actesdb[collection_name]
    for collection_name in ("house", "acte", "helpers")
)
#folium_map = actesdb["folium_map"]
# ______________________________________________________________________________
# storage extractor utilities
def extract_princes_in_houses():
    """Map every house to the distinct princes found in its actes.

    The mapping is built from queries on the storage (not from the csv
    metadata): for each document of the house collection, one aggregation
    is run on the acte collection.  Keys are the lower-cased house names,
    values are lists of ``{'prince_name': ..., 'prince_code': ...}`` dicts.

    sample:
    >>> extract_princes_in_houses()['anjou']
    [{'prince_name': "Louis Ier d'Anjou", 'prince_code': 'lo_i'}, {'prince_name': "Louis III d'Anjou", 'prince_code': 'lo_iii'}, ...]
    """
    def distinct_princes(housename):
        # Group the actes of one house by (prince_name, prince_code) so that
        # each prince appears only once; the grouping key is the payload.
        pipeline = [
            {"$match": {"house": housename}},
            {
                "$group": {
                    "_id": {
                        "prince_name": "$prince_name",
                        "prince_code": "$prince_code",
                    }
                }
            },
        ]
        return [group["_id"] for group in actecol.aggregate(pipeline)]

    lowered_names = (house_doc["name"].lower() for house_doc in housecol.find())
    return {name: distinct_princes(name) for name in lowered_names}
def normalize_trigrams(trigram):
    """Invert a mapping and strip the accents from its (former) values.

    Every ``key -> value`` pair of *trigram* becomes
    ``unidecode(value) -> key`` in the returned dict, which makes the names
    usable as lookup keys for the uri routes.

    sample: Alençon -> Alencon
            Orléans -> Orleans
    """
    inverted = {}
    for code, name in trigram.items():
        inverted[unidecode(name)] = code
    return inverted
# ______________________________________________________________________________
# In-memory lookup tables, extracted from the database once at import time.
# TODO: if it takes too much time at launch, put it in something like
# a `flask init` procedure
# NOTE(review): pymongo's find_one() returns None when no document matches,
# in which case the subscripts below fail -- confirm the helpers collection
# is always populated before this module is imported.
helpers_dicts = helpers.find_one()
house_trigram = helpers_dicts["house_trigram"]
prince_bigram = helpers_dicts["prince_bigram"]
# Runs one aggregation per house document (see extract_princes_in_houses).
princes_in_houses = extract_princes_in_houses()
# normalized_trigrams: both tables are rebound to their inverted,
# accent-free form, which is what the route helpers below index into.
house_trigram = normalize_trigrams(house_trigram)
prince_bigram = normalize_trigrams(prince_bigram)
def bigram_prince(prince):
    """Translate a prince route segment into its coded form.

    sample: Charles_i -> ch_i

    The segment must contain exactly one underscore, otherwise the
    unpacking below raises ValueError.  The part before the underscore is
    looked up in ``prince_bigram`` (KeyError when it is unknown).
    """
    first_name, ordinal = prince.split("_")
    return "_".join((prince_bigram[first_name], ordinal))
# TODO: write tests in the datascience project `datascience/tests`
#print(bigram_prince("Agnes"))
#print(bigram_prince("Arthur"))
#print(bigram_prince("Bernard"))
#print(trigram_house("Anjou"))
#print(trigram_house("Orleans"))
def trigram_house(house):
    """Return the code stored for *house* in the ``house_trigram`` table.

    Raises KeyError when *house* is not a key of the table.
    """
    house_code = house_trigram[house]
    return house_code
def make_acteid_from_route(house=None, prince=None, date_and_item=None):
    """Build an acte id from the three segments of an acte route.

    sample: /acte/Anjou/Isabelle_i/1441_08_05a -> anj_isa_i_1441_08_05a
    """
    # The house is resolved first, then the prince, then the pieces are joined.
    house_code = trigram_house(house)
    prince_code = bigram_prince(prince)
    return "_".join((house_code, prince_code, date_and_item))