You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

99 lines
3.5 KiB
Python

2 years ago
"""
MongoDB storage initialisation

- collections creation
- helper queries

TODO: maybe put the MongoDB connector into the Flask app object
with the pymongo library.
"""
import urllib.parse
from unidecode import unidecode # to remove accents in house and prince names in the urls
from pymongo import MongoClient
from .config import dbadmin, dbpassword, server_ip
2 years ago
2 years ago
# ______________________________________________________________________________
# database connexion
2 years ago
def database_connexion(username, password, server_ip):
    """Open a MongoDB client and return its ``actesdb`` database.

    Args:
        username: MongoDB user name; percent-escaped before being placed
            in the connection URI.
        password: MongoDB password; percent-escaped the same way.
        server_ip: address of the MongoDB server (port 27017 is used).

    Returns:
        The ``actesdb`` database object obtained from the client.
    """
    # Escape the caller-supplied credentials so that reserved characters
    # (':', '@', '/', ...) cannot corrupt the connection URI.
    quoted_username = urllib.parse.quote_plus(username)
    quoted_password = urllib.parse.quote_plus(password)
    dbclient = MongoClient(
        f'mongodb://{quoted_username}:{quoted_password}@{server_ip}:27017'
    )
    # database
    return dbclient["actesdb"]
# Module-level handle on the "actesdb" database, created when this module is
# loaded, using the credentials and server address imported from .config.
actesdb = database_connexion(dbadmin, dbpassword, server_ip)
2 years ago
# collections
# housecol: "house" collection; its documents are read for their 'name' field.
housecol = actesdb["house"]
# the acte collection is the most important collection
actecol = actesdb["acte"]
# helpers: "helpers" collection; a document of it provides the
# "house_trigram" and "prince_bigram" mappings loaded further down.
helpers = actesdb["helpers"]
#folium_map = actesdb["folium_map"]
# ______________________________________________________________________________
# storage extractor utilities
def extract_princes_in_houses():
    """List the princes of every house by querying the storage.

    The house names come from the house collection (lower-cased); for each
    of them the acte collection is aggregated to collect the distinct
    (prince_name, prince_code) pairs of the actes of that house.
    The csv metadata is not used.

    Returns:
        dict mapping each lower-cased house name to a list of
        ``{'prince_name': ..., 'prince_code': ...}`` dicts.

    sample:
    >>> extract_princes_in_houses()['anjou']
    [{'prince_name': "Louis Ier d'Anjou", 'prince_code': 'lo_i'}, {'prince_name': "Louis III d'Anjou", 'prince_code': 'lo_iii'}, ...]
    """
    def _princes_of(house_key):
        # One aggregation per house: keep its actes, then group on the
        # (name, code) pair so each prince appears once.
        pipeline = [
            {"$match": {"house": house_key}},
            {'$group': {'_id': {'prince_name': '$prince_name',
                                'prince_code': '$prince_code'}}},
        ]
        return [group['_id'] for group in actecol.aggregate(pipeline)]

    house_keys = (house_doc['name'].lower() for house_doc in housecol.find())
    return {house_key: _princes_of(house_key) for house_key in house_keys}
def normalize_trigrams(trigram):
    """Invert a mapping and strip accents from its values.

    Every value of ``trigram`` is transliterated with ``unidecode`` and
    becomes a key of the result; the original key becomes the value.
    Useful for the uri routes, where names are written without accents.

    sample: Alençon -> Alencon
            Orléans -> Orleans
    """
    normalized = {}
    for code, label in trigram.items():
        normalized[unidecode(label)] = code
    return normalized
# ______________________________________________________________________________
# in-memory metadata extracted from the database when this module is loaded
# TODO: if it takes too much time at launch, put it in something like
# a `flask init` procedure
# A single document of the helpers collection (find_one with no filter); it is
# read below for its "house_trigram" and "prince_bigram" entries.
# NOTE(review): if the collection is empty, find_one() presumably returns None
# and the two lookups below would fail — confirm the collection is always seeded.
helpers_dicts = helpers.find_one()
house_trigram = helpers_dicts["house_trigram"]
prince_bigram = helpers_dicts["prince_bigram"]
# Queries the house and acte collections once, at module load.
princes_in_houses = extract_princes_in_houses()
# normalized_trigrams
# Both names are rebound to the output of normalize_trigrams, i.e. mappings
# keyed by the accent-stripped values of the stored mappings; the lookups in
# bigram_prince and trigram_house use these rebound versions.
house_trigram = normalize_trigrams(house_trigram)
prince_bigram = normalize_trigrams(prince_bigram)
def bigram_prince(prince):
    """Translate a prince route segment into its coded form.

    sample: Charles_i -> ch_i

    The segment must contain exactly one underscore, separating the name
    from the number; the name is looked up in ``prince_bigram``.
    """
    given_name, ordinal = prince.split("_")
    code = prince_bigram[given_name]
    return code + "_" + ordinal
# TODO: write tests in the datascience project `datascience/tests`
#print(bigram_prince("Agnes"))
#print(bigram_prince("Arthur"))
#print(bigram_prince("Bernard"))
#print(trigram_house("Anjou"))
#print(trigram_house("Orleans"))
def trigram_house(house):
    """Return the entry stored for ``house`` in ``house_trigram``.

    Raises KeyError when ``house`` is not a key of that mapping.
    """
    code = house_trigram[house]
    return code
def make_acteid_from_route(house=None, prince=None, date_and_item=None):
    """Build an acte id from the three segments of an acte route.

    sample: /acte/Anjou/Isabelle_i/1441_08_05a -> anj_isa_i_1441_08_05a

    The house and prince segments are translated with ``trigram_house`` and
    ``bigram_prince``; ``date_and_item`` is appended unchanged.
    """
    house_code = trigram_house(house)
    prince_code = bigram_prince(prince)
    return "_".join((house_code, prince_code, date_and_item))