add aggregation on the acte's storage collections

develop
gwen 2 years ago
parent 2caf6cf49d
commit 11418fc863

@ -12,7 +12,8 @@ from .helper import find_one_or_404
# Blueprint on which the routes of this module are registered (URL prefix "/").
main = Blueprint("main", __name__, url_prefix="/")
# FIXME: put the MongoDB connector in the Flask app object
# TODO: maybe put the MongoDB connector in the Flask app object
# ______________________________________________________________________________
# database connection
# Percent-encode the credentials so that reserved characters are escaped.
# NOTE(review): presumably they are embedded in the MongoDB connection URI
# afterwards; that code is outside this view -- confirm.
username = urllib.parse.quote_plus(dbadmin)
password = urllib.parse.quote_plus(dbpassword)
@ -25,19 +26,44 @@ housecol = actesdb["house"]
# Handle on the "acte" collection.
actecol = actesdb["acte"]
# Handle on the "helpers" collection, from which the lookup tables are read.
helpers = actesdb["helpers"]
#folium_map = actesdb["folium_map"]
#
# Fetch one document from the helpers collection and pull out its two lookup
# tables.
# NOTE(review): find_one() returns None on an empty collection, in which case
# the subscripts below would fail -- confirm the collection is always seeded.
helpers_dicts = helpers.find_one()
house_trigram = helpers_dicts["house_trigram"]
prince_bigram = helpers_dicts["prince_bigram"]
# ______________________________________________________________________________
# storage extractor utilities
def extract_princes_in_houses():
    """Map every house to the distinct princes found in its actes.

    The princes are collected by querying the storage (one aggregation on
    the acte collection per house document), not by using the csv metadata.

    Returns:
        dict: lower-cased house name -> list of
        ``{'prince_name': ..., 'prince_code': ...}`` dicts, one per distinct
        (prince_name, prince_code) pair. ``$group`` does not order its
        output, so the order of each list is not guaranteed.

    Sample:
        >>> extract_princes_in_houses()['anjou']  # doctest: +SKIP
        [{'prince_name': "Louis Ier d'Anjou", 'prince_code': 'lo_i'},
         {'prince_name': "Louis III d'Anjou", 'prince_code': 'lo_iii'},
         {'prince_name': 'Isabelle de Lorraine', 'prince_code': 'isa_i'},
         {'prince_name': "Louis II d'Anjou", 'prince_code': 'lo_ii'},
         {'prince_name': 'Marie de Blois', 'prince_code': 'mar_i'},
         {'prince_name': "René d'Anjou", 'prince_code': 're_i'},
         {'prince_name': "Yollande d'Aragon", 'prince_code': 'yol_i'}]
    """
    princes_in_houses = {}
    for house in housecol.find():
        # actes reference their house in lower case
        housename = house["name"].lower()
        pipeline = [
            {"$match": {"house": housename}},
            # grouping on the (name, code) pair de-duplicates the princes
            {
                "$group": {
                    "_id": {
                        "prince_name": "$prince_name",
                        "prince_code": "$prince_code",
                    }
                }
            },
        ]
        # iterate the cursor directly; no intermediate list is needed
        princes_in_houses[housename] = [
            group["_id"] for group in actecol.aggregate(pipeline)
        ]
    return princes_in_houses
def normalize_trigrams(trigram):
    """Build an inverted, accent-free lookup table, useful for clean URL routes.

    Every value of *trigram* is passed through ``unidecode`` and becomes a
    key of the result; the key it was stored under becomes its value.

    Sample of the normalization applied to the names:
        Alençon -> Alencon
        Orléans -> Orleans

    Args:
        trigram: mapping whose values are the names to normalize (as used in
            this file: the ``house_trigram`` / ``prince_bigram`` tables).

    Returns:
        dict: ``{unidecode(value): key}`` for each ``key, value`` item of
        *trigram*. If two values normalize to the same string, the item
        iterated last wins.
    """
    return {unidecode(name): code for code, name in trigram.items()}
# ______________________________________________________________________________
# In-memory metadata extracted from the database when this module is loaded.
# TODO: if it takes too much time at launch, put it in something like
# a `flask init` procedure
# Fetch one document from the helpers collection and read its two lookup tables.
helpers_dicts = helpers.find_one()
house_trigram = helpers_dicts["house_trigram"]
prince_bigram = helpers_dicts["prince_bigram"]
# house name -> princes found in the actes of that house
princes_in_houses = extract_princes_in_houses()
# Replace both lookup tables with their normalized form: normalize_trigrams
# inverts each mapping and passes the new keys through unidecode.
house_trigram = normalize_trigrams(house_trigram)
prince_bigram = normalize_trigrams(prince_bigram)
@ -47,9 +73,6 @@ def bigram_prince(prince):
name, number = prince.split("_")
return prince_bigram[name] + "_" + number
def trigram_house(house):
    """Return the entry stored for *house* in the ``house_trigram`` table.

    Raises:
        KeyError: if *house* is not a key of ``house_trigram``.
    """
    trigram = house_trigram[house]
    return trigram
# TODO: write tests in the datascience project `datascience/tests`
#print(bigram_prince("Agnes"))
#print(bigram_prince("Arthur"))
@ -57,10 +80,17 @@ def trigram_house(house):
#print(trigram_house("Anjou"))
#print(trigram_house("Orleans"))
def trigram_house(house):
    """Return the entry stored for *house* in the ``house_trigram`` table.

    Raises:
        KeyError: if *house* is not a key of ``house_trigram``.
    """
    trigram = house_trigram[house]
    return trigram
def make_acteid_from_route(house=None, prince=None, date_and_item=None):
    """Build an acte identifier from the three segments of an acte route.

    The house and prince segments are converted with ``trigram_house`` and
    ``bigram_prince``; the three parts are then joined with underscores.

    Sample:
        /acte/Anjou/Isabelle_i/1441_08_05a -> anj_isa_i_1441_08_05a
    """
    house_part = trigram_house(house)
    prince_part = bigram_prince(prince)
    return "_".join((house_part, prince_part, date_and_item))
# ______________________________________________________________________________
# routes
@main.route("/")
def home():
"""home route"""
@ -94,27 +124,41 @@ def actes(house):
with a capital letter at the beginning
example: `house = "Berry"`
"""
#for code in actecol.distinct('prince_code'):
# print(code)
# house in the store shall be in lower case, but let's force it, just in case
house = house.lower()
# [(prince_name, prince_code)] means for example:
# [("Charles Ier de Bourbon", "Ch_i"), ("Agnès de Bourgogne", "Agn_i")]
princes = []
for act in actecol.find({"house":house}):
prince_name = act['prince_name'].capitalize()
prince_code = act['prince_code'].capitalize()
if (prince_name, prince_code) not in princes:
princes.append((prince_name, prince_code))
# the nosql query below is equivalent to this code, which is more readable but slower:
#princes = []
#for act in actecol.find({"house":house}):
# prince_name = act['prince_name'].capitalize()
# prince_code = act['prince_code'].capitalize()
# if (prince_name, prince_code) not in princes:
# princes.append((prince_name, prince_code))
# [('Louis II de Bourbon', 'lo_ii'), ('Anne Dauphine', 'ann_i'), ('Agnès de Bourgogne', 'agn_i'), ('Charles Ier de Bourbon', 'ch_i')]
#princes = princes_in_houses[house]
#princes = [(prc['prince_name'], prc['prince_code']) for prc in princes]
# [('Agnès de Bourgogne', 'agn_i'), ('Anne Dauphine', 'ann_i'), ('Charles Ier de Bourbon', 'ch_i'), ('Louis II de Bourbon', 'lo_ii')
return render_template("corpus.html", house=house.capitalize(), princes=princes)
@main.route("/actes/<house>/<prince>") # don't put a slash at the end
def prince_corpus(house=None, prince=None):
"""Corpus of one prince, **timeline view**.

NOTE(review): as the lines stand here, the early ``return "FIXME"`` makes
everything after it unreachable. In addition, ``prince_name`` and
``prince_acte`` used by the final ``render_template`` call are never assigned
in this function -- confirm where they are meant to come from before
enabling that code.
"""
# FIXME
return "FIXME" # make_acteid_from_route(house, prince)
house = house.lower()
# The <prince> route segment is split on "_" into a name part and a number
# part (exactly one underscore is required by the two-name unpacking); the
# name part is looked up in prince_bigram to build the prince_code.
# sample uri: /actes/Anjou/lo_i -> Louis_i -> Louis Ier d'Anjou
prc_big, prc_num = prince.split("_")
prince_code = prince_bigram[prc_big] + "_" + prc_num
# for item in prince_acte:
# print("\n\n", item)
#info = [(t.date_time, t.date, t.filename, t.analysis, t.prod_place_acte,
# t.diplo_type_acte, t.state_doc)
# ['1418-12-20', '1418, 20 décembre', 'anj_yo_i_1418_12_20a', "Donation à Antoine de la Salle d'une maison à Arles", "Château d'Anger", 'Lettres patentes', 'Copie']
# ['1421-06-28', '1421, 28 juin', 'anj_yo_i_1421_06_28a', "Confirmation par Yolande, duchesse d'Anjou, du douaire assigné à sa belle-fille, Isabelle de Lorraine", 'NS', 'Lettres patentes', 'Original']
# ['1442-02-24', '1442 (n. st.), 24 février', 'anj_yo_i_1442_02_24a', 'Pierre Throvan, secrétaire de la reine, nommé trésorier général de Provence et de Languedoc du 16 juillet au 31 octobre 1441, puis pendant trois ans à partir du 1er novembre 1441', 'Château de Saumur', 'Lettres patentes', 'Copie']
return render_template("prince_corpus.html", houseS=house, duke_name=prince_name,
lst_id=prince_acte)
@main.route("/acte/<house>/<prince>/<dateitem>") # don't put a slash at the end

Loading…
Cancel
Save