From 11418fc863ec9e0d2089fd83f98f21f01baedc7e Mon Sep 17 00:00:00 2001 From: gwen Date: Tue, 19 Sep 2023 22:23:29 +0200 Subject: [PATCH] add aggregation on the acte's storage collections --- app/routes.py | 104 +++++++++++++++++++++++++++++++++++--------------- 1 file changed, 74 insertions(+), 30 deletions(-) diff --git a/app/routes.py b/app/routes.py index bf2bb28..9a4d391 100644 --- a/app/routes.py +++ b/app/routes.py @@ -12,7 +12,8 @@ from .helper import find_one_or_404 main = Blueprint("main", __name__, url_prefix="/") -# FIXME : put the mongodb connector in the flask app object +# TODO : maybe put the mongodb connector in the flask app object +# ______________________________________________________________________________ # database connexion username = urllib.parse.quote_plus(dbadmin) password = urllib.parse.quote_plus(dbpassword) @@ -25,19 +26,44 @@ housecol = actesdb["house"] actecol = actesdb["acte"] helpers = actesdb["helpers"] #folium_map = actesdb["folium_map"] -# -helpers_dicts = helpers.find_one() -house_trigram = helpers_dicts["house_trigram"] -prince_bigram = helpers_dicts["prince_bigram"] +# ______________________________________________________________________________ +# storage extractor utilities + +def extract_princes_in_houses(): + """Extracts the princes of every house + by querying the storage (not by using the csv metadata) + + sample: + >>> extract_princes_in_houses()['anjou'] + [{'prince_name': "Louis Ier d'Anjou", 'prince_code': 'lo_i'}, {'prince_name': "Louis III d'Anjou", 'prince_code': 'lo_iii'}, {'prince_name': 'Isabelle de Lorraine', 'prince_code': 'isa_i'}, {'prince_name': "Louis II d'Anjou", 'prince_code': 'lo_ii'}, {'prince_name': 'Marie de Blois', 'prince_code': 'mar_i'}, {'prince_name': "René d'Anjou", 'prince_code': 're_i'}, {'prince_name': "Yollande d'Aragon", 'prince_code': 'yol_i'}] + """ + princes_in_houses = dict() + for house in housecol.find(): + housename = house['name'].lower() + query = 
list(actecol.aggregate([{"$match": {"house": housename}}, {'$group': {'_id': {'prince_name': '$prince_name', 'prince_code': '$prince_code'}}}])) + princes_in_houses[housename] = [pr['_id'] for pr in query] + return princes_in_houses + def normalize_trigrams(trigram): - """noramlizes names like Alençon -> Alencon + """normalizes names, useful for the URI routes - or Orléans -> Orleans for the url routes to be clean + sample: Alençon -> Alencon + Orléans -> Orleans """ return {unidecode(value):key for key, value in trigram.items()} +# ______________________________________________________________________________ +# meta information on the database, extracted from the storage and kept in memory +# TODO: if it takes too much time at launch, put it in something like +# a `flask init` procedure + +helpers_dicts = helpers.find_one() +house_trigram = helpers_dicts["house_trigram"] +prince_bigram = helpers_dicts["prince_bigram"] + +princes_in_houses = extract_princes_in_houses() # normalized_trigrams house_trigram = normalize_trigrams(house_trigram) prince_bigram = normalize_trigrams(prince_bigram) @@ -46,9 +72,6 @@ def bigram_prince(prince): "Translates Charles_i -> ch_i" name, number = prince.split("_") return prince_bigram[name] + "_" + number - -def trigram_house(house): - return house_trigram[house] # TODO: write tests in the datascience project `datascience/tests` #print(bigram_prince("Agnes")) @@ -57,10 +80,17 @@ def trigram_house(house): #print(trigram_house("Anjou")) #print(trigram_house("Orleans")) +def trigram_house(house): + return house_trigram[house] + + def make_acteid_from_route(house=None, prince=None, date_and_item=None): "/acte/Anjou/Isabelle_i/1441_08_05a -> anj_isa_i_1441_08_05a" return "_".join([trigram_house(house), bigram_prince(prince), date_and_item]) +# ______________________________________________________________________________ +# routes + @main.route("/") def home(): """home route""" @@ -87,45 +117,59 @@ def corpora_all(): @main.route("/actes/<house>") # dont put a 
slash at the end def actes(house): """actes route - + shows the princes in the selected house - - :param: the house in the url is the house name + + :param: the house in the url is the house name with a capital letter at the beginning example: `house = "Berry"` """ - #for code in actecol.distinct('prince_code'): - # print(code) # house in the store shall be in lower case, but let's force it, just in case house = house.lower() - # [(prince_name, prince_code)] means for example: - # [("Charles Ier de Bourbon", "Ch_i"), ("Agnès de Bourgogne", "Agn_i")] - princes = [] - for act in actecol.find({"house":house}): - prince_name = act['prince_name'].capitalize() - prince_code = act['prince_code'].capitalize() - if (prince_name, prince_code) not in princes: - princes.append((prince_name, prince_code)) + # the nosql query below is equivalent to this code, which is more readable but slower: + #princes = [] + #for act in actecol.find({"house":house}): + # prince_name = act['prince_name'].capitalize() + # prince_code = act['prince_code'].capitalize() + # if (prince_name, prince_code) not in princes: + # princes.append((prince_name, prince_code)) + + # [('Louis II de Bourbon', 'lo_ii'), ('Anne Dauphine', 'ann_i'), ('Agnès de Bourgogne', 'agn_i'), ('Charles Ier de Bourbon', 'ch_i')] + #princes = princes_in_houses[house] + #princes = [(prc['prince_name'], prc['prince_code']) for prc in princes] + # [('Agnès de Bourgogne', 'agn_i'), ('Anne Dauphine', 'ann_i'), ('Charles Ier de Bourbon', 'ch_i'), ('Louis II de Bourbon', 'lo_ii')] return render_template("corpus.html", house=house.capitalize(), princes=princes) - + @main.route("/actes/<house>/<prince>") # don't put a slash at the end def prince_corpus(house=None, prince=None): """copora prince, **timeline view**""" - # FIXME - return "FIXME" # make_acteid_from_route(house, prince) + house = house.lower() + # prince bigram -> prince_code + # sample uri: /actes/Anjou/lo_i -> Louis_i -> Louis Ier d'Anjou + prc_big, prc_num = prince.split("_") 
prince_code = prince_bigram[prc_big] + "_" + prc_num +# for item in prince_acte: +# print("\n\n", item) +#info = [(t.date_time, t.date, t.filename, t.analysis, t.prod_place_acte, +# t.diplo_type_acte, t.state_doc) +# ['1418-12-20', '1418, 20 décembre', 'anj_yo_i_1418_12_20a', "Donation à Antoine de la Salle d'une maison à Arles", "Château d'Anger", 'Lettres patentes', 'Copie'] +# ['1421-06-28', '1421, 28 juin', 'anj_yo_i_1421_06_28a', "Confirmation par Yolande, duchesse d'Anjou, du douaire assigné à sa belle-fille, Isabelle de Lorraine", 'NS', 'Lettres patentes', 'Original'] +# ['1442-02-24', '1442 (n. st.), 24 février', 'anj_yo_i_1442_02_24a', 'Pierre Throvan, secrétaire de la reine, nommé trésorier général de Provence et de Languedoc du 16 juillet au 31 octobre 1441, puis pendant trois ans à partir du 1er novembre 1441', 'Château de Saumur', 'Lettres patentes', 'Copie'] return render_template("prince_corpus.html", houseS=house, duke_name=prince_name, + lst_id=prince_acte) @main.route("/acte/<house>/<prince>/<dateitem>") # don't put a slash at the end def acte(house=None, prince=None, dateitem=None): - """specific prince's acte view + """specific prince's acte view - :params: - house + :params: - house - prince - date + item (sample: 1441_08_05a) :url location sample: /acte/Anjou/Isabelle_i/1441_08_05a - + url transcription samples: /acte/Anjou/Isabelle_i/1441_08_05a -> anj_isa_i_1441_08_05a /acte/Bourbon/Anne_i/1388_09_15a -> brb_ann_i_1388_09_15a @@ -157,7 +201,7 @@ def geoloc(): #geolocalisation = folium_map.find_one() #geolocalisation = geolocalisation['globalmap'] #return render_template("map.html", geolocalisation=geolocalisation) - + @main.route("/contact") def contact() -> t.Text: """Displays the Contact page"""