add word extracts in the search results

3 years ago · 7a6a911d30
parent 07712ad544
commit 7a6a911d30
3 changed files with 24 additions and 6 deletions
--- a/app/helper.py
+++ b/app/helper.py
@ -1,10 +1,11 @@
-"""helper functions (is it really usefull?)
+"""helper functions
 TODO: consider moving all of these calculations into the db storage
 """
 from bs4 import BeautifulSoup
 from .dbinit import inverted_prince_bigram
 def make_timeitem_from_filename(filename):
    """
    "anj_isa_i_1441_08_05a" -> "1441_08_05a"
@ -35,21 +36,36 @@ def plaintext_response(search, actecol, prince_bigram):
                            'filename': '$filename',
                            "place": "$place.name",
                            "diplo_state": "$diplo_state",
-                            "diplo_type": "$diplo_type"
+                            "diplo_type": "$diplo_type",
                            "xmlcontent": "$xmlcontent"
                            }
                    }
        }]
    results = list(actecol.aggregate(query))
    transformed_query = [pr['_id'] for pr in results]
    invert_prince_bigram = {val: key for key, val in prince_bigram.items()}
    # constructing the dateitem
    for trs in transformed_query:
        trs['house'] = trs['house'].capitalize()
        trs['dateitem'] = make_timeitem_from_filename(trs['filename'])
        bigram, number = trs['prince_code'].split('_')
        long_prince_bigram = inverted_prince_bigram(bigram) + '_' + number
        trs['prince_url'] = prince_name=long_prince_bigram.capitalize()
        # find the word in the text
        cleantext = BeautifulSoup(trs["xmlcontent"], "lxml").text
        cleantext = cleantext.lower()
        searchtext = search.lower()
        begin = cleantext.find(searchtext)
        #end = cleantext.find(searchtext, begin, len(cleantext))
        strframe = 35
        begin_mark = begin - strframe
        if begin_mark < 0:
            begin_mark = 0
        end_mark = begin + len(searchtext)  + strframe
        trs['show_text'] = cleantext[begin_mark:end_mark]
    return transformed_query
--- a/app/templates/plainsearch.html
+++ b/app/templates/plainsearch.html
@ -6,7 +6,7 @@
 <h1>Résulat de la recherche</h1>
-<h2>Résulat de la recherche sur "<b>{{ search }}</b>" : </h2>
+<h2>{{ actes | count }} résulats pour la recherche sur "<b>{{ search }}</b>" :</h2>
        {% for acte in actes %}
            <div>
@ -14,7 +14,7 @@
                    <h3></h3>
                </div>
                <div class="pboly">
-                    <p class="chrono-p font-weight-bold"><a href="{{url_for('main.acte', house=acte['house'], prince=acte['prince_url'], dateitem=acte['dateitem'])}}">{{ acte['prince_name'] }}, {{ acte['date'] }}.</a></p>
+                    <p class="chrono-p font-weight-bold"><a href="{{url_for('main.acte', house=acte['house'], prince=acte['prince_url'], dateitem=acte['dateitem'])}}">{{ acte['prince_name'] }}, {{ acte['date'] }}, extrait : "... {{ acte['show_text'] }} ..."</a></p>
                    <p style="margin-bottom: 5%">
                        {% if acte['place'] != 'NS' %}
                            <span class="badge badge-pill badge" style="background-color: #284AA7; color: white; font-size: small;">{{ acte['place'] }}</span>
--- a/requirements.txt
+++ b/requirements.txt
@ -22,3 +22,5 @@ zipp==3.9.0
 pyyaml==6.0.1
 Unidecode==1.3.6
 folium==0.14.0
 beautifulsoup4==4.12.2
 lxml==4.9.3