add word extracts in the search results

develop
gwen 2 years ago
parent 07712ad544
commit 7a6a911d30

@ -1,10 +1,11 @@
"""helper functions (is it really usefull?)
"""helper functions
TODO: maybe all these calculations are to be put in the db storage
"""
from bs4 import BeautifulSoup
from .dbinit import inverted_prince_bigram
def make_timeitem_from_filename(filename):
"""
"anj_isa_i_1441_08_05a" -> "1441_08_05a"
@ -35,21 +36,36 @@ def plaintext_response(search, actecol, prince_bigram):
'filename': '$filename',
"place": "$place.name",
"diplo_state": "$diplo_state",
"diplo_type": "$diplo_type"
"diplo_type": "$diplo_type",
"xmlcontent": "$xmlcontent"
}
}
}]
results = list(actecol.aggregate(query))
transformed_query = [pr['_id'] for pr in results]
invert_prince_bigram = {val: key for key, val in prince_bigram.items()}
# constructing the dateitem
for trs in transformed_query:
trs['house'] = trs['house'].capitalize()
trs['dateitem'] = make_timeitem_from_filename(trs['filename'])
bigram, number = trs['prince_code'].split('_')
long_prince_bigram = inverted_prince_bigram(bigram) + '_' + number
trs['prince_url'] = prince_name=long_prince_bigram.capitalize()
# find the word in the text
cleantext = BeautifulSoup(trs["xmlcontent"], "lxml").text
cleantext = cleantext.lower()
searchtext = search.lower()
begin = cleantext.find(searchtext)
#end = cleantext.find(searchtext, begin, len(cleantext))
strframe = 35
begin_mark = begin - strframe
if begin_mark < 0:
begin_mark = 0
end_mark = begin + len(searchtext) + strframe
trs['show_text'] = cleantext[begin_mark:end_mark]
return transformed_query

@ -6,7 +6,7 @@
<h1>Résultat de la recherche</h1>
<h2>Résultat de la recherche sur "<b>{{ search }}</b>" : </h2>
<h2>{{ actes | count }} résultats pour la recherche sur "<b>{{ search }}</b>" :</h2>
{% for acte in actes %}
<div>
@ -14,7 +14,7 @@
<h3></h3>
</div>
<div class="pboly">
<p class="chrono-p font-weight-bold"><a href="{{url_for('main.acte', house=acte['house'], prince=acte['prince_url'], dateitem=acte['dateitem'])}}">{{ acte['prince_name'] }}, {{ acte['date'] }}.</a></p>
<p class="chrono-p font-weight-bold"><a href="{{url_for('main.acte', house=acte['house'], prince=acte['prince_url'], dateitem=acte['dateitem'])}}">{{ acte['prince_name'] }}, {{ acte['date'] }}, extrait : "... {{ acte['show_text'] }} ..."</a></p>
<p style="margin-bottom: 5%">
{% if acte['place'] != 'NS' %}
<span class="badge badge-pill badge" style="background-color: #284AA7; color: white; font-size: small;">{{ acte['place'] }}</span>

@ -22,3 +22,5 @@ zipp==3.9.0
pyyaml==6.0.1
Unidecode==1.3.6
folium==0.14.0
beautifulsoup4==4.12.2
lxml==4.9.3

Loading…
Cancel
Save