You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

96 lines
3.0 KiB
Python

"""Helper functions: filename-derived date keys and plain-text search responses.
"""
import re

from bs4 import BeautifulSoup

from .dbinit import inverted_prince_bigram
def make_timeitem_from_filename(filename):
    """Drop the three leading underscore-separated fields of a file name.

    Example: "anj_isa_i_1441_08_05a" -> "1441_08_05a"

    A name with fewer than four fields yields an empty string.
    """
    # Split at most three times so that everything after the third
    # underscore stays in one piece (inner underscores included).
    pieces = filename.split('_', 3)
    if len(pieces) > 3:
        return pieces[3]
    return ""
def plaintext_response(search, actecol, prince_bigram):
    """Plain-text, case-insensitive search over the acts' ``xmlcontent``.

    :param search: the term submitted through the search form; it is
        matched literally (regex metacharacters are escaped)
    :param actecol: collection object exposing ``aggregate(pipeline)``
    :param prince_bigram: accepted for interface compatibility; not read
        by this function.
        NOTE(review): the long prince name is resolved through the
        imported ``inverted_prince_bigram`` -- confirm whether the mapping
        passed here was meant to be used instead.
    :return: list of dicts, one per distinct group of the fields listed in
        the ``$group`` stage, each enriched with:
        ``house`` (capitalized), ``dateitem`` (derived from ``filename``),
        ``prince_url`` and ``show_text`` (lower-cased excerpt around the
        first occurrence of the term in the tag-stripped text)
    """
    strframe = 35  # characters of context kept on each side of the hit
    searchtext = search.lower()
    query = [
        {
            "$match": {
                "xmlcontent": {
                    # Escaped so the term is a literal, consistent with
                    # the literal str.find() used for the excerpt below.
                    "$regex": re.escape(search),
                    "$options": 'i'  # case-insensitive
                }
            }
        },
        {
            '$group': {'_id': {'prince_name': '$prince_name',
                               'house': '$house',
                               'date': '$date',
                               'prince_code': '$prince_code',
                               'filename': '$filename',
                               "place": "$place.name",
                               "diplo_state": "$diplo_state",
                               "diplo_type": "$diplo_type",
                               "xmlcontent": "$xmlcontent"
                               }
                       }
        }]
    # Only the grouping key carries data; unwrap it from each result.
    transformed_query = [pr['_id'] for pr in actecol.aggregate(query)]
    for trs in transformed_query:
        trs['house'] = trs['house'].capitalize()
        trs['dateitem'] = make_timeitem_from_filename(trs['filename'])
        bigram, number = trs['prince_code'].split('_')
        long_prince_bigram = inverted_prince_bigram(bigram) + '_' + number
        trs['prince_url'] = long_prince_bigram.capitalize()
        # find the word in the text
        cleantext = BeautifulSoup(trs["xmlcontent"], "lxml").text.lower()
        begin = cleantext.find(searchtext)
        if begin < 0:
            # The database matched against the raw XML, so the hit may lie
            # in (or across) markup that is absent from the stripped text;
            # fall back to an excerpt from the start of the text.
            begin = 0
        begin_mark = max(begin - strframe, 0)
        end_mark = begin + len(searchtext) + strframe
        trs['show_text'] = cleantext[begin_mark:end_mark]
    return transformed_query
#from flask import abort
#
#def find_one_or_404(collection, **kwargs):
# """Find a single document or raise a 404.
#
# This is like :meth:`~pymongo.collection.Collection.find_one`, but
# rather than returning ``None``, cause a 404 Not Found HTTP status
# on the request.
#
# .. code-block:: python
#
# usercollection = mydb['usercollection']
#
# @app.route("/user/<username>")
# def user_profile(username):
# userfound = find_one_or_404(usercollection, {"_id": username})
# return render_template("user.html",
# user=userfound)
# """
# found = collection.find_one(**kwargs)
# if found is None:
# abort(404)
# return found
2 years ago