|
|
|
|
"""helper functions
|
|
|
|
|
|
|
|
|
|
"""
|
|
|
|
|
import re

from bs4 import BeautifulSoup

from .dbinit import inverted_prince_bigram
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def make_timeitem_from_filename(filename):
    """Drop the three leading ``_``-separated fields of a file name.

    Example: "anj_isa_i_1441_08_05a" -> "1441_08_05a"

    A name with fewer than four ``_``-separated fields yields ``""``.
    """
    # Split at most three times: everything after the third underscore stays
    # in one piece, and the slice is empty when there is no such remainder.
    tail = filename.split('_', 3)[3:]
    return "_".join(tail)
|
|
|
|
|
|
|
|
|
|
def plaintext_response(search, actecol, prince_bigram):
    """Plain text search response.

    Runs a case-insensitive search for ``search`` in the ``xmlcontent`` field
    of the documents in ``actecol``. The term is matched literally: regex
    metacharacters are escaped before it is handed to ``$regex``. Matching
    documents are grouped on the fields needed for display, and every
    resulting row is completed with:

    * ``house``: capitalized,
    * ``dateitem``: derived from ``filename``,
    * ``prince_url``: built from ``prince_code`` via ``inverted_prince_bigram``,
    * ``show_text``: lower-cased excerpt of the tag-stripped text around the
      first occurrence of the term.

    :search: the form's result (the text to look for)
    :actecol: collection-like object exposing ``aggregate(pipeline)``
    :prince_bigram: not read by this function (earlier code only built an
        unused inverted copy of it); kept so existing callers keep working
    :return: list of dicts, one per distinct matching document
    """
    pipeline = [
        {
            "$match": {
                "xmlcontent": {
                    # Escaped so the user's input is matched as literal text,
                    # consistent with the literal ``str.find`` used below to
                    # locate the excerpt, and so that characters such as
                    # "(" or "*" cannot produce an invalid or abusive regex.
                    "$regex": re.escape(search),
                    "$options": 'i',  # case-insensitive
                }
            }
        },
        {
            # Grouping on the whole set of displayed fields collapses
            # identical rows into one.
            '$group': {
                '_id': {
                    'prince_name': '$prince_name',
                    'house': '$house',
                    'date': '$date',
                    'prince_code': '$prince_code',
                    'filename': '$filename',
                    "place": "$place.name",
                    "diplo_state": "$diplo_state",
                    "diplo_type": "$diplo_type",
                    "xmlcontent": "$xmlcontent",
                }
            }
        },
    ]

    # Each aggregation result carries the grouped fields under '_id'.
    rows = [grouped['_id'] for grouped in actecol.aggregate(pipeline)]

    # Loop invariants: the lower-cased needle and the context width.
    searchtext = search.lower()
    strframe = 35  # characters of context kept on each side of the match

    for row in rows:
        row['house'] = row['house'].capitalize()
        row['dateitem'] = make_timeitem_from_filename(row['filename'])

        # NOTE(review): assumes prince_code is "<bigram>_<number>" with a
        # single underscore; any other shape raises ValueError here.
        bigram, number = row['prince_code'].split('_')
        # inverted_prince_bigram comes from .dbinit; presumably it maps the
        # short bigram to the long prince name -- confirm against dbinit.
        long_prince_bigram = inverted_prince_bigram(bigram) + '_' + number
        row['prince_url'] = long_prince_bigram.capitalize()

        # Find the word in the tag-stripped, lower-cased text.
        cleantext = BeautifulSoup(row["xmlcontent"], "lxml").text.lower()
        begin = cleantext.find(searchtext)
        if begin < 0:
            # The database matched on the raw XML, so the term may be absent
            # from the stripped text (e.g. it only occurs inside markup).
            # Fall back to the start of the text instead of letting -1 leak
            # into the slice arithmetic.
            begin = 0
        begin_mark = max(begin - strframe, 0)
        end_mark = begin + len(searchtext) + strframe
        row['show_text'] = cleantext[begin_mark:end_mark]

    return rows
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#from flask import abort
|
|
|
|
|
#
|
|
|
|
|
#def find_one_or_404(collection, **kwargs):
|
|
|
|
|
# """Find a single document or raise a 404.
|
|
|
|
|
#
|
|
|
|
|
# This is like :meth:`~pymongo.collection.Collection.find_one`, but
|
|
|
|
|
# rather than returning ``None``, cause a 404 Not Found HTTP status
|
|
|
|
|
# on the request.
|
|
|
|
|
#
|
|
|
|
|
# .. code-block:: python
|
|
|
|
|
#
|
|
|
|
|
# usercollection = mydb['usercollection']
|
|
|
|
|
#
|
|
|
|
|
# @app.route("/user/<username>")
|
|
|
|
|
# def user_profile(username):
|
|
|
|
|
# userfound = find_one_or_404(usercollection, {"_id": username})
|
|
|
|
|
# return render_template("user.html",
|
|
|
|
|
# user=userfound)
|
|
|
|
|
# """
|
|
|
|
|
# found = collection.find_one(**kwargs)
|
|
|
|
|
# if found is None:
|
|
|
|
|
# abort(404)
|
|
|
|
|
# return found
|
|
|
|
|
|