|
|
|
|
@ -92,6 +92,15 @@ class BsXMLDataSet(XMLDataSet):
|
|
|
|
|
date = self.soup.msItem.docDate.text # verbose date
|
|
|
|
|
analyse = self.soup.abstract.p.text # acte's short analysis
|
|
|
|
|
ref = self.soup.msIdentifier.find_all("idno", {"n": "2"})
|
|
|
|
|
|
|
|
|
|
#prince_name = tree.xpath('//listPerson[@type="prince"]/person/name/text()')
|
|
|
|
|
# XXX ugly: manual Beautiful Soup traversal standing in for the XPath query above
|
|
|
|
|
persons = self.soup.find_all("listPerson")
|
|
|
|
|
for pers in persons:
|
|
|
|
|
if pers.attrs.get('type') == "prince":
|
|
|
|
|
ps = pers.find_next()
|
|
|
|
|
ps_name = pers.find_next()
|
|
|
|
|
prince_name = ps_name.get_text()
|
|
|
|
|
# //sourceDesc//msIdentifier/idno[@n='2'] is the doc id inside the
|
|
|
|
|
# archive box or the page number inside a manuscript (see _create_doc)
|
|
|
|
|
# warning: the analysis may not have been written yet,
|
|
|
|
|
@ -100,6 +109,7 @@ class BsXMLDataSet(XMLDataSet):
|
|
|
|
|
ref_acte = ref[0].text
|
|
|
|
|
else: # there is no analysis
|
|
|
|
|
ref_acte = "NS"
|
|
|
|
|
# FIXME: use this location for geolocation
|
|
|
|
|
# prod_place = self.soup.find_all("placeName", {"type": "production_place"})[0].text
|
|
|
|
|
# //sourceDesc//msIdentifier/idno[@n='1'] is always the
|
|
|
|
|
# archive box or manuscript collection id
|
|
|
|
|
@ -109,6 +119,7 @@ class BsXMLDataSet(XMLDataSet):
|
|
|
|
|
|
|
|
|
|
return {
|
|
|
|
|
# "num_acte": counter,
|
|
|
|
|
"prince_name": prince_name,
|
|
|
|
|
"filename": numb,
|
|
|
|
|
"date_time": date_time,
|
|
|
|
|
"date": date,
|
|
|
|
|
|