|
|
|
|
@ -127,10 +127,40 @@ class BsXMLDataSet(XMLDataSet):
|
|
|
|
|
#type_diplo = self.soup.body.div["subtype"]
|
|
|
|
|
#diplo_state = self.soup.body.div["type"]
|
|
|
|
|
|
|
|
|
|
# FIXME: location -> geolocalisation
|
|
|
|
|
#place = self.soup.find_all("placeName", {"type": "production_place"})[0].text
|
|
|
|
|
#if len(place != "NS":
|
|
|
|
|
# place = place[0].text
|
|
|
|
|
# geolocalisation
|
|
|
|
|
place = self.soup.find("place")
|
|
|
|
|
place_name = place.find("placeName")
|
|
|
|
|
if place_name.get_text() != "NS":
|
|
|
|
|
pl_name = place_name.get_text()
|
|
|
|
|
else:
|
|
|
|
|
pl_name = "Non spécifié"
|
|
|
|
|
|
|
|
|
|
region_balise = place.find("region")
|
|
|
|
|
if region_balise is not None:
|
|
|
|
|
region = region_balise.get_text()
|
|
|
|
|
else:
|
|
|
|
|
region = "Non spécifié"
|
|
|
|
|
|
|
|
|
|
settlement = place.find("settlement")
|
|
|
|
|
if settlement is not None:
|
|
|
|
|
settlement = settlement.get_text()
|
|
|
|
|
else:
|
|
|
|
|
settlement = "Non spécifié"
|
|
|
|
|
|
|
|
|
|
geolocalisation = place.find("geo")
|
|
|
|
|
if geolocalisation is not None:
|
|
|
|
|
geolocalisation = geolocalisation.get_text()
|
|
|
|
|
latitude, longitude = geolocalisation.split(" ")
|
|
|
|
|
else:
|
|
|
|
|
latitude = None
|
|
|
|
|
longitude = None
|
|
|
|
|
|
|
|
|
|
place = dict(name=pl_name,
|
|
|
|
|
region=region,
|
|
|
|
|
settlement=settlement,
|
|
|
|
|
latitude = latitude,
|
|
|
|
|
longitude = longitude
|
|
|
|
|
)
|
|
|
|
|
return {
|
|
|
|
|
# "num_acte": counter,
|
|
|
|
|
"prince_name": self.find_prince_name(),
|
|
|
|
|
@ -141,7 +171,8 @@ class BsXMLDataSet(XMLDataSet):
|
|
|
|
|
"analysis": analyse,
|
|
|
|
|
# "doc_acte": doc_query[0],
|
|
|
|
|
"ref_acte": ref_acte,
|
|
|
|
|
"transcribers": self.find_transcribers()
|
|
|
|
|
"transcribers": self.find_transcribers(),
|
|
|
|
|
"place": place
|
|
|
|
|
# "state_doc": state_query[0],
|
|
|
|
|
# "diplo_type_acte": diplo_query[0]
|
|
|
|
|
}
|
|
|
|
|
@ -254,18 +285,16 @@ class TextDataSetCollection(DataSetCollection):
|
|
|
|
|
filepath=str(filepath))
|
|
|
|
|
return self
|
|
|
|
|
|
|
|
|
|
class FoliumHTMLDataSet(AbstractDataSet):
    """Write-only dataset that persists an object through its own ``save`` method.

    The object handed to ``_save`` is written to the path given at
    construction time. Reading back is not supported: ``_load`` always raises.
    """

    def __init__(self, filepath: str):
        """Remember where saved objects should be written.

        Args:
            filepath: Destination path forwarded to ``data.save`` on save.
        """
        self._filepath = filepath

    def _load(self) -> None:
        """Refuse to load, since this dataset can only be written.

        Raises:
            DataSetError: On every call.
        """
        raise DataSetError('This dataset is WriteOnly')

    def _describe(self) -> Dict[str, Any]:
        """Return a description of the dataset: a dict holding its ``filepath``."""
        return dict(filepath=self._filepath)

    def _save(self, data) -> None:
        """Write ``data`` to the configured path by calling ``data.save``.

        Args:
            data: Object exposing a ``save(path)`` method.
        """
        # FIXME: `data` is left unannotated; an earlier, disabled signature
        # typed it as `Map` (presumably folium.Map -- confirm before restoring).
        data.save(self._filepath)
|
|
|
|
|
|
|
|
|
|
|