diff --git a/actes-princiers/src/actes_princiers/pipelines/populate_mongo/nodes.py b/actes-princiers/src/actes_princiers/pipelines/populate_mongo/nodes.py index 4574076..a8eae48 100755 --- a/actes-princiers/src/actes_princiers/pipelines/populate_mongo/nodes.py +++ b/actes-princiers/src/actes_princiers/pipelines/populate_mongo/nodes.py @@ -43,6 +43,7 @@ class Acte(Document): filename = StringField(required=True, max_length=100) ref_acte = StringField(required=True, max_length=100) xmlcontent = StringField(required=True) # no max_length + place = DictField() def db_connect(storage_ip, db_name, mongodb_admin, mongodb_password): diff --git a/actes-princiers/src/actes_princiers/pipelines/xml_processing/nodes.py b/actes-princiers/src/actes_princiers/pipelines/xml_processing/nodes.py index b421b1f..5c18f1d 100755 --- a/actes-princiers/src/actes_princiers/pipelines/xml_processing/nodes.py +++ b/actes-princiers/src/actes_princiers/pipelines/xml_processing/nodes.py @@ -63,6 +63,7 @@ def make_json_collection(datasetcol: BsXMLDataSetCollection) -> JSONDataSetColle output_datasets.datasets[dataset_filenamestem] = output_xmldataset return output_datasets + def add_xmlcontent_tojson(jsondoc: JSONDataSetCollection, xmlcontent: TextDataSetCollection) -> JSONDataSetCollection: "adds xmlcontent to the json" jsondatasets = jsondoc.datasets @@ -79,6 +80,10 @@ def add_xmlcontent_tojson(jsondoc: JSONDataSetCollection, xmlcontent: TextDataSe xmlds = xmldatasets[dataset_filenamestem] # xmlds._load() document['xmlcontent'] = xmldatasets[dataset_filenamestem]._load() + if document['place']['latitude'] is not None: + document['folium'] = "FIXME" + else: + document['folium'] = None else: raise KeyError(f"xmlcontent datasets does not have the key : {dataset_filenamestem}") # let's create subfolders, if they don't exist diff --git a/actes-princiers/src/actesdataset.py b/actes-princiers/src/actesdataset.py index c1587ec..05a10c4 100644 --- a/actes-princiers/src/actesdataset.py +++ b/actes-princiers/src/actesdataset.py @@ -127,10 +127,40 @@ class BsXMLDataSet(XMLDataSet): #type_diplo = self.soup.body.div["subtype"] #diplo_state = self.soup.body.div["type"] - # FIXME: location -> geolocallisation - #place = self.soup.find_all("placeName", {"type": "production_place"})[0].text - #if len(place != "NS":   - # place = place[0].text + # geolocalisation + place = self.soup.find("place") + place_name = place.find("placeName") + if place_name.get_text() != "NS": + pl_name = place_name.get_text() + else: + pl_name = "Non spécifié" + + region_balise = place.find("region") + if region_balise is not None: + region = region_balise.get_text() + else: + region = "Non spécifié" + + settlement = place.find("settlement") + if settlement is not None: + settlement = settlement.get_text() + else: + settlement = "Non spécifié" + + geolocalisation = place.find("geo") + if geolocalisation is not None: + geolocalisation = geolocalisation.get_text() + latitude, longitude = geolocalisation.split(" ") + else: + latitude = None + longitude = None + + place = dict(name=pl_name, + region=region, + settlement=settlement, + latitude = latitude, + longitude = longitude + ) return { # "num_acte": counter, "prince_name": self.find_prince_name(), @@ -141,7 +171,8 @@ class BsXMLDataSet(XMLDataSet): "analysis": analyse, # "doc_acte": doc_query[0], "ref_acte": ref_acte, - "transcribers": self.find_transcribers() + "transcribers": self.find_transcribers(), + "place": place # "state_doc": state_query[0], # "diplo_type_acte": diplo_query[0] } @@ -254,18 +285,16 @@ class TextDataSetCollection(DataSetCollection): filepath=str(filepath)) return self -class FoliumHTMLDataSet(AbstractDataSet): - def __init__(self, filepath: str): - self._filepath = filepath - - def _load(self) -> None: - raise DataSetError('This dataset is WriteOnly') - - def _describe(self) -> Dict[str, Any]: - return dict(filepath=self._filepath) - - #def _save(self, data: Map) -> None: - def _save(self, data) -> None: - # FIXME - data.save(self._filepath) +#class FoliumHTMLDataSet(AbstractDataSet): +# def __init__(self, filepath: str): +# self._filepath = filepath +# +# def _load(self) -> None: +# raise DataSetError('This dataset is WriteOnly') +# +# def _describe(self) -> Dict[str, Any]: +# return dict(filepath=self._filepath) +# +# def _save(self, data: Map) -> None: +# data.save(self._filepath)