From eae34ab0a884f0f3ade9e88e38ace8b1a9fa7baf Mon Sep 17 00:00:00 2001 From: gwen Date: Mon, 2 Oct 2023 16:09:07 +0200 Subject: [PATCH] add image in acte --- .../src/actes_princiers/pipelines/populate_mongo/nodes.py | 2 +- actes-princiers/src/actesdataset.py | 7 ++++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/actes-princiers/src/actes_princiers/pipelines/populate_mongo/nodes.py b/actes-princiers/src/actes_princiers/pipelines/populate_mongo/nodes.py index 1ef67f2..619120a 100755 --- a/actes-princiers/src/actes_princiers/pipelines/populate_mongo/nodes.py +++ b/actes-princiers/src/actes_princiers/pipelines/populate_mongo/nodes.py @@ -53,7 +53,7 @@ class Acte(Document): folium = StringField(required=False) # no max_length diplo_state = StringField(required=False) # sample: "diplo_state": "Original", diplo_type = StringField(required=False) # sample: "diplo_type": "Lettres_patentes", - + image = StringField(required=False) def db_connect(storage_ip, db_name, mongodb_admin, mongodb_password): #mongodb://%s:%s@149.202.41.75:27017' % (username, password) diff --git a/actes-princiers/src/actesdataset.py b/actes-princiers/src/actesdataset.py index 5fb9f6d..357be1d 100644 --- a/actes-princiers/src/actesdataset.py +++ b/actes-princiers/src/actesdataset.py @@ -198,6 +198,10 @@ class BsXMLDataSet(XMLDataSet): latitude = latitude, longitude = longitude ) + # nakala + image = self.soup.find("graphic") + if image is not None: + image = image.get('url') return { # "num_acte": counter, "prince_name": self.find_prince_name(), @@ -212,7 +216,8 @@ class BsXMLDataSet(XMLDataSet): "transcribers": self.find_transcribers(), "place": place, "diplo_type": type_diplo, - "diplo_state": diplo_state + "diplo_state": diplo_state, + "image": image # "state_doc": state_query[0], # "diplo_type_acte": diplo_query[0] }