add anjou and berry in mongodb

develop
gwen 2 years ago
parent 0bf238caf7
commit 208d22053c

@ -41,10 +41,11 @@ class Acte(Document):
_id = StringField(required=True, max_length=150)
house = StringField(required=True, max_length=100)
prince_name = StringField(required=True, max_length=150)
prince_code = StringField(required=True, max_length=100)
analysis = StringField(required=True, max_length=3000)
date = StringField(required=True, max_length=250)
transcribers = ListField(required=True)
# FIXME type it as a **real** date object ?
# FIXME date_time type: shall it be a **real** date object?
date_time = StringField(required=True, max_length=15)
filename = StringField(required=True, max_length=100)
ref_acte = StringField(required=True, max_length=100)
@ -113,7 +114,6 @@ def load_houses(yamldoc: YAMLDataSet, storage_ip: str, db_name: str, mongodb_adm
for house_dict in yamldoc['houses'].values():
house_dict['_id'] = house_dict['name']
#logger.info("inserting: " + str(house_dict))
#houses_col.insert_one(house_dict)
house_entry = House(**house_dict)
house_entry.save()

@ -15,6 +15,24 @@ def create_pipeline(**kwargs) -> Pipeline:
name="populate_actes",
tags="populate_database",
),
# Kedro requires every node name inside a pipeline to be unique, so each
# house-specific loader gets its own suffixed name instead of reusing
# "populate_actes". The shared "populate_database" tag is kept, so the
# nodes can still be selected together by tag.
node(
    func=populate_mongo,
    inputs=["berry_fulljsonoutput", "params:storage_ip", "params:db_name",
            "params:db_collection_name", "params:mongodb_admin",
            "params:mongodb_password"],
    outputs=None,
    name="populate_actes_berry",
    tags="populate_database",
),
node(
    func=populate_mongo,
    inputs=["anjou_fulljsonoutput", "params:storage_ip", "params:db_name",
            "params:db_collection_name", "params:mongodb_admin",
            "params:mongodb_password"],
    outputs=None,
    name="populate_actes_anjou",
    tags="populate_database",
),
node(
func=load_houses,
inputs=["houses", "params:storage_ip", "params:db_name",
@ -32,8 +50,6 @@ def create_pipeline(**kwargs) -> Pipeline:
name="populate_helpers",
tags="populate_database",
)
]
)

@ -69,11 +69,10 @@ def _make_map(latitude, longitude, popup):
width=800,
height=600,
)
folium.Marker(
location=[latitude, longitude],
popup=popup,
icon=folium.Icon(icon="cloud"),
icon=folium.Icon(color='lightgray', icon="circle", prefix='fa')
).add_to(m)
return m.get_root()._repr_html_()

@ -104,6 +104,20 @@ class BsXMLDataSet(XMLDataSet):
prince_name = ps.get_text()
return prince_name
def make_prince_code_from_filestem(self, filestem):
    """
    Derive the prince code embedded in a file stem.

    The stem is split on underscores; the second and third pieces are
    joined back together with an underscore. The leading piece (house)
    and everything from the fourth piece onward (date) are discarded.

    :param filestem: file stem, sample: "anj_isa_i_1441_08_05a"
    :return: prince code, sample: "isa_i"
    """
    # slice keeps only pieces 1 and 2; shorter stems simply yield fewer pieces
    code_pieces = filestem.split('_')[1:3]
    return "_".join(code_pieces)
def transform(self):
#soup = make_soup(os.path.join(folder, acte))
# 1.1/ Get all data from XML (9). counter is the id (= numb_acte)
@ -160,9 +174,11 @@ class BsXMLDataSet(XMLDataSet):
latitude = latitude,
longitude = longitude
)
return {
# "num_acte": counter,
"prince_name": self.find_prince_name(),
"prince_code": self.make_prince_code_from_filestem(numb),
"filename": numb,
"date_time": date_time,
"date": date,

Loading…
Cancel
Save