add anjou and berry in mongodb

develop
gwen 3 years ago
parent 0bf238caf7
commit 208d22053c

@ -41,10 +41,11 @@ class Acte(Document):
_id = StringField(required=True, max_length=150) _id = StringField(required=True, max_length=150)
house = StringField(required=True, max_length=100) house = StringField(required=True, max_length=100)
prince_name = StringField(required=True, max_length=150) prince_name = StringField(required=True, max_length=150)
prince_code = StringField(required=True, max_length=100)
analysis = StringField(required=True, max_length=3000) analysis = StringField(required=True, max_length=3000)
date = StringField(required=True, max_length=250) date = StringField(required=True, max_length=250)
transcribers = ListField(required=True) transcribers = ListField(required=True)
# FIXME type it as a **real** date object ? # FIXME date_time type: shall it be a **real** date object ?
date_time = StringField(required=True, max_length=15) date_time = StringField(required=True, max_length=15)
filename = StringField(required=True, max_length=100) filename = StringField(required=True, max_length=100)
ref_acte = StringField(required=True, max_length=100) ref_acte = StringField(required=True, max_length=100)
@ -113,7 +114,6 @@ def load_houses(yamldoc: YAMLDataSet, storage_ip: str, db_name: str, mongodb_adm
for house_dict in yamldoc['houses'].values(): for house_dict in yamldoc['houses'].values():
house_dict['_id'] = house_dict['name'] house_dict['_id'] = house_dict['name']
#logger.info("inserting: " + str(house_dict))
#houses_col.insert_one(house_dict) #houses_col.insert_one(house_dict)
house_entry = House(**house_dict) house_entry = House(**house_dict)
house_entry.save() house_entry.save()

@ -15,6 +15,24 @@ def create_pipeline(**kwargs) -> Pipeline:
name="populate_actes", name="populate_actes",
tags="populate_database", tags="populate_database",
), ),
node(
func=populate_mongo,
inputs=["berry_fulljsonoutput", "params:storage_ip", "params:db_name",
"params:db_collection_name", "params:mongodb_admin",
"params:mongodb_password"],
outputs=None,
name="populate_actes",
tags="populate_database",
),
node(
func=populate_mongo,
inputs=["anjou_fulljsonoutput", "params:storage_ip", "params:db_name",
"params:db_collection_name", "params:mongodb_admin",
"params:mongodb_password"],
outputs=None,
name="populate_actes",
tags="populate_database",
),
node( node(
func=load_houses, func=load_houses,
inputs=["houses", "params:storage_ip", "params:db_name", inputs=["houses", "params:storage_ip", "params:db_name",
@ -32,8 +50,6 @@ def create_pipeline(**kwargs) -> Pipeline:
name="populate_helpers", name="populate_helpers",
tags="populate_database", tags="populate_database",
) )
] ]
) )

@ -69,11 +69,10 @@ def _make_map(latitude, longitude, popup):
width=800, width=800,
height=600, height=600,
) )
folium.Marker( folium.Marker(
location=[latitude, longitude], location=[latitude, longitude],
popup=popup, popup=popup,
icon=folium.Icon(icon="cloud"), icon=folium.Icon(color='lightgray', icon="circle", prefix='fa')
).add_to(m) ).add_to(m)
return m.get_root()._repr_html_() return m.get_root()._repr_html_()

@ -104,6 +104,20 @@ class BsXMLDataSet(XMLDataSet):
prince_name = ps.get_text() prince_name = ps.get_text()
return prince_name return prince_name
def make_prince_code_from_filestem(self, filestem):
    """
    Derive the prince code from an acte file stem.

    The stem is split on underscores. The leading piece (the house) is
    discarded, the next two pieces are kept and re-joined with an
    underscore, and everything after them (the date) is ignored.

    :param filestem: file stem, sample: "anj_isa_i_1441_08_05a"
    :return: prince code, sample: "isa_i"
    """
    # str.split always yields at least one piece, so this unpacking is safe
    # even for an empty stem (the remainder is then an empty list).
    _house, *after_house = filestem.split('_')
    # Only the first two pieces after the house form the prince code.
    return "_".join(after_house[:2])
def transform(self): def transform(self):
#soup = make_soup(os.path.join(folder, acte)) #soup = make_soup(os.path.join(folder, acte))
# 1.1/ Get all data from XML (9). counter is the id (= numb_acte) # 1.1/ Get all data from XML (9). counter is the id (= numb_acte)
@ -160,9 +174,11 @@ class BsXMLDataSet(XMLDataSet):
latitude = latitude, latitude = latitude,
longitude = longitude longitude = longitude
) )
return { return {
# "num_acte": counter, # "num_acte": counter,
"prince_name": self.find_prince_name(), "prince_name": self.find_prince_name(),
"prince_code": self.make_prince_code_from_filestem(numb),
"filename": numb, "filename": numb,
"date_time": date_time, "date_time": date_time,
"date": date, "date": date,

Loading…
Cancel
Save