add datetime type

develop
gwen 2 years ago
parent a385966dee
commit 8b5b3d83c8

@ -5,7 +5,7 @@ from pathlib import Path
from typing import Dict from typing import Dict
from mongoengine import connect from mongoengine import connect
from mongoengine import Document, StringField, DictField, ListField from mongoengine import Document, StringField, DictField, ListField, DateTimeField
#import folium #import folium
from kedro.framework.session import KedroSession from kedro.framework.session import KedroSession
@ -45,8 +45,7 @@ class Acte(Document):
analysis = StringField(required=True, max_length=3000) analysis = StringField(required=True, max_length=3000)
date = StringField(required=True, max_length=250) date = StringField(required=True, max_length=250)
transcribers = ListField(required=True) transcribers = ListField(required=True)
# FIXME date_time type: shall it be a **real** date object? date_time = DateTimeField(required=True)
date_time = StringField(required=True, max_length=15)
filename = StringField(required=True, max_length=100) filename = StringField(required=True, max_length=100)
ref_acte = StringField(required=True, max_length=100) ref_acte = StringField(required=True, max_length=100)
xmlcontent = StringField(required=True) # no max_length xmlcontent = StringField(required=True) # no max_length

@ -122,16 +122,29 @@ class BsXMLDataSet(XMLDataSet):
#soup = make_soup(os.path.join(folder, acte)) #soup = make_soup(os.path.join(folder, acte))
# 1.1/ Get all data from XML (9). counter is the id (= numb_acte) # 1.1/ Get all data from XML (9). counter is the id (= numb_acte)
numb = self.soup.TEI["xml:id"] # /TEI[@xml:id] is always the acte's ID numb = self.soup.TEI["xml:id"] # /TEI[@xml:id] is always the acte's ID
date_time = self.soup.msItem.docDate["when"] # YYYY-MM-DD or YYYY-MM date # date formats : YYYY-MM-DD, YYYY-MM or just YYYY
#from datetime import datetime date_time = self.soup.msItem.docDate["when"]
#if not len(date_time.split('-')) == 2:
# # time format '%Y-%m-%d' # datetime parsing
# isotime = datetime.strptime(date_time,'%Y-%m-%d') from datetime import datetime
# date_time = isotime.isoformat() if len(date_time.split('-')) == 1:
#else: # time format 'YYYY'
# # time format '%Y-%m' isotime = datetime.strptime(date_time,'%Y')
# isotime = datetime.strptime(date_time,'%Y-%m') date_time = isotime.isoformat()
# date_time = isotime.isoformat()
elif len(date_time.split('-')) == 2:
# time format '%Y-%m'
isotime = datetime.strptime(date_time,'%Y-%m')
date_time = isotime.isoformat()
elif len(date_time.split('-')) == 3:
# time format '%Y-%m-%d'
isotime = datetime.strptime(date_time,'%Y-%m-%d')
date_time = isotime.isoformat()
else:
# FIXME: raise an exception when the date is not in YYYY, YYYY-MM or YYYY-MM-DD format
pass
date = self.soup.msItem.docDate.text # verbose date date = self.soup.msItem.docDate.text # verbose date
analyse = self.soup.abstract.p.text # acte's short analysis analyse = self.soup.abstract.p.text # acte's short analysis
ref = self.soup.msIdentifier.find_all("idno", {"n": "2"}) ref = self.soup.msIdentifier.find_all("idno", {"n": "2"})

Loading…
Cancel
Save