refactoring

develop
gwen 2 years ago
parent 4abfc2ba01
commit f5090d799f

@ -100,12 +100,9 @@ class BsXMLDataSet(XMLDataSet):
prince_name = tree.xpath('//listPerson[@type="prince"]/person/name/text()')
"""
persons = self.soup.find_all("listPerson")
for pers in persons:
if pers.attrs.get('type') == "prince":
ps = pers.find('person')
ps_name = pers.find('name')
prince_name = ps_name.get_text()
person = self.soup.find("listPerson", {'type': "prince"} )
ps = person.find('name')
prince_name = ps.get_text()
return prince_name
def transform(self):
@ -116,23 +113,24 @@ class BsXMLDataSet(XMLDataSet):
date = self.soup.msItem.docDate.text # verbose date
analyse = self.soup.abstract.p.text # acte's short analysis
ref = self.soup.msIdentifier.find_all("idno", {"n": "2"})
# //sourceDesc//msIdentifier/idno[@n='2'] is the doc id inside the
# archive box or the page number inside a manuscript (see _create_doc)
# warning: the analysis may not have been written yet,
# which would result in List Index Out of Range Error. Hence :
if len(ref) > 0: # there is an analysis
ref_acte = ref[0].text
else: # there is no analysis
ref_acte = "NS"
# FIXME: use this location -> geolocation
# prod_place = self.soup.find_all("placeName", {"type": "production_place"})[0].text
# //sourceDesc//msIdentifier/idno[@n='2'] is the doc id inside the
# archive box or the page number inside a manuscript (see _create_doc)
# warning: the analysis may not have been written yet,
# which would result in List Index Out of Range Error. Hence:
# //sourceDesc//msIdentifier/idno[@n='1'] is always the
# archive box or manuscript collection id
# #doc = self.soup.msIdentifier.find_all("idno", {"n": "1"})[0]
# #type_diplo = self.soup.body.div["subtype"]
# #diplo_state = self.soup.body.div["type"]
#doc = self.soup.msIdentifier.find_all("idno", {"n": "1"})[0]
#type_diplo = self.soup.body.div["subtype"]
#diplo_state = self.soup.body.div["type"]
# FIXME: location -> geolocation
#place = self.soup.find_all("placeName", {"type": "production_place"})[0].text
#if len(place != "NS":  
# place = place[0].text
return {
# "num_acte": counter,
"prince_name": self.find_prince_name(),
@ -256,16 +254,18 @@ class TextDataSetCollection(DataSetCollection):
filepath=str(filepath))
return self
#class FoliumHTMLDataSet(AbstractDataSet):
# def __init__(self, filepath: str):
# self._filepath = filepath
#
# def _load(self) -> None:
# raise DataSetError('This dataset is WriteOnly')
#
# def _describe(self) -> Dict[str, Any]:
# return dict(filepath=self._filepath)
#
# def _save(self, data: Map) -> None:
# data.save(self._filepath)
#
class FoliumHTMLDataSet(AbstractDataSet):
    """Write-only dataset that persists an object through its ``save`` method.

    The object handed to ``_save`` is expected to expose ``save(path)``;
    loading is not supported and always raises ``DataSetError``.
    """

    def __init__(self, filepath: str):
        """Remember the path the data will be written to."""
        self._filepath = filepath

    def _load(self) -> None:
        """Always raise ``DataSetError``: this dataset cannot be read back."""
        raise DataSetError('This dataset is WriteOnly')

    def _describe(self) -> Dict[str, Any]:
        """Return a mapping holding the configured ``filepath``."""
        return {"filepath": self._filepath}

    def _save(self, data) -> None:
        """Delegate to ``data.save`` with the configured path."""
        # FIXME: ``data`` was previously annotated as ``Map``; the annotation
        # is currently dropped.
        target = self._filepath
        data.save(target)

Loading…
Cancel
Save