refactoring

develop
gwen 2 years ago
parent 4abfc2ba01
commit f5090d799f

@ -100,12 +100,9 @@ class BsXMLDataSet(XMLDataSet):
prince_name = tree.xpath('//listPerson[@type="prince"]/person/name/text()') prince_name = tree.xpath('//listPerson[@type="prince"]/person/name/text()')
""" """
persons = self.soup.find_all("listPerson") person = self.soup.find("listPerson", {'type': "prince"} )
for pers in persons: ps = person.find('name')
if pers.attrs.get('type') == "prince": prince_name = ps.get_text()
ps = pers.find('person')
ps_name = pers.find('name')
prince_name = ps_name.get_text()
return prince_name return prince_name
def transform(self): def transform(self):
@ -116,23 +113,24 @@ class BsXMLDataSet(XMLDataSet):
date = self.soup.msItem.docDate.text # verbose date date = self.soup.msItem.docDate.text # verbose date
analyse = self.soup.abstract.p.text # acte's short analysis analyse = self.soup.abstract.p.text # acte's short analysis
ref = self.soup.msIdentifier.find_all("idno", {"n": "2"}) ref = self.soup.msIdentifier.find_all("idno", {"n": "2"})
# //sourceDesc//msIdentifier/idno[@n='2'] is the doc id inside the
# archive box or the page number inside a manuscript (see _create_doc)
# warning: the analysis may not have been written yet,
# which would result in List Index Out of Range Error. Hence :
if len(ref) > 0: # there is an analysis if len(ref) > 0: # there is an analysis
ref_acte = ref[0].text ref_acte = ref[0].text
else: # there is no analysis else: # there is no analysis
ref_acte = "NS" ref_acte = "NS"
# FIXME: use this location -> geolocalisation # //sourceDesc//msIdentifier/idno[@n='2'] is the doc id inside the
# prod_place = self.soup.find_all("placeName", {"type": "production_place"})[0].text # archive box or the page number inside a manuscript (see _create_doc)
# warning: the analysis may not have been written yet,
# which would result in List Index Out of Range Error. Hence:
# //sourceDesc//msIdentifier/idno[@n='1'] is always the # //sourceDesc//msIdentifier/idno[@n='1'] is always the
# archive box or manuscript collection id # archive box or manuscript collection id
# #doc = self.soup.msIdentifier.find_all("idno", {"n": "1"})[0] #doc = self.soup.msIdentifier.find_all("idno", {"n": "1"})[0]
# #type_diplo = self.soup.body.div["subtype"] #type_diplo = self.soup.body.div["subtype"]
# #diplo_state = self.soup.body.div["type"] #diplo_state = self.soup.body.div["type"]
# FIXME: location -> geolocalisation
#place = self.soup.find_all("placeName", {"type": "production_place"})[0].text
#if len(place != "NS":  
# place = place[0].text
return { return {
# "num_acte": counter, # "num_acte": counter,
"prince_name": self.find_prince_name(), "prince_name": self.find_prince_name(),
@ -256,16 +254,18 @@ class TextDataSetCollection(DataSetCollection):
filepath=str(filepath)) filepath=str(filepath))
return self return self
#class FoliumHTMLDataSet(AbstractDataSet): class FoliumHTMLDataSet(AbstractDataSet):
# def __init__(self, filepath: str): def __init__(self, filepath: str):
# self._filepath = filepath self._filepath = filepath
#
# def _load(self) -> None: def _load(self) -> None:
# raise DataSetError('This dataset is WriteOnly') raise DataSetError('This dataset is WriteOnly')
#
# def _describe(self) -> Dict[str, Any]: def _describe(self) -> Dict[str, Any]:
# return dict(filepath=self._filepath) return dict(filepath=self._filepath)
#
# def _save(self, data: Map) -> None: #def _save(self, data: Map) -> None:
# data.save(self._filepath) def _save(self, data) -> None:
# # FIXME
data.save(self._filepath)

Loading…
Cancel
Save