refactoring

3 years ago · f5090d799f
parent 4abfc2ba01
commit f5090d799f
1 changed files with 32 additions and 32 deletions
--- a/actes-princiers/src/actesdataset.py
+++ b/actes-princiers/src/actesdataset.py
@@ -100,12 +100,9 @@ class BsXMLDataSet(XMLDataSet):

        prince_name = tree.xpath('//listPerson[@type="prince"]/person/name/text()')
        """
-        persons = self.soup.find_all("listPerson")
-        for pers in persons:
-            if pers.attrs.get('type') == "prince":
-                ps = pers.find('person')
-                ps_name = pers.find('name')
-                prince_name = ps_name.get_text()
+        person = self.soup.find("listPerson", {'type': "prince"} )
+        ps = person.find('name')
+        prince_name = ps.get_text()
        return prince_name

    def transform(self):
@@ -116,23 +113,24 @@ class BsXMLDataSet(XMLDataSet):
        date = self.soup.msItem.docDate.text  # verbose date
        analyse = self.soup.abstract.p.text  # acte's short analysis
        ref = self.soup.msIdentifier.find_all("idno", {"n": "2"})
-        
-        # //sourceDesc//msIdentifier/idno[@n='2'] is the doc id inside the
-        # archive box or the page number inside a manuscript (see _create_doc)
-        # warning: the analysis may not have been written yet,
-        # which would result in List Index Out of Range Error. Hence :
        if len(ref) > 0:  # there is an analysis
            ref_acte = ref[0].text
        else:  # there is no analysis
            ref_acte = "NS"
-        # FIXME: use this location -> geolocallisation
-#        prod_place = self.soup.find_all("placeName", {"type": "production_place"})[0].text
+        # //sourceDesc//msIdentifier/idno[@n='2'] is the doc id inside the
+        # archive box or the page number inside a manuscript (see _create_doc)
+        # warning: the analysis may not have been written yet,
+        # which would result in List Index Out of Range Error. Hence:
        # //sourceDesc//msIdentifier/idno[@n='1'] is always the
        # archive box or manuscript collection id
-#        #doc = self.soup.msIdentifier.find_all("idno", {"n": "1"})[0]
-#        #type_diplo = self.soup.body.div["subtype"]
-#        #diplo_state = self.soup.body.div["type"]
-
+        #doc = self.soup.msIdentifier.find_all("idno", {"n": "1"})[0]
+        #type_diplo = self.soup.body.div["subtype"]
+        #diplo_state = self.soup.body.div["type"]
+
+        # FIXME: location -> geolocalisation
+        #place = self.soup.find_all("placeName", {"type": "production_place"})
+        #if len(place) > 0:
+        #    place = place[0].text
        return {
 #            "num_acte": counter,
            "prince_name": self.find_prince_name(),
@@ -256,16 +254,18 @@ class TextDataSetCollection(DataSetCollection):
                filepath=str(filepath))
        return self

-#class FoliumHTMLDataSet(AbstractDataSet):
-#    def __init__(self, filepath: str):
-#        self._filepath = filepath
-#
-#    def _load(self) -> None:
-#        raise DataSetError('This dataset is WriteOnly')
-#
-#    def _describe(self) -> Dict[str, Any]:
-#        return dict(filepath=self._filepath)
-#
-#    def _save(self, data: Map) -> None:
-#        data.save(self._filepath)
-#
+class FoliumHTMLDataSet(AbstractDataSet):
+    def __init__(self, filepath: str):
+        self._filepath = filepath
+
+    def _load(self) -> None:
+        raise DataSetError('This dataset is WriteOnly')
+
+    def _describe(self) -> Dict[str, Any]:
+        return dict(filepath=self._filepath)
+
+    #def _save(self, data: Map) -> None:
+    def _save(self, data) -> None:
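+        # FIXME: `data` lost its `Map` annotation (see commented-out signature); presumably restore it once `Map` is importable -- confirm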
+        data.save(self._filepath)
+