"""Print place names and archive names found in an XML file.

Originally added as ``app/data.py`` in commit d7dee3a5b7d2 ("initial commit").
"""

from bs4 import BeautifulSoup

# Input file used when this module is executed directly as a script.
# The path is relative, so it is resolved against the current working directory.
DEFAULT_XML_PATH = "../bourbon-latex/charles-actes-latex.xml"


def make_soup(file: str) -> BeautifulSoup:
    """Parse the XML document stored at *file*.

    The file is read as UTF-8 text and handed to BeautifulSoup with the
    ``'xml'`` feature.

    Args:
        file: Path of the XML file to read.

    Returns:
        The parsed document.

    Raises:
        OSError: If the file cannot be opened.
    """
    with open(file, 'r', encoding="utf-8") as opening:
        # Parse while the handle is still open; BeautifulSoup reads from it.
        return BeautifulSoup(opening, 'xml')


def get_places(file: str) -> None:
    """Print each identified ``div`` and the production places inside it.

    For every ``div`` element that carries an ``xml:id`` attribute, the value
    of its ``n`` attribute is printed, followed by the text of each nested
    ``placeName`` element whose ``type`` is ``production_place``.

    Args:
        file: Path of the XML file to read.

    Raises:
        KeyError: If a matching ``div`` has no ``n`` attribute.
    """
    soup = make_soup(file)
    for div in soup.find_all('div', {'xml:id': True}):
        print(div['n'])
        for place in div.find_all('placeName', {'type': 'production_place'}):
            print(place.text)


def get_archives(file: str) -> None:
    """Print the text of every ``orgName`` element whose ``type`` is ``main``.

    Args:
        file: Path of the XML file to read.
    """
    soup = make_soup(file)
    for org in soup.find_all('orgName', {'type': 'main'}):
        print(org.text)


if __name__ == "__main__":
    # Only run the demo when executed as a script, so that importing this
    # module does not read files or print as a side effect.
    get_archives(DEFAULT_XML_PATH)