You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
|
|
|
|
from bs4 import BeautifulSoup
|
|
|
|
|
|
|
|
|
|
def make_soup(file):
    """Parse an XML file into a BeautifulSoup tree.

    Args:
        file: Path of the XML document; it is opened as UTF-8 text.

    Returns:
        The ``BeautifulSoup`` object built with the ``'xml'`` parser feature.
    """
    with open(file, mode='r', encoding="utf-8") as handle:
        # Parsing happens while the handle is still open; the tree is
        # fully built before the context manager closes the file.
        return BeautifulSoup(handle, 'xml')
|
|
|
|
|
|
|
|
|
|
def get_places(file):
    """Print the distinct production places mentioned in an XML file.

    The document is parsed with ``make_soup``. Every ``<div>`` carrying an
    ``xml:id`` attribute is searched for ``<placeName>`` elements whose
    ``type`` is ``production_place``; their text is collected and the set
    of unique values is printed.

    Args:
        file: Path of the XML document to read.
    """
    places_xtract = []
    soup = make_soup(file)
    for div in soup.find_all('div', {'xml:id': True}):
        for place in div.find_all('placeName', {'type': 'production_place'}):
            # Collect into the list defined above; the previous code appended
            # to an undefined name and raised NameError on the first match.
            places_xtract.append(place.text)
    # De-duplicate only at output time.
    print(set(places_xtract))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_archives(file):
    """Print the text of each ``<orgName type="main">`` element in an XML file.

    Args:
        file: Path of the XML document to read; parsed via ``make_soup``.
    """
    tree = make_soup(file)
    main_orgs = tree.find_all('orgName', {'type': 'main'})
    for org in main_orgs:
        # One line of output per matching element, in document order.
        print(org.text)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Script entry point: runs whenever this module is executed or imported.
# Prints the production places found in the XML file below; the path is
# relative, so it is resolved against the current working directory.
get_places("../bourbon-latex/charles-actes-latex.xml")
|