from bs4 import BeautifulSoup


def make_soup(file):
    """Parse the XML file at *file* and return the BeautifulSoup document.

    The file is opened as UTF-8 text and handed to BeautifulSoup with the
    'xml' parser.

    Args:
        file: Path of the XML file to read.

    Returns:
        The BeautifulSoup object built from the file's content.
    """
    with open(file, 'r', encoding="utf-8") as opening:
        xml = BeautifulSoup(opening, 'xml')
    return xml


def get_places(file):
    """Print the set of distinct production-place names found in *file*.

    Looks at every <div> carrying an 'xml:id' attribute and, inside each,
    at every <placeName> whose 'type' attribute is 'production_place'.
    The text of each such element is collected and the de-duplicated set
    is printed once at the end.

    Args:
        file: Path of the XML file to read.
    """
    places_xtract = []
    soup = make_soup(file)
    for div in soup.find_all('div', {'xml:id': True}):
        for place in div.find_all('placeName', {'type': 'production_place'}):
            # Accumulate into the list that is initialised above and printed
            # below; appending to any other name raises NameError.
            places_xtract.append(place.text)
    # set() drops repeated names, including repeats caused by a <placeName>
    # being reached through more than one enclosing <div>.
    print(set(places_xtract))


def get_archives(file):
    """Print the text of every <orgName type="main"> element in *file*.

    Args:
        file: Path of the XML file to read.
    """
    soup = make_soup(file)
    for div in soup.find_all('orgName', {'type': 'main'}):
        print(div.text)


get_places("../bourbon-latex/charles-actes-latex.xml")