Coverage for scripts/get_wiki.py: 0%
41 statements
« prev ^ index » next coverage.py v7.6.0, created at 2024-07-28 10:12 -0400
« prev ^ index » next coverage.py v7.6.0, created at 2024-07-28 10:12 -0400
1import json
2import logging
3import re
4from typing import List, Dict
6import requests
7from bs4 import BeautifulSoup
def equals_to_dict(pairs: List[str]) -> Dict[str, str]:
    """Convert a list of "key=value" strings into a dictionary.

    Each entry is split on its first "=" only, so a value that itself
    contains "=" is kept intact. Keys and values are stripped of
    surrounding whitespace. When a key occurs more than once, the last
    occurrence wins.

    Args:
        pairs: Strings in the form "key=value".

    Returns:
        A dictionary mapping each stripped key to its stripped value.

    Raises:
        ValueError: If an entry contains no "=" separator.
    """
    return {
        key.strip(): value.strip()
        # maxsplit=1 keeps any further "=" characters as part of the value
        # instead of failing the two-name unpacking.
        for key, value in (pair.split("=", 1) for pair in pairs)
    }
def main() -> None:
    """Fetch the tag mapping from the OSM wiki and save it as JSON.

    The mapping returned by parse_wiki() is serialized with a four-space
    indent and written to scripts/tags.json (UTF-8), replacing any
    existing file. Nothing is returned.
    """
    mapping = parse_wiki()

    print("Writing tags to scripts/tags.json...")
    with open("scripts/tags.json", mode="w", encoding="utf-8") as out_file:
        serialized = json.dumps(mapping, indent=4)
        out_file.write(serialized)
def parse_wiki() -> dict:
    """Parse the OSM wiki and return a mapping of Overture categories to OSM tags.

    Requests the rendered "Overture_categories" page from the OSM wiki API,
    takes the first <table> in the returned HTML and reads one entry per
    row: the text of the row's first <td> becomes the key, and every <tt>
    element in the row is parsed as a "key=value" OSM tag.

    Returns:
        A dictionary mapping each row's first-cell text to a dictionary of
        OSM tags (empty when the row has no <tt> elements). The result is
        empty when the page contains no table.

    Raises:
        requests.HTTPError: If the wiki API answers with an error status.
    """
    table = {}
    print("Getting tags from OSM wiki...")
    response = requests.get(
        "https://wiki.openstreetmap.org/w/api.php",
        {"action": "parse", "page": "Overture_categories", "format": "json"},
        timeout=10,
    )
    # Surface HTTP failures as such instead of as an obscure JSON decoding
    # error or KeyError further down.
    response.raise_for_status()
    data = response.json()
    soup = BeautifulSoup(data["parse"]["text"]["*"], "html.parser")

    print("Parsing tags from OSM wiki...")
    wiki_table = soup.find("table")
    if not wiki_table:
        return table

    for row in wiki_table.find_all("tr"):
        first_cell = row.find("td")
        overture_tag = first_cell.text if first_cell else ""
        if not overture_tag:
            # Rows with no <td>, or an empty one, contribute nothing, so
            # skip them before trying to parse their <tt> elements.
            continue
        table[overture_tag] = equals_to_dict(
            [tt.text for tt in row.find_all("tt")]
        )

    return table
def parse_tags(wiki_str: str) -> dict:
    """Extract OSM tags from a wiki string of " and "-separated pairs.

    The input is echoed to stdout, then split on " and ". Every piece of
    the form "key=value" (word characters, "-", "_" and ":" only,
    optionally surrounded by whitespace) is added to the result; any other
    piece is reported through logging.error and skipped.

    Args:
        wiki_str: Text such as "amenity=cafe and cuisine=coffee_shop".

    Returns:
        A dictionary of the tag keys and values that could be parsed.
    """
    print(wiki_str)
    pattern = r"^\s*([-_:\w]+)=([-_:\w]+)\s*$"
    parsed = {}
    for tag in wiki_str.split(" and "):
        found = re.match(pattern, tag)
        if found is None:
            logging.error(f"Failed to parse '{tag}' from '{wiki_str}'")
            continue
        key, value = found.groups()
        parsed[key] = value

    return parsed
if __name__ == "__main__":
    # Only run the wiki download when executed as a script, not on import.
    main()