Coverage for scripts/get_wiki.py: 0%

41 statements  

« prev     ^ index     » next       coverage.py v7.6.0, created at 2024-07-28 10:12 -0400

1import json 

2import logging 

3import re 

4from typing import List, Dict 

5 

6import requests 

7from bs4 import BeautifulSoup 

8 

9 

def equals_to_dict(pairs: List[str]) -> Dict[str, str]:
    """Convert a list of ``"key=value"`` strings into a dictionary.

    Each string is split on its first ``=`` only, so a value that itself
    contains ``=`` is kept intact. Whitespace surrounding the key and the
    value is stripped. If the same key appears more than once, the last
    occurrence wins.

    Args:
        pairs: Strings of the form ``"key=value"``.

    Returns:
        A dictionary mapping each stripped key to its stripped value.

    Raises:
        ValueError: If one of the strings contains no ``=``.
    """
    result: Dict[str, str] = {}
    for pair in pairs:
        # partition() splits at the first "=" and never yields more than
        # three parts, unlike split("=") which breaks on "a=b=c".
        key, separator, value = pair.partition("=")
        if not separator:
            raise ValueError(f"Expected a 'key=value' string, got {pair!r}")
        result[key.strip()] = value.strip()
    return result

15 

16 

def main() -> None:
    """Build the tag mapping with ``parse_wiki`` and save it as JSON.

    The result is written to ``scripts/tags.json`` (resolved against the
    current working directory) as UTF-8 text with a four-space indent.
    """
    mapping = parse_wiki()

    print("Writing tags to scripts/tags.json...")
    with open("scripts/tags.json", mode="w", encoding="utf-8") as out_file:
        serialized = json.dumps(mapping, indent=4)
        out_file.write(serialized)

29 

30 

def parse_wiki() -> dict:
    """Fetch the OSM wiki page and map its first-column entries to OSM tags.

    Requests the rendered ``Overture_categories`` page from the OSM wiki API,
    takes the first ``<table>`` in the returned HTML and, for every row, uses
    the text of the row's first ``<td>`` as the key and the ``key=value``
    pairs found in the row's ``<tt>`` elements as the value.

    Returns:
        A dict from first-cell text to a dict of OSM tags. Rows without a
        ``<td>`` or with an empty first cell are skipped. The dict is empty
        when the page contains no table.

    Raises:
        requests.HTTPError: If the wiki API responds with an error status.
        KeyError: If the JSON response lacks the ``parse.text.*`` entry.
    """
    print("Getting tags from OSM wiki...")
    response = requests.get(
        "https://wiki.openstreetmap.org/w/api.php",
        {"action": "parse", "page": "Overture_categories", "format": "json"},
        timeout=10,
    )
    # Fail loudly on 4xx/5xx instead of trying to decode an error page.
    response.raise_for_status()
    data = response.json()
    soup = BeautifulSoup(data["parse"]["text"]["*"], "html.parser")

    print("Parsing tags from OSM wiki...")
    table = {}
    wiki_table = soup.find("table")
    if not wiki_table:
        return table

    for row in wiki_table.find_all("tr"):
        first_cell = row.find("td")
        if not first_cell:
            # No data cell in this row, so there is no key to store under.
            continue
        overture_tag = first_cell.text
        if not overture_tag:
            continue
        # Every <tt> element in the row is expected to hold one key=value pair.
        table[overture_tag] = equals_to_dict(
            [tt.text for tt in row.find_all("tt")]
        )

    return table

59 

60 

def parse_tags(wiki_str: str) -> dict:
    """Extract ``key=value`` OSM tags from a string joined by ``" and "``.

    The input is echoed to stdout, then split on ``" and "``. Each piece that
    consists of a single ``key=value`` pair (word characters, ``-``, ``_``
    and ``:`` only, optional surrounding whitespace) is added to the result;
    any other piece is reported through ``logging.error`` and skipped.
    """
    print(wiki_str)

    parsed = {}
    for piece in wiki_str.split(" and "):
        found = re.match(r"^\s*([-_:\w]+)=([-_:\w]+)\s*$", piece)
        if found is None:
            logging.error(f"Failed to parse '{piece}' from '{wiki_str}'")
            continue
        tag_key, tag_value = found.group(1), found.group(2)
        parsed[tag_key] = tag_value

    return parsed

72 

73 

# Run the scraper only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()