Coverage for dj/superset.py: 100%

41 statements  

« prev     ^ index     » next       coverage.py v7.2.3, created at 2023-04-17 20:05 -0700

1""" 

2A DB engine spec for Superset. 

3""" 

4 

5import re 

6from datetime import timedelta 

7from typing import TYPE_CHECKING, Any, List, Optional, Set, TypedDict 

8 

9import requests 

10from sqlalchemy.engine.reflection import Inspector 

11 

12try: 

13 from superset.db_engine_specs.base import BaseEngineSpec 

14except ImportError: # pragma: no cover 

15 # we don't really need the base class, so we can just mock it if Apache Superset is 

16 # not installed 

17 BaseEngineSpec = object 

18 

19if TYPE_CHECKING: 

20 from superset.models.core import Database 

21 

22 

# Warning text that ``DJEngineSpec.select_star`` returns (wrapped in a
# ``SELECT ... AS warning`` query) in place of a real data preview.
SELECT_STAR_MESSAGE = (
    "DJ does not support data preview, since the `metrics` table is a virtual "
    "table representing the whole repository of metrics. "
    "An administrator should configure the DJ database with the "
    "`disable_data_preview` attribute set to `true` in the `extra` field."
)

# Upper bound on how long the HTTP request issued by
# ``DJEngineSpec.get_metrics`` may take.
GET_METRICS_TIMEOUT = timedelta(minutes=1)

30 

31 

class MetricType(TypedDict, total=False):
    """
    Shape of a single metric entry returned by `get_metrics`.

    The dictionary is declared with ``total=False``, so every key is optional.
    """

    # Keys populated by ``DJEngineSpec.get_metrics``.
    metric_name: str
    expression: str
    description: Optional[str]

    # Keys not set by ``DJEngineSpec.get_metrics``; presumably optional
    # metadata consumed by Superset -- TODO confirm against Superset's own
    # metric type.
    verbose_name: Optional[str]
    metric_type: Optional[str]
    d3format: Optional[str]
    warning_text: Optional[str]
    extra: Optional[str]

45 

46 

class DJEngineSpec(BaseEngineSpec):  # pylint: disable=abstract-method
    """
    Engine spec for the DataJunction metric repository

    See https://github.com/DataJunction/dj for more information.
    """

    engine = "dj"
    engine_name = "DJ"

    sqlalchemy_uri_placeholder = "dj://host:port/database_id"

    # Keys are ISO 8601 durations (``None`` leaves the column untouched); values
    # are SQL templates in which ``{col}`` stands for the time column.
    _time_grain_expressions = {
        None: "{col}",
        "PT1S": "DATE_TRUNC('second', {col})",
        "PT1M": "DATE_TRUNC('minute', {col})",
        "PT1H": "DATE_TRUNC('hour', {col})",
        "P1D": "DATE_TRUNC('day', {col})",
        "P1W": "DATE_TRUNC('week', {col})",
        "P1M": "DATE_TRUNC('month', {col})",
        "P3M": "DATE_TRUNC('quarter', {col})",
        "P1Y": "DATE_TRUNC('year', {col})",
    }

    @classmethod
    def select_star(  # pylint: disable=unused-argument
        cls,
        *args: Any,
        **kwargs: Any,
    ) -> str:
        """
        Return a ``SELECT *`` query.

        Since DJ doesn't have tables per se, a ``SELECT *`` query doesn't make sense.
        Instead of previewing data, the returned query selects a single string
        literal, aliased as ``warning``, containing ``SELECT_STAR_MESSAGE``. All
        positional and keyword arguments are accepted and ignored.
        """
        # Double any single quotes so the message is a valid SQL string literal.
        message = SELECT_STAR_MESSAGE.replace("'", "''")
        return f"SELECT '{message}' AS warning"

    @classmethod
    def get_metrics(  # pylint: disable=unused-argument
        cls,
        database: "Database",
        inspector: Inspector,
        table_name: str,
        schema: Optional[str],
    ) -> List[MetricType]:
        """
        Get all metrics from a given schema and table.

        The metrics are fetched over HTTP from the ``metrics/`` endpoint under the
        base URL exposed by the database connection; ``inspector``, ``table_name``
        and ``schema`` are not used.

        :param database: object providing ``get_sqla_engine_with_context()``.
        :returns: one entry per item in the JSON payload, with the metric name,
            the double-quoted name as its expression, and the description.
        :raises requests.HTTPError: if the server answers with an error status.
        """
        with database.get_sqla_engine_with_context() as engine:
            # The connection is only needed to read the base URL; close it
            # explicitly so it is not leaked on every call.
            connection = engine.connect()
            try:
                base_url = connection.connection.base_url
            finally:
                connection.close()

        response = requests.get(
            # ``base_url`` must support the ``/`` operator for joining paths
            # (presumably a ``yarl.URL`` -- confirm against the DJ DB API).
            base_url / "metrics/",
            timeout=GET_METRICS_TIMEOUT.total_seconds(),
        )
        # Fail with a clear HTTP error instead of trying to interpret an error
        # payload as a list of metrics.
        response.raise_for_status()
        payload = response.json()
        return [
            {
                "metric_name": metric["name"],
                "expression": f'"{metric["name"]}"',
                "description": metric["description"],
            }
            for metric in payload
        ]

    @classmethod
    def execute(
        cls,
        cursor: Any,
        query: str,
        **kwargs: Any,
    ) -> None:
        """
        Quote ``__timestamp`` and other identifiers starting with an underscore.

        Every alias of the form `` AS _name`` in ``query`` is rewritten to
        `` AS "_name"`` before the query is handed to the parent class's
        ``execute`` together with ``cursor`` and ``kwargs``.
        """
        # The lazy match stops at the first word boundary, i.e. at the end of the
        # underscore-prefixed identifier.
        query = re.sub(r" AS (_.*?)(\b|$)", r' AS "\1"', query)

        return super().execute(cursor, query, **kwargs)

    @classmethod
    def get_view_names(  # pylint: disable=unused-argument
        cls,
        database: "Database",
        inspector: Inspector,
        schema: Optional[str],
    ) -> Set[str]:
        """
        Return all views.

        This always returns an empty set; the arguments are ignored.
        """
        return set()