Coverage for src/hdmf/spec/write.py: 89%

153 statements  

« prev     ^ index     » next       coverage.py v7.2.5, created at 2023-07-25 05:02 +0000

1import copy 

2import json 

3import os.path 

4import warnings 

5from abc import ABCMeta, abstractmethod 

6from collections import OrderedDict 

7from datetime import datetime 

8import ruamel.yaml as yaml 

9 

10from .catalog import SpecCatalog 

11from .namespace import SpecNamespace 

12from .spec import GroupSpec, DatasetSpec 

13from ..utils import docval, getargs, popargs 

14 

15 

class SpecWriter(metaclass=ABCMeta):
    """Abstract interface for writing specifications and namespaces to storage."""

    @abstractmethod
    def write_spec(self, spec_file_dict, path):
        """Write the given spec dictionary to the given path."""
        pass

    @abstractmethod
    def write_namespace(self, namespace, path):
        """Write the given namespace to the given path."""
        pass

25 

26 

class YAMLSpecWriter(SpecWriter):
    """Write specs and namespaces as YAML files into a configurable output directory."""

    @docval({'name': 'outdir',
             'type': str,
             'doc': 'the path to write the directory to output the namespace and specs too', 'default': '.'})
    def __init__(self, **kwargs):
        self.__outdir = getargs('outdir', kwargs)

    def __dump_spec(self, specs, stream):
        # Round-trip through JSON to convert Spec objects (dict subclasses) into plain dicts
        # that the safe YAML dumper can serialize.
        specs_plain_dict = json.loads(json.dumps(specs))
        yaml_obj = yaml.YAML(typ='safe', pure=True)
        yaml_obj.default_flow_style = False
        yaml_obj.dump(specs_plain_dict, stream)

    def write_spec(self, spec_file_dict, path):
        """Write the given spec dictionary, with keys sorted into canonical order, as YAML
        to the given path under the output directory.
        """
        out_fullpath = os.path.join(self.__outdir, path)
        spec_plain_dict = json.loads(json.dumps(spec_file_dict))
        sorted_data = self.sort_keys(spec_plain_dict)
        with open(out_fullpath, 'w') as fd_write:
            yaml_obj = yaml.YAML(pure=True)
            yaml_obj.dump(sorted_data, fd_write)

    def write_namespace(self, namespace, path):
        """Write the given namespace key-value pairs as YAML to the given path.

        :param namespace: SpecNamespace holding the key-value pairs that define the namespace
        :param path: File path to write the namespace to as YAML under the key 'namespaces'
        """
        with open(os.path.join(self.__outdir, path), 'w') as stream:
            ns = namespace
            # Convert the date to an ISO 8601 string if necessary
            if 'date' in namespace and isinstance(namespace['date'], datetime):
                ns = copy.copy(ns)  # copy the namespace to avoid side-effects
                ns['date'] = ns['date'].isoformat()
            self.__dump_spec({'namespaces': [ns]}, stream)

    def reorder_yaml(self, path):
        """
        Open a YAML file, load it as python data, sort the data alphabetically, and write it back out to the
        same path.
        """
        with open(path, 'rb') as fd_read:
            yaml_obj = yaml.YAML(pure=True)
            data = yaml_obj.load(fd_read)
        self.write_spec(data, path)

    def sort_keys(self, obj):
        """Recursively return a copy of *obj* with dictionary keys ordered canonically.

        The keys listed in ``order`` come first (in that order); for untyped specs
        (no 'neurodata_type_def'), 'name' is moved to the very front.
        """
        # Represent None as null
        def my_represent_none(self, data):
            return self.represent_scalar(u'tag:yaml.org,2002:null', u'null')

        # NOTE: registers globally on RoundTripRepresenter; repeated registration is harmless.
        yaml.representer.RoundTripRepresenter.add_representer(type(None), my_represent_none)

        order = ['neurodata_type_def', 'neurodata_type_inc', 'data_type_def', 'data_type_inc',
                 'name', 'default_name',
                 'dtype', 'target_type', 'dims', 'shape', 'default_value', 'value', 'doc',
                 'required', 'quantity', 'attributes', 'datasets', 'groups', 'links']
        if isinstance(obj, dict):
            keys = list(obj.keys())
            # Walk the canonical order in reverse so the first entry ends up frontmost.
            for k in order[::-1]:
                if k in keys:
                    keys.remove(k)
                    keys.insert(0, k)
            if 'neurodata_type_def' not in keys and 'name' in keys:
                keys.remove('name')
                keys.insert(0, 'name')
            return yaml.comments.CommentedMap(
                yaml.compat.ordereddict([(k, self.sort_keys(obj[k])) for k in keys])
            )
        elif isinstance(obj, list):
            return [self.sort_keys(v) for v in obj]
        elif isinstance(obj, tuple):
            # BUG FIX: the original returned a generator expression here, not a tuple,
            # which would silently change the value's type and defer the recursion.
            return tuple(self.sort_keys(v) for v in obj)
        else:
            return obj

102 

103 

class NamespaceBuilder:
    ''' A class for building namespace and spec files '''

    @docval({'name': 'doc', 'type': str, 'doc': 'Description about what the namespace represents'},
            {'name': 'name', 'type': str, 'doc': 'Name of the namespace'},
            {'name': 'full_name', 'type': str, 'doc': 'Extended full name of the namespace', 'default': None},
            {'name': 'version', 'type': (str, tuple, list), 'doc': 'Version number of the namespace',
             'default': None},
            {'name': 'author', 'type': (str, list), 'doc': 'Author or list of authors.', 'default': None},
            {'name': 'contact', 'type': (str, list),
             'doc': 'List of emails. Ordering should be the same as for author', 'default': None},
            {'name': 'date', 'type': (datetime, str),
             'doc': "Date last modified or released. Formatting is %Y-%m-%d %H:%M:%S, e.g, 2017-04-25 17:14:13",
             'default': None},
            {'name': 'namespace_cls', 'type': type, 'doc': 'the SpecNamespace type', 'default': SpecNamespace})
    def __init__(self, **kwargs):
        ns_cls = popargs('namespace_cls', kwargs)
        if kwargs['version'] is None:
            # version is required on write as of HDMF 1.5. this check should prevent the writing of namespace files
            # without a version
            raise ValueError("Namespace '%s' missing key 'version'. Please specify a version for the extension."
                             % kwargs['name'])
        self.__ns_args = copy.deepcopy(kwargs)
        self.__namespaces = OrderedDict()  # included external namespaces, keyed by namespace name
        self.__sources = OrderedDict()     # source files for this namespace, keyed by file name
        self.__catalog = SpecCatalog()
        self.__dt_key = ns_cls.types_key()

    @docval({'name': 'source', 'type': str, 'doc': 'the path to write the spec to'},
            {'name': 'spec', 'type': (GroupSpec, DatasetSpec), 'doc': 'the Spec to add'})
    def add_spec(self, **kwargs):
        ''' Add a Spec to the namespace '''
        source, spec = getargs('source', 'spec', kwargs)
        self.__catalog.auto_register(spec, source)
        self.add_source(source)
        self.__sources[source].setdefault(self.__dt_key, list()).append(spec)

    @docval({'name': 'source', 'type': str, 'doc': 'the path to write the spec to'},
            {'name': 'doc', 'type': str, 'doc': 'additional documentation for the source file', 'default': None},
            {'name': 'title', 'type': str, 'doc': 'optional heading to be used for the source', 'default': None})
    def add_source(self, **kwargs):
        ''' Add a source file to the namespace '''
        source, doc, title = getargs('source', 'doc', 'title', kwargs)
        # sources must be bare file names; paths would break the output directory layout
        if '/' in source or source[0] == '.':
            raise ValueError('source must be a base file')
        source_dict = {'source': source}
        self.__sources.setdefault(source, source_dict)
        # Update the doc and title if given
        if doc is not None:
            self.__sources[source]['doc'] = doc
        if title is not None:
            # BUG FIX: this previously assigned `doc`, silently overwriting the title
            # with the doc text
            self.__sources[source]['title'] = title

    @docval({'name': 'data_type', 'type': str, 'doc': 'the data type to include'},
            {'name': 'source', 'type': str, 'doc': 'the source file to include the type from', 'default': None},
            {'name': 'namespace', 'type': str,
             'doc': 'the namespace from which to include the data type', 'default': None})
    def include_type(self, **kwargs):
        ''' Include a data type from an existing namespace or source '''
        dt, src, ns = getargs('data_type', 'source', 'namespace', kwargs)
        if src is not None:
            self.add_source(src)
            self.__sources[src].setdefault(self.__dt_key, list()).append(dt)
        elif ns is not None:
            self.include_namespace(ns)
            self.__namespaces[ns].setdefault(self.__dt_key, list()).append(dt)
        else:
            raise ValueError("must specify 'source' or 'namespace' when including type")

    @docval({'name': 'namespace', 'type': str, 'doc': 'the namespace to include'})
    def include_namespace(self, **kwargs):
        ''' Include an entire namespace '''
        namespace = getargs('namespace', kwargs)
        self.__namespaces.setdefault(namespace, {'namespace': namespace})

    @docval({'name': 'path', 'type': str, 'doc': 'the path to write the spec to'},
            {'name': 'outdir',
             'type': str,
             'doc': 'the path to write the directory to output the namespace and specs too', 'default': '.'},
            {'name': 'writer',
             'type': SpecWriter,
             'doc': 'the SpecWriter to use to write the namespace', 'default': None})
    def export(self, **kwargs):
        ''' Export the namespace to the given path.

        All new specification source files will be written in the same directory as the
        given path.
        '''
        ns_path, writer = getargs('path', 'writer', kwargs)
        if writer is None:
            writer = YAMLSpecWriter(outdir=getargs('outdir', kwargs))
        ns_args = copy.copy(self.__ns_args)
        ns_args['schema'] = list()
        # included namespaces go into the schema list first, then our own sources
        for ns, info in self.__namespaces.items():
            ns_args['schema'].append(info)
        for path, info in self.__sources.items():
            out = SpecFileBuilder()
            dts = list()
            for spec in info[self.__dt_key]:
                if isinstance(spec, str):
                    # a string entry is a type included from this source, not a new spec
                    dts.append(spec)
                else:
                    out.add_spec(spec)
            item = {'source': path}
            if 'doc' in info:
                item['doc'] = info['doc']
            if 'title' in info:
                item['title'] = info['title']
            # a source either provides new specs (written out) or includes existing
            # types -- never both
            if out and dts:
                raise ValueError('cannot include from source if writing to source')
            elif dts:
                item[self.__dt_key] = dts
            elif out:
                writer.write_spec(out, path)
            ns_args['schema'].append(item)
        namespace = SpecNamespace.build_namespace(**ns_args)
        writer.write_namespace(namespace, ns_path)

    @property
    def name(self):
        # the namespace name supplied at construction time
        return self.__ns_args['name']

224 

225 

class SpecFileBuilder(dict):
    """A dict that accumulates specs under the 'groups' and 'datasets' keys for writing
    to a single spec source file.
    """

    @docval({'name': 'spec', 'type': (GroupSpec, DatasetSpec), 'doc': 'the Spec to add'})
    def add_spec(self, **kwargs):
        """Append the given spec to the list matching its type."""
        spec = getargs('spec', kwargs)
        if isinstance(spec, GroupSpec):
            key = 'groups'
        elif isinstance(spec, DatasetSpec):
            key = 'datasets'
        else:
            return
        self.setdefault(key, list()).append(spec)

235 

236 

def export_spec(ns_builder, new_data_types, output_dir):
    """
    Create YAML specification files for a new namespace and extensions with
    the given data type specs.

    Args:
        ns_builder - NamespaceBuilder instance used to build the
                     namespace and extension
        new_data_types - Iterable of specs that represent new data types
                         to be added
        output_dir - Directory in which the namespace and extension spec
                     files will be written
    """
    # Materialize so that any iterable (including a generator) can be sized and
    # iterated; the original len() call required a sized container.
    new_data_types = list(new_data_types)

    if len(new_data_types) == 0:
        warnings.warn('No data types specified. Exiting.')
        return

    ns_path = ns_builder.name + '.namespace.yaml'
    ext_path = ns_builder.name + '.extensions.yaml'

    for data_type in new_data_types:
        ns_builder.add_spec(ext_path, data_type)

    ns_builder.export(ns_path, outdir=output_dir)