Coverage for src/hdmf/build/builders.py: 100%

257 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2023-10-04 02:57 +0000

1import copy as _copy 

2import itertools as _itertools 

3import posixpath as _posixpath 

4from abc import ABCMeta 

5from collections.abc import Iterable 

6from datetime import datetime, date 

7 

8import numpy as np 

9from h5py import RegionReference 

10 

11from ..utils import docval, getargs, get_docval 

12 

13 

class Builder(dict, metaclass=ABCMeta):
    """Dict-based base class that records a builder's name, parent and source."""

    @docval({'name': 'name', 'type': str, 'doc': 'the name of the group'},
            {'name': 'parent', 'type': 'Builder', 'doc': 'the parent builder of this Builder', 'default': None},
            {'name': 'source', 'type': str,
             'doc': 'the source of the data in this builder e.g. file name', 'default': None})
    def __init__(self, **kwargs):
        name, parent, source = getargs('name', 'parent', 'source', kwargs)
        super().__init__()
        self.__name = name
        self.__parent = parent
        # An explicitly given source wins; otherwise fall back to the parent's
        # source when a parent exists, and to None when it does not.
        if source is None and parent is not None:
            source = parent.source
        self.__source = source

    @property
    def path(self):
        """The names of this builder and its ancestors, root first, joined by '/'."""
        names = []
        node = self
        while node is not None:
            names.append(node.name)
            node = node.parent
        # names were collected from this builder up to the root, so reverse them
        return "/".join(reversed(names))

    @property
    def name(self):
        """The name given to this builder at construction."""
        return self.__name

    @property
    def source(self):
        """The source of this builder, or None if it has not been set."""
        return self.__source

    @source.setter
    def source(self, s):
        # the source may only be assigned while it is still unset
        if self.__source is not None:
            raise AttributeError('Cannot overwrite source.')
        self.__source = s

    @property
    def parent(self):
        """The parent builder of this builder, or None if it has not been set."""
        return self.__parent

    @parent.setter
    def parent(self, p):
        # the parent may only be assigned while it is still unset
        if self.__parent is not None:
            raise AttributeError('Cannot overwrite parent.')
        self.__parent = p
        # a builder without its own source takes the source of its new parent
        if self.__source is None:
            self.source = p.source

    def __repr__(self):
        class_name = self.__class__.__name__
        dict_repr = super().__repr__()
        return "%s %s %s" % (self.path, class_name, dict_repr)

74 

75 

class BaseBuilder(Builder, metaclass=ABCMeta):
    """Builder that additionally stores a dictionary of attributes and a location."""

    __attribute = 'attributes'  # self dictionary key for attributes

    @docval({'name': 'name', 'type': str, 'doc': 'The name of the builder.'},
            {'name': 'attributes', 'type': dict, 'doc': 'A dictionary of attributes to create in this builder.',
             'default': dict()},
            {'name': 'parent', 'type': 'GroupBuilder', 'doc': 'The parent builder of this builder.', 'default': None},
            {'name': 'source', 'type': str,
             'doc': 'The source of the data represented in this builder', 'default': None})
    def __init__(self, **kwargs):
        name, attributes, parent, source = getargs('name', 'attributes', 'parent', 'source', kwargs)
        super().__init__(name, parent, source)
        # start from an empty attribute dictionary, then add each attribute
        # through set_attribute
        super().__setitem__(BaseBuilder.__attribute, dict())
        for attr_name, attr_value in attributes.items():
            self.set_attribute(attr_name, attr_value)
        self.__location = None

    @property
    def location(self):
        """The location of this Builder in its source."""
        return self.__location

    @location.setter
    def location(self, val):
        self.__location = val

    @property
    def attributes(self):
        """The dictionary of attributes stored in this Builder object."""
        return super().__getitem__(BaseBuilder.__attribute)

    @docval({'name': 'name', 'type': str, 'doc': 'The name of the attribute.'},
            {'name': 'value', 'type': None, 'doc': 'The attribute value.'})
    def set_attribute(self, **kwargs):
        """Set an attribute for this group."""
        attr_name, attr_value = getargs('name', 'value', kwargs)
        self.attributes[attr_name] = attr_value

113 

114 

class GroupBuilder(BaseBuilder):
    """Builder for a group, holding subgroups, datasets, links and attributes."""

    # sub-dictionary keys. subgroups go in super().__getitem__(GroupBuilder.__group)
    __group = 'groups'
    __dataset = 'datasets'
    __link = 'links'
    __attribute = 'attributes'

    @docval({'name': 'name', 'type': str, 'doc': 'The name of the group.'},
            {'name': 'groups', 'type': (dict, list),
             'doc': ('A dictionary or list of subgroups to add to this group. If a dict is provided, only the '
                     'values are used.'),
             'default': dict()},
            {'name': 'datasets', 'type': (dict, list),
             'doc': ('A dictionary or list of datasets to add to this group. If a dict is provided, only the '
                     'values are used.'),
             'default': dict()},
            {'name': 'attributes', 'type': dict, 'doc': 'A dictionary of attributes to create in this group.',
             'default': dict()},
            {'name': 'links', 'type': (dict, list),
             'doc': ('A dictionary or list of links to add to this group. If a dict is provided, only the '
                     'values are used.'),
             'default': dict()},
            {'name': 'parent', 'type': 'GroupBuilder', 'doc': 'The parent builder of this builder.', 'default': None},
            {'name': 'source', 'type': str,
             'doc': 'The source of the data represented in this builder.', 'default': None})
    def __init__(self, **kwargs):
        """Create a builder object for a group."""
        name, groups, datasets, links, attributes, parent, source = getargs(
            'name', 'groups', 'datasets', 'links', 'attributes', 'parent', 'source', kwargs)
        # NOTE: if groups, datasets, or links are dicts, their keys are unused
        subgroups = self.__to_list(groups)
        subdatasets = self.__to_list(datasets)
        sublinks = self.__to_list(links)
        # Maps each subgroup/dataset/attribute/link name to the key of the
        # sub-dictionary that holds it. It must exist before the parent
        # initializer runs, because that initializer calls set_attribute,
        # which records attribute names here.
        self.obj_type = dict()
        super().__init__(name, attributes, parent, source)
        for section_key in (GroupBuilder.__group, GroupBuilder.__dataset, GroupBuilder.__link):
            super().__setitem__(section_key, dict())
        for subgroup in subgroups:
            self.set_group(subgroup)
        for subdataset in subdatasets:
            # None entries in the datasets collection are skipped
            if subdataset is None:
                continue
            self.set_dataset(subdataset)
        for sublink in sublinks:
            self.set_link(sublink)

    def __to_list(self, d):
        # a dict contributes only its values; anything else is returned unchanged
        return list(d.values()) if isinstance(d, dict) else d

    @property
    def source(self):
        """The source of this Builder."""
        return super().source

    @source.setter
    def source(self, s):
        """Set this group's source, then pass it on to every child whose source is still None."""
        super(GroupBuilder, self.__class__).source.fset(self, s)
        children = _itertools.chain(self.groups.values(), self.datasets.values(), self.links.values())
        for child in children:
            if child.source is None:
                child.source = s

    @property
    def groups(self):
        """The dictionary of subgroups contained in this group."""
        return super().__getitem__(GroupBuilder.__group)

    @property
    def datasets(self):
        """The dictionary of datasets contained in this group."""
        return super().__getitem__(GroupBuilder.__dataset)

    @property
    def links(self):
        """The dictionary of links contained in this group."""
        return super().__getitem__(GroupBuilder.__link)

    @docval(*get_docval(BaseBuilder.set_attribute))
    def set_attribute(self, **kwargs):
        """Set an attribute for this group."""
        attr_name, attr_value = getargs('name', 'value', kwargs)
        self.__check_obj_type(attr_name, GroupBuilder.__attribute)
        super().set_attribute(attr_name, attr_value)
        self.obj_type[attr_name] = GroupBuilder.__attribute

    def __check_obj_type(self, name, obj_type):
        # check that the name is not associated with a different object type in this group
        if name not in self.obj_type:
            return
        registered_type = self.obj_type[name]
        if registered_type == obj_type:
            return
        raise ValueError("'%s' already exists in %s.%s, cannot set in %s."
                         % (name, self.name, registered_type, obj_type))

    @docval({'name': 'builder', 'type': 'GroupBuilder', 'doc': 'The GroupBuilder to add to this group.'})
    def set_group(self, **kwargs):
        """Add a subgroup to this group."""
        self.__set_builder(getargs('builder', kwargs), GroupBuilder.__group)

    @docval({'name': 'builder', 'type': 'DatasetBuilder', 'doc': 'The DatasetBuilder to add to this group.'})
    def set_dataset(self, **kwargs):
        """Add a dataset to this group."""
        self.__set_builder(getargs('builder', kwargs), GroupBuilder.__dataset)

    @docval({'name': 'builder', 'type': 'LinkBuilder', 'doc': 'The LinkBuilder to add to this group.'})
    def set_link(self, **kwargs):
        """Add a link to this group."""
        self.__set_builder(getargs('builder', kwargs), GroupBuilder.__link)

    def __set_builder(self, builder, obj_type):
        child_name = builder.name
        self.__check_obj_type(child_name, obj_type)
        # if child builder already exists (e.g., read from file), do not reset it.
        # resetting the child builder will change the python object ID / hash of the child builder
        # and make the IO backend think that the child builder has not yet been written.
        existing = self.get(child_name)
        if existing == builder:
            return
        super().__getitem__(obj_type)[child_name] = builder
        self.obj_type[child_name] = obj_type
        # adopt the child only if it does not already have a parent
        if builder.parent is None:
            builder.parent = self

    def is_empty(self):
        """Returns true if there are no datasets, links, attributes, and non-empty subgroups. False otherwise."""
        if self.datasets or self.links or self.attributes:
            return False
        # with no subgroups, all() over the empty sequence is True
        return all(subgroup.is_empty() for subgroup in self.groups.values())

    def __getitem__(self, key):
        """Like dict.__getitem__, but looks in groups, datasets, attributes, and links sub-dictionaries.
        Key can be a posix path to a sub-builder.
        """
        path_parts = _posixpath.normpath(key).split('/')
        try:
            return self.__get_rec(path_parts)
        except KeyError:
            # report the full key that was asked for rather than a path component
            raise KeyError(key)

    def get(self, key, default=None):
        """Like dict.get, but looks in groups, datasets, attributes, and links sub-dictionaries.
        Key can be a posix path to a sub-builder.
        """
        path_parts = _posixpath.normpath(key).split('/')
        try:
            return self.__get_rec(path_parts)
        except KeyError:
            return default

    def __get_rec(self, key_ar):
        # recursive helper for __getitem__ and get
        head = key_ar[0]
        if len(key_ar) == 1:
            # get the correct dictionary (groups, datasets, links, attributes) associated with the key
            # then look up the key within that dictionary to get the builder
            section_key = self.obj_type[head]
            return super().__getitem__(section_key)[head]
        # more path components remain, so the first one must name a subgroup
        if head in self.groups:
            return self.groups[head].__get_rec(key_ar[1:])
        raise KeyError(head)

    def __setitem__(self, args, val):
        raise NotImplementedError('__setitem__')

    def __contains__(self, item):
        return item in self.obj_type

    def items(self):
        """Like dict.items, but iterates over items in groups, datasets, attributes, and links sub-dictionaries."""
        sections = (self.groups, self.datasets, self.attributes, self.links)
        return _itertools.chain(*(section.items() for section in sections))

    def keys(self):
        """Like dict.keys, but iterates over keys in groups, datasets, attributes, and links sub-dictionaries."""
        sections = (self.groups, self.datasets, self.attributes, self.links)
        return _itertools.chain(*(section.keys() for section in sections))

    def values(self):
        """Like dict.values, but iterates over values in groups, datasets, attributes, and links sub-dictionaries."""
        sections = (self.groups, self.datasets, self.attributes, self.links)
        return _itertools.chain(*(section.values() for section in sections))

313 

314 

class DatasetBuilder(BaseBuilder):
    """Builder for a dataset: its data, dtype, maxshape, chunking flag and attributes."""

    OBJECT_REF_TYPE = 'object'
    REGION_REF_TYPE = 'region'

    @docval({'name': 'name', 'type': str, 'doc': 'The name of the dataset.'},
            {'name': 'data',
             'type': ('array_data', 'scalar_data', 'data', 'DatasetBuilder', 'RegionBuilder', Iterable, datetime, date),
             'doc': 'The data in this dataset.', 'default': None},
            {'name': 'dtype', 'type': (type, np.dtype, str, list),
             'doc': 'The datatype of this dataset.', 'default': None},
            {'name': 'attributes', 'type': dict,
             'doc': 'A dictionary of attributes to create in this dataset.', 'default': dict()},
            {'name': 'maxshape', 'type': (int, tuple),
             'doc': 'The shape of this dataset. Use None for scalars.', 'default': None},
            {'name': 'chunks', 'type': bool, 'doc': 'Whether or not to chunk this dataset.', 'default': False},
            {'name': 'parent', 'type': GroupBuilder, 'doc': 'The parent builder of this builder.', 'default': None},
            {'name': 'source', 'type': str, 'doc': 'The source of the data in this builder.', 'default': None})
    def __init__(self, **kwargs):
        """ Create a Builder object for a dataset """
        name, data, dtype, attributes, maxshape, chunks, parent, source = getargs(
            'name', 'data', 'dtype', 'attributes', 'maxshape', 'chunks', 'parent', 'source', kwargs)
        super().__init__(name, attributes, parent, source)
        self['data'] = data
        # replace the attribute dictionary with a shallow copy of the one passed in
        self['attributes'] = _copy.copy(attributes)
        self.__chunks = chunks
        self.__maxshape = maxshape
        # when the data is itself a builder and no dtype was given,
        # the dtype defaults to OBJECT_REF_TYPE
        if dtype is None and isinstance(data, BaseBuilder):
            dtype = self.OBJECT_REF_TYPE
        self.__dtype = dtype
        self.__name = name

    @property
    def data(self):
        """The data stored in the dataset represented by this builder."""
        return self['data']

    @data.setter
    def data(self, val):
        # data may only be assigned while it is still None
        if self['data'] is not None:
            raise AttributeError("Cannot overwrite data.")
        self['data'] = val

    @property
    def chunks(self):
        """The chunks value given at construction (whether or not this dataset is chunked)."""
        return self.__chunks

    @property
    def maxshape(self):
        """The maxshape value given at construction."""
        return self.__maxshape

    @property
    def dtype(self):
        """The data type of this dataset, or None if it has not been set."""
        return self.__dtype

    @dtype.setter
    def dtype(self, val):
        # dtype may only be assigned while it is still None
        if self.__dtype is not None:
            raise AttributeError("Cannot overwrite dtype.")
        self.__dtype = val

378 

379 

class LinkBuilder(Builder):
    """Builder for a link; the target builder is stored under the 'builder' key."""

    @docval({'name': 'builder', 'type': (DatasetBuilder, GroupBuilder),
             'doc': 'The target group or dataset of this link.'},
            {'name': 'name', 'type': str, 'doc': 'The name of the link', 'default': None},
            {'name': 'parent', 'type': GroupBuilder, 'doc': 'The parent builder of this builder', 'default': None},
            {'name': 'source', 'type': str, 'doc': 'The source of the data in this builder', 'default': None})
    def __init__(self, **kwargs):
        """Create a builder object for a link."""
        name, builder, parent, source = getargs('name', 'builder', 'parent', 'source', kwargs)
        # a link without an explicit name takes the name of its target
        link_name = builder.name if name is None else name
        super().__init__(link_name, parent, source)
        self['builder'] = builder

    @property
    def builder(self):
        """The builder this link points to."""
        return self['builder']

399 

400 

class ReferenceBuilder(dict):
    """Dict holding a reference; the target builder is stored under the 'builder' key."""

    @docval({'name': 'builder', 'type': (DatasetBuilder, GroupBuilder),
             'doc': 'The group or dataset this reference applies to.'})
    def __init__(self, **kwargs):
        """Create a builder object for a reference."""
        target = getargs('builder', kwargs)
        self['builder'] = target

    @property
    def builder(self):
        """The builder this reference points to."""
        return self['builder']

414 

415 

class RegionBuilder(ReferenceBuilder):
    """Reference to a target dataset plus a region, stored under the 'region' key."""

    @docval({'name': 'region', 'type': (slice, tuple, list, RegionReference),
             'doc': 'The region, i.e. slice or indices, into the target dataset.'},
            {'name': 'builder', 'type': DatasetBuilder, 'doc': 'The dataset this region reference applies to.'})
    def __init__(self, **kwargs):
        """Create a builder object for a region reference."""
        selection, target = getargs('region', 'builder', kwargs)
        # the parent initializer stores the target builder; the region is added afterwards
        super().__init__(target)
        self['region'] = selection

    @property
    def region(self):
        """The region (slice or indices) selected in the target dataset."""
        return self['region']

430 return self['region']