Coverage for /home/simon/Git/preflibtools/preflibtools/instances/preflibinstance.py: 70%

446 statements  

« prev     ^ index     » next       coverage.py v6.4.4, created at 2022-09-22 11:34 +0200

1""" This module describes the main class to deal with PrefLib instances.. 

2""" 

3import os.path 

4 

5from .sampling import * 

6 

7from copy import deepcopy 

8from os import path 

9 

10import urllib 

11import re 

12 

13 

14class PrefLibInstance(object): 

15 """ This class provide a general template to implement specific classes representing PrefLib instances. It should 

16 mainly be used as an abstract class. 

17 

18 :ivar file_path: The path to the file the instance is taken from. 

19 :ivar file_name: The name of the file the instance is taken from. 

20 :ivar data_type: The data type of the instance. Whenever a function only applies to certain types of data 

21 (strict and complete orders for instance), we do so by checking this value. 

22 :ivar modification_type: The modification type of the file: original if it represents original data; induced 

23 or imbued it is derived from some other data; synthetic if it is synthetically generated. 

24 :ivar relates_to: The data file this instance relates to, typically the original data file for induced 

25 preferences. 

26 :ivar related_files: The data files that are to the instance. 

27 :ivar title: The title of the instance. 

28 :ivar description: A description of the instance. 

29 :ivar publication_date: Date at which the corresponding file has been added to PrefLib.com. 

30 :ivar modification_date: Last date the file has been modified on PrefLib.com. 

31 :ivar num_alternatives: The number of alternatives in the instance. 

32 :ivar alternatives_name: A dictionary mapping alternative (int) to their name (str). 

33 :ivar num_voters: The number of voters in the instance. 

34 """ 

35 

36 def __init__(self): 

37 self.file_path = "" 

38 self.file_name = "" 

39 self.data_type = "" 

40 self.modification_type = "" 

41 self.relates_to = "" 

42 self.related_files = "" 

43 self.title = "" 

44 self.description = "" 

45 self.publication_date = "" 

46 self.modification_date = "" 

47 self.num_alternatives = 0 

48 self.alternatives_name = {} 

49 self.num_voters = 0 

50 self.alt_name_pattern = re.compile(r'# ALTERNATIVE NAME (\d+): (.*)') 

51 

52 def type_validator(self, data_type): 

53 """ Returns a boolean indicating whether the data_type given as argument is a valid one for the python class. 

54 

55 :param data_type: A strong representing a data type. 

56 :type data_type: str 

57 :return: True if the data type is valid for the class and False otherwise. 

58 :rtype: bool 

59 """ 

60 pass 

61 

62 def parse(self, lines, autocorrect=False): 

63 pass 

64 

65 def parse_lines(self, lines, autocorrect=False): 

66 """ Parses the lines provided as argument. The parser to be used is deducted from the instance's inner value of 

67 data_type. 

68 

69 :param lines: A list of string, each string being one line of the instance to parse. 

70 :type lines: list 

71 :param autocorrect: A boolean indicating whether we should try to automatically correct the potential errors 

72 in the file. Default is False. 

73 :type autocorrect: bool 

74 """ 

75 

76 if self.type_validator(self.data_type): 

77 self.parse(lines, autocorrect=autocorrect) 

78 else: 

79 raise TypeError("File extension " + str(self.data_type) + " is not valid for an ordinal PrefLib instance." + 

80 " This file cannot be parsed.") 

81 

82 def parse_file(self, filepath, autocorrect=False): 

83 """ Parses the file whose path is provided as argument and populates the PreflibInstance object accordingly. 

84 The parser to be used is deduced from the file extension. 

85 

86 :param filepath: The path to the file to be parsed. 

87 :type filepath: str 

88 :param autocorrect: A boolean indicating whether we should try to automatically correct the potential errors 

89 in the file. Default is False. 

90 :type autocorrect: bool 

91 """ 

92 

93 # Populating basic properties of the instance 

94 self.file_path = filepath 

95 self.file_name = path.split(filepath)[1] 

96 self.data_type = path.splitext(filepath)[1][1:] 

97 

98 # Read the file 

99 file = open(filepath, "r", encoding="utf-8") 

100 lines = file.readlines() 

101 file.close() 

102 

103 self.parse_lines(lines, autocorrect=autocorrect) 

104 

105 def parse_str(self, string, data_type, file_name="", autocorrect=False): 

106 """ Parses the string provided as argument and populates the PreflibInstance object accordingly. 

107 The parser to be used is deduced from the file extension passed as argument. 

108 

109 :param string: The string to parse. 

110 :type string: str 

111 :param data_type: The data type represented by the string. 

112 :type data_type: str 

113 :param file_name: The value to store in the file_name member of the instance. Default is the empty string. 

114 :type file_name: str 

115 :param autocorrect: A boolean indicating whether we should try to automatically correct the potential errors 

116 in the file. Default is False. 

117 :type autocorrect: bool 

118 """ 

119 

120 self.file_path = "parsed_from_string" 

121 self.file_name = file_name 

122 self.data_type = data_type 

123 

124 self.parse_lines(string.splitlines(), autocorrect=autocorrect) 

125 

126 def parse_url(self, url, autocorrect=False): 

127 """ Parses the file located at the provided URL and populates the PreflibInstance object accordingly. 

128 The parser to be used (whether the file describes a graph or an order for instance) is deduced based 

129 on the file extension. 

130 

131 :param url: The target URL. 

132 :type url: str 

133 :param autocorrect: A boolean indicating whether we should try to automatically correct the potential errors 

134 in the file. Default is False. 

135 :type autocorrect: bool 

136 """ 

137 

138 data = urllib.request.urlopen(url) 

139 lines = [line.decode("utf-8").strip() for line in data] 

140 data.close() 

141 

142 self.file_path = url 

143 self.file_name = url.split('/')[-1].split('.')[0] 

144 self.data_type = url.split('.')[-1] 

145 

146 self.parse_lines(lines, autocorrect=autocorrect) 

147 

148 def parse_metadata(self, line, autocorrect=False): 

149 """ A helper function that parses metadata. 

150 

151 :param line: The line to parse. 

152 :type line: str 

153 :param autocorrect: A boolean indicating whether we should try to automatically correct the potential errors 

154 in the file. Default is False. 

155 :type autocorrect: bool 

156 """ 

157 if line.startswith("# FILE NAME"): 

158 self.file_name = line[12:].strip() 

159 elif line.startswith("# TITLE"): 

160 self.title = line[8:].strip() 

161 elif line.startswith("# DESCRIPTION"): 

162 self.description = line[14:].strip() 

163 elif line.startswith("# DATA TYPE"): 

164 self.data_type = line[12:].strip() 

165 elif line.startswith("# MODIFICATION TYPE"): 

166 self.modification_type = line[20:].strip() 

167 elif line.startswith("# RELATES TO"): 

168 self.relates_to = line[13:].strip() 

169 elif line.startswith("# RELATED FILES"): 

170 self.related_files = line[16:].strip() 

171 elif line.startswith("# PUBLICATION DATE"): 

172 self.publication_date = line[19:].strip() 

173 elif line.startswith("# MODIFICATION DATE"): 

174 self.modification_date = line[20:].strip() 

175 elif line.startswith("# NUMBER ALTERNATIVES"): 

176 self.num_alternatives = int(line[22:].strip()) 

177 elif line.startswith("# NUMBER VOTERS"): 

178 self.num_voters = int(line[16:].strip()) 

179 elif line.startswith("# ALTERNATIVE NAME"): 

180 match = re.match(self.alt_name_pattern, line) 

181 if match: 

182 alt = int(match.group(1)) 

183 alt_name = match.group(2) 

184 if autocorrect and alt_name in self.alternatives_name.values(): 

185 tmp = 1 

186 while alt_name + "__" + str(tmp) in self.alternatives_name.values(): 

187 tmp += 1 

188 self.alternatives_name[alt] = alt_name + "__" + str(tmp) 

189 else: 

190 self.alternatives_name[alt] = alt_name 

191 

192 def write(self, filepath): 

193 pass 

194 

195 def write_metadata(self, file): 

196 """ A helper function that writes the metadata in the file. 

197 

198 :param file: The file to write into as a file object. 

199 

200 """ 

201 file.write("# FILE NAME: {}\n# TITLE: {}\n# DESCRIPTION: {}\n# DATA TYPE: {}\n# MODIFICATION TYPE: {}\n".format( 

202 self.file_name, self.title, self.description, self.data_type, self.modification_type)) 

203 file.write("# RELATES TO: {}\n# RELATED FILES: {}\n# PUBLICATION DATE: {}\n# MODIFICATION DATE: {}\n".format( 

204 self.relates_to, self.related_files, self.publication_date, self.modification_date)) 

205 

206 def __str__(self): 

207 return "PrefLib-Instance: {} <{},{}>".format(self.file_name, self.num_voters, self.num_alternatives) 

208 

209 

210class OrdinalInstance(PrefLibInstance): 

211 """ This is the class representing a PrefLib instance of ordinal preferences. It basically contains the data and 

212 information written within a PrefLib file. 

213 

214 :param file_path: The path to the file the instance is taken from. If a path is provided as a parameter, 

215 the file is immediately parsed and the instance populated accordingly. 

216 :type file_path: str, optional 

217 

218 :ivar num_unique_orders: The number of unique orders in the instance. 

219 :ivar multiplicity: A dictionary mapping each order to the number of voters who submitted that order. 

220 :ivar orders: The list of all the distinct orders in the instance. 

221 """ 

222 

223 def __init__(self, file_path=""): 

224 PrefLibInstance.__init__(self) 

225 self.file_path = file_path 

226 self.num_unique_orders = 0 

227 self.multiplicity = {} 

228 self.orders = [] 

229 

230 # If a filePath is given as argument, we parse it immediately 

231 if len(file_path) > 0: 

232 self.parse_file(file_path) 

233 

234 def type_validator(self, data_type): 

235 return data_type in ['soc', 'soi', 'toc', 'toi'] 

236 

237 def parse(self, lines, autocorrect=False): 

238 """ Parses the strings provided as argument, assuming that the latter describes an order. 

239 

240 :param lines: A list of string, each string being one line of the instance to parse. 

241 :type lines: list 

242 :param autocorrect: A boolean indicating whether we should try to automatically correct the potential errors 

243 in the file. Default is False. 

244 :type autocorrect: bool 

245 """ 

246 

247 # The first few lines contain the metadata 

248 i = 0 

249 for i in range(len(lines)): 

250 line = lines[i].strip() 

251 if line.startswith('#'): 

252 if line.startswith("# NUMBER UNIQUE ORDERS"): 

253 self.num_unique_orders = int(line[23:].strip()) 

254 else: 

255 self.parse_metadata(line, autocorrect=autocorrect) 

256 else: 

257 break 

258 

259 # The rest of the lines are about the preferences 

260 order_pattern = re.compile(r'{[\d,]+?}|[\d,]+') 

261 for line in lines[i:]: 

262 # The first element indicates the multiplicity of the order 

263 multiplicity, order_str = line.strip().split(":") 

264 multiplicity = int(multiplicity) 

265 order = [] 

266 for group in re.findall(order_pattern, order_str): 

267 if group.startswith('{'): 

268 group = group[1:-1] 

269 order.append(tuple(int(alt.strip()) for alt in group.split(',') if len(alt) > 0)) 

270 else: 

271 for alt in group.split(','): 

272 if len(alt) > 0: 

273 order.append((int(alt.strip()),)) 

274 order = tuple(order) 

275 self.orders.append(order) 

276 if autocorrect and order in self.multiplicity: 

277 self.multiplicity[tuple(order)] += multiplicity 

278 else: 

279 self.multiplicity[tuple(order)] = multiplicity 

280 

281 if autocorrect: 

282 self.orders = list(set(self.orders)) 

283 self.num_alternatives = len(self.alternatives_name) 

284 self.num_unique_orders = len(self.orders) 

285 self.num_voters = sum(self.multiplicity.values()) 

286 

287 def parse_old(self, lines, autocorrect=False): 

288 """ Parses the strings provided as argument, assuming that the latter describes an order, in the old PrefLib 

289 format. 

290 

291 :param lines: A list of string, each string being one line of the instance to parse. 

292 :type lines: list 

293 :param autocorrect: A boolean indicating whether we should try to automatically correct the potential errors 

294 in the file. Default is False. 

295 :type autocorrect: bool 

296 """ 

297 

298 # The first line gives us the number of alternatives, then comes the names of the alternatives 

299 self.num_alternatives = int(lines[0]) 

300 for i in range(1, self.num_alternatives + 1): 

301 alt_name = lines[i].split(",")[1].strip() 

302 if autocorrect and alt_name in self.alternatives_name.values(): 

303 tmp = 1 

304 while alt_name + "__" + str(tmp) in self.alternatives_name.values(): 

305 tmp += 1 

306 self.alternatives_name[i] = alt_name + "__" + str(tmp) 

307 else: 

308 self.alternatives_name[i] = alt_name 

309 

310 # We've reached the description of the preferences. We start by some numbers... 

311 self.num_voters = int(lines[self.num_alternatives + 1].split(",")[0]) 

312 self.num_unique_orders = int(lines[self.num_alternatives + 1].split(",")[2]) 

313 

314 # ... and finally comes the preferences 

315 for line in lines[self.num_alternatives + 2:]: 

316 # The first element indicates the multiplicity of the order 

317 elements = line.strip().split(",") 

318 multiplicity = int(elements[0]) 

319 

320 # Then we deal with the rest 

321 in_braces = False 

322 order = [] 

323 indif_class = [] 

324 for w in elements[1:]: 

325 # If there is something in w 

326 if w != "{}" and len(w) > 0: 

327 # If we are entering a series of ties (grouped by {}) 

328 if w.startswith("{"): 

329 # If w also ends with a }, we remove it 

330 if w.endswith("}"): 

331 w = w[:-1] 

332 in_braces = True 

333 indif_class.append(int(w[1:])) # The first element of w is {, so we go beyond that 

334 # If we finished reading a series of ties (grouped by {}) 

335 elif w.endswith("}"): 

336 in_braces = False 

337 indif_class.append(int(w[:-1])) # The first element of w is }, so we go beyond that 

338 order.append(tuple(indif_class)) 

339 indif_class = [] 

340 # Otherwise, we are just reading numbers 

341 else: 

342 # If we are facing ties, we add in the indifference class 

343 if in_braces: 

344 if int(w) not in indif_class: 

345 indif_class.append(int(w)) 

346 # Otherwise, we add the strict preference. 

347 else: 

348 if (int(w),) not in order: 

349 order.append((int(w),)) 

350 order = tuple(order) 

351 self.orders.append(order) 

352 if autocorrect and order in self.multiplicity: 

353 self.multiplicity[tuple(order)] += multiplicity 

354 else: 

355 self.multiplicity[tuple(order)] = multiplicity 

356 

357 if autocorrect: 

358 self.orders = list(set(self.orders)) 

359 

360 def write(self, filepath): 

361 """ Writes the instance into a file whose destination has been given as argument. If no file extension is 

362 provided the data type of the instance is used. 

363 

364 :param filepath: The destination where to write the instance. 

365 :type filepath: str 

366 """ 

367 if len(path.splitext(filepath)[1]) == 0: 

368 filepath += "." + str(self.data_type) 

369 file = open(filepath, "w", encoding="utf-8") 

370 # Writing metadata in the file header 

371 self.write_metadata(file) 

372 file.write("# NUMBER ALTERNATIVES: {}\n# NUMBER VOTERS: {}\n# NUMBER UNIQUE ORDERS: {}\n".format( 

373 self.num_alternatives, self.num_voters, self.num_unique_orders 

374 )) 

375 for alt, name in self.alternatives_name.items(): 

376 file.write("# ALTERNATIVE NAME {}: {}\n".format(alt, name)) 

377 # Writing the actual ballots with their multiplicity 

378 orders = deepcopy(self.orders) 

379 orders.sort(key=lambda o: (-self.multiplicity[o], -len(o))) 

380 for order in orders: 

381 order_str = "" 

382 for indif_class in order: 

383 if len(indif_class) == 1: 

384 order_str += str(indif_class[0]) + "," 

385 else: 

386 order_str += "{" + ",".join((str(alt) for alt in indif_class)) + "}," 

387 file.write("{}: {}\n".format(self.multiplicity[order], order_str[:-1])) 

388 file.close() 

389 

390 def vote_map(self): 

391 """ Returns the instance described as a vote map, i.e., a dictionary whose keys are orders, mapping 

392 to the number of voters with the given order as their preferences. This format can be useful for some 

393 applications. It also ensures interoperability with the old preflibtools (vote maps were the main object). 

394 

395 :return: A vote map representing the preferences in the instance. 

396 :rtype: dict of (tuples, int) 

397 """ 

398 vote_map = {} 

399 for order in self.orders: 

400 vote_map[order] = self.multiplicity[order] 

401 return vote_map 

402 

403 def full_profile(self): 

404 """ Returns a list containing all the orders appearing in the preferences, with each order appearing as 

405 many times as their multiplicity. 

406 

407 :return: A list of preferences (lists of alternatives). 

408 :rtype: list 

409 """ 

410 res = [] 

411 for order in self.orders: 

412 res += [order] * self.multiplicity[order] 

413 return res 

414 

415 def flatten_strict(self): 

416 """ Strict orders are represented as orders with indifference classes of size 1. This is somewhat heavy when 

417 working with strict preferences. This function flattens strict preferences by removing the indifference 

418 class. 

419 

420 :return: A list of tuples of preference order and multiplicity. 

421 :rtype: list 

422 """ 

423 res = [] 

424 for order in self.orders: 

425 if len(order) != self.num_alternatives: 

426 print("WARNING: You are flattening a non-strict order.") 

427 res.append((tuple(indif_class[0] for indif_class in order), self.multiplicity[order])) 

428 return res 

429 

430 def infer_type(self): 

431 """ Loops through the orders of the instance to infer whether the preferences strict and/or complete,. 

432 

433 :return: The data type of the instance. 

434 :rtype: str  

435 """ 

436 strict = True 

437 complete = True 

438 for order in self.orders: 

439 if max(len(indif_class) for indif_class in order) != 1: 

440 strict = False 

441 if len([alt for indif_class in order for alt in indif_class]) != self.num_alternatives: 

442 complete = False 

443 if not strict and not complete: 

444 return "toi" 

445 if strict and complete: 

446 return "soc" 

447 if strict and not complete: 

448 return "soi" 

449 if not strict and complete: 

450 return "toc" 

451 

452 def recompute_cardinality_param(self): 

453 """ Recomputes the basic cardinality parameters based on the order list in the instance. Numbers that are 

454 recomputed are the number of voters, the sum of voter count and the number of unique orders. 

455 """ 

456 num_voters = 0 

457 for order in self.orders: 

458 num_voters += self.multiplicity[order] 

459 self.num_voters = num_voters 

460 self.num_unique_orders = len(set(self.orders)) 

461 

462 def append_order_list(self, orders): 

463 """ Appends a vote map to the instance. That function incorporates the new orders into the instance and 

464 updates the set of alternatives if needed. 

465 

466 :param orders: A list of tuples of tuples, each tuple representing a preference order.  

467 :type orders: list 

468 """ 

469 alternatives = set(alt for order in orders for indif_class in order for alt in indif_class) 

470 for alt in alternatives: 

471 if alt not in self.alternatives_name: 

472 self.alternatives_name[alt] = "Alternative " + str(alt) 

473 self.num_alternatives = len(self.alternatives_name) 

474 

475 self.num_voters += len(orders) 

476 

477 for order in orders: 

478 multiplicity = len([o for o in orders if o == order]) 

479 if order in self.multiplicity: 

480 self.multiplicity[order] += multiplicity 

481 else: 

482 self.multiplicity[order] = multiplicity 

483 

484 self.num_unique_orders = len(self.multiplicity) 

485 self.orders += deepcopy(orders) 

486 

487 self.data_type = self.infer_type() 

488 

489 def append_vote_map(self, vote_map): 

490 """ Appends a vote map to the instance. That function incorporates the new orders into the instance and 

491 updates the set of alternatives if needed. 

492  

493 :param vote_map: A vote map representing preferences. A vote map is a dictionary whose keys represent 

494 orders (tuples of tuples of int) that are mapped to the number of voters with the given order as  

495 their preferences. We re-map the orders to tuple of tuples to be sure we are dealing with the correct 

496 type. 

497 :type vote_map: dict of (tuple, int) 

498 """ 

499 for ballot, multiplicity in vote_map.items(): 

500 order = tuple(tuple(indif_class) for indif_class in ballot) 

501 if order not in self.orders: 

502 self.orders.append(order) 

503 self.multiplicity[order] = multiplicity 

504 else: 

505 self.multiplicity[order] += multiplicity 

506 self.num_voters += multiplicity 

507 

508 for indif_class in ballot: 

509 for alt in indif_class: 

510 if alt not in self.alternatives_name: 

511 self.alternatives_name[alt] = "Alternative " + str(alt) 

512 

513 self.num_alternatives = len(self.alternatives_name) 

514 self.num_unique_orders = len(self.multiplicity) 

515 

516 self.data_type = self.infer_type() 

517 

518 def populate_IC(self, num_voters, num_alternatives): 

519 """ Populates the instance with a random profile of strict preferences taken from the impartial culture 

520 distribution. Uses :math:`preflibtools.instances.sampling.urnModel` for sampling. 

521 

522 :param num_voters: Number of orders to sample. 

523 :type num_voters: int 

524 :param num_alternatives: Number of alternatives for the sampled orders. 

525 :type num_alternatives: int 

526 """ 

527 self.append_vote_map(generate_IC(num_voters, list(range(num_alternatives)))) 

528 

529 def populate_IC_anon(self, num_voters, num_alternatives): 

530 """ Populates the instance with a random profile of strict preferences taken from the impartial anonymous 

531 culture distribution. Uses :class:`preflibtools.instances.sampling` for sampling. 

532 

533 :param num_voters: Number of orders to sample. 

534 :type num_voters: int 

535 :param num_alternatives: Number of alternatives for the sampled orders. 

536 :type num_alternatives: int 

537 """ 

538 self.append_vote_map(generate_IC_anon(num_voters, list(range(num_alternatives)))) 

539 

540 def populate_urn(self, num_voters, num_alternatives, replace): 

541 """ Populates the instance with a random profile of strict preferences taken from the urn distribution. 

542 Uses :class:`preflibtools.instances.sampling` for sampling. 

543 

544 :param num_voters: Number of orders to sample. 

545 :type num_voters: int 

546 :param num_alternatives: Number of alternatives for the sampled orders. 

547 :type num_alternatives: int 

548 :param replace: The number of replacements for the urn model. 

549 :type replace: int 

550 """ 

551 self.append_vote_map(generate_urn(num_voters, list(range(num_alternatives)), replace)) 

552 

553 def populate_mallows(self, num_voters, num_alternatives, mixture, dispersions, references): 

554 """ Populates the instance with a random profile of strict preferences taken from a mixture of Mallows' 

555 models. Uses :class:`preflibtools.instances.sampling` for sampling. 

556 

557 :param num_voters: Number of orders to sample. 

558 :type num_voters: int 

559 :param num_alternatives: Number of alternatives for the sampled orders. 

560 :type num_alternatives: int 

561 :param mixture: A list of the weights of each element of the mixture. 

562 :type mixture: list of positive numbers 

563 :param dispersions: A list of the dispersion coefficient of each element of the mixture. 

564 :type dispersions: list of float 

565 :param references: A list of the reference orders for each element of the mixture. 

566 :type references: list of tuples of tuples of int 

567 """ 

568 self.append_vote_map(generate_mallows(num_voters, num_alternatives, mixture, dispersions, references)) 

569 

570 def populate_mallows_mix(self, num_voters, num_alternatives, num_references): 

571 """ Populates the instance with a random profile of strict preferences taken from a mixture of Mallows' 

572 models for which reference points and dispersion coefficients are independently and identically  

573 distributed. Uses :class:`preflibtools.instances.sampling` for sampling. 

574 

575 :param num_voters: Number of orders to sample. 

576 :type num_voters: int 

577 :param num_alternatives: Number of alternatives for the sampled orders. 

578 :type num_alternatives: int 

579 :param num_references: Number of element 

580 :type num_references: int 

581 """ 

582 self.append_vote_map(generate_mallows_mix(num_voters, list(range(num_alternatives)), num_references)) 

583 

584 def __str__(self): 

585 return "Ordinal-Instance: {} <{},{}>".format(self.file_name, self.num_voters, self.num_alternatives) 

586 

587class ComparisonInstance(PrefLibInstance): 

588 """ To be implemented. 

589 

590 """ 

591 

592 def __init__(self): 

593 PrefLibInstance.__init__(self) 

594 

595 

596class CategoricalInstance(PrefLibInstance): 

597 """ This is the class representing a PrefLib instance of categorical preferences. It basically contains the data and 

598 information written within a PrefLib file. 

599 """ 

600 

601 def __init__(self, file_path=""): 

602 PrefLibInstance.__init__(self) 

603 self.file_path = file_path 

604 self.num_unique_preferences = 0 

605 self.multiplicity = {} 

606 self.num_categories = 0 

607 self.categories_name = {} 

608 self.preferences = [] 

609 

610 # If a filePath is given as argument, we parse it immediately 

611 if len(file_path) > 0: 

612 self.parse_file(file_path) 

613 

614 def type_validator(self, data_type): 

615 return data_type == "cat" 

616 

617 def parse(self, lines, autocorrect=False): 

618 """ Parses the strings provided as argument, assuming that the latter describes categorical preferences. 

619 

620 :param lines: A list of string, each string being one line of the instance to parse. 

621 :type lines: list 

622 :param autocorrect: A boolean indicating whether we should try to automatically correct the potential errors 

623 in the file. Default is False. 

624 :type autocorrect: bool 

625 """ 

626 

627 # The first few lines contain the metadata 

628 i = 0 

629 cat_name_pattern = re.compile(r'# CATEGORY NAME (\d+): (.*)') 

630 for i in range(len(lines)): 

631 line = lines[i].strip() 

632 if line.startswith('#'): 

633 if line.startswith("# NUMBER UNIQUE PREFERENCES"): 

634 self.num_unique_preferences = int(line[28:].strip()) 

635 if line.startswith("# NUMBER CATEGORIES"): 

636 self.num_categories = int(line[20:].strip()) 

637 elif line.startswith("# CATEGORY NAME"): 

638 match = re.match(cat_name_pattern, line) 

639 if match: 

640 cat = int(match.group(1)) 

641 cat_name = match.group(2) 

642 if autocorrect and cat_name in self.categories_name.values(): 

643 tmp = 1 

644 while cat_name + "__" + str(tmp) in self.categories_name.values(): 

645 tmp += 1 

646 self.categories_name[cat] = cat_name + "__" + str(tmp) 

647 else: 

648 self.categories_name[cat] = cat_name 

649 else: 

650 self.parse_metadata(line, autocorrect=autocorrect) 

651 else: 

652 break 

653 

654 # The rest of the lines are about the preferences 

655 pref_pattern = re.compile(r'{[\d,]+?}|[\d,]+|{}') 

656 for line in lines[i:]: 

657 # The first element indicates the multiplicity of the order 

658 multiplicity, pref_str = line.strip().split(":") 

659 multiplicity = int(multiplicity) 

660 pref = [] 

661 for group in re.findall(pref_pattern, pref_str): 

662 if group == '{}': 

663 pref.append(tuple()) 

664 elif group.startswith('{'): 

665 group = group[1:-1] 

666 pref.append(tuple(int(alt.strip()) for alt in group.split(',') if len(alt) > 0)) 

667 else: 

668 for alt in group.split(','): 

669 if len(alt) > 0: 

670 pref.append((int(alt.strip()),)) 

671 pref = tuple(pref) 

672 self.preferences.append(pref) 

673 if autocorrect and pref in self.multiplicity: 

674 self.multiplicity[tuple(pref)] += multiplicity 

675 else: 

676 self.multiplicity[tuple(pref)] = multiplicity 

677 

678 if autocorrect: 

679 self.preferences = list(set(self.preferences)) 

680 self.num_alternatives = len(self.alternatives_name) 

681 self.num_unique_preferences = len(self.preferences) 

682 self.num_voters = sum(self.multiplicity.values()) 

683 

684 def write(self, filepath): 

685 """ Writes the instance into a file whose destination has been given as argument. If no file extension is 

686 provided the data type of the instance is used. 

687 

688 :param filepath: The destination where to write the instance. 

689 :type filepath: str 

690 """ 

691 if len(path.splitext(filepath)[1]) == 0: 

692 filepath += "." + str(self.data_type) 

693 file = open(filepath, "w", encoding="utf-8") 

694 # Writing metadata in the file header 

695 self.write_metadata(file) 

696 file.write("# NUMBER ALTERNATIVES: {}\n# NUMBER VOTERS: {}\n# NUMBER UNIQUE PREFERENCES: {}\n".format( 

697 self.num_alternatives, self.num_voters, self.num_unique_preferences 

698 )) 

699 file.write("# NUMBER CATEGORIES: {}\n".format(self.num_categories)) 

700 for cat, name in self.categories_name.items(): 

701 file.write("# CATEGORY NAME {}: {}\n".format(cat, name)) 

702 for alt, name in self.alternatives_name.items(): 

703 file.write("# ALTERNATIVE NAME {}: {}\n".format(alt, name)) 

704 # Writing the actual ballots with their multiplicity 

705 preferences = deepcopy(self.preferences) 

706 preferences.sort(key=lambda o: (-self.multiplicity[o], -len(o))) 

707 for pref in preferences: 

708 pref_str = "" 

709 for category in pref: 

710 if len(category) == 0: 

711 pref_str += '{},' 

712 elif len(category) == 1: 

713 pref_str += str(category[0]) + "," 

714 else: 

715 pref_str += "{" + ",".join((str(alt) for alt in category)) + "}," 

716 file.write("{}: {}\n".format(self.multiplicity[pref], pref_str[:-1])) 

717 file.close() 

718 

719 def __str__(self): 

720 return "Categorical-Instance: {} <{},{}>".format(self.file_name, self.num_voters, self.num_alternatives) 

721 

722 

723class WeightedDiGraph(object): 

724 """ This class is used to represent weighted directed graphs. 

725 

726 :ivar dict: The dictionary representing the graph mapping each node to its neighbourhood (set of nodes 

727 to which it is connected). A node can be of any hashable type. 

728 :ivar weight: The dictionary mapping every node to its weight. 

729 """ 

730 

731 def __init__(self): 

732 self.node_mapping = dict() 

733 self.weights = dict() 

734 

735 def neighbours(self, node): 

736 """ Returns all the neighbours of a given node. 

737 

738 :param node: The node whose neighbours we want to know. 

739 

740 :return: The set of the neighbours of the node. 

741 :rtype: set 

742 """ 

743 return self.node_mapping[node] 

744 

745 def outgoing_edges(self, node): 

746 """ Returns all the edges leaving a given node. 

747 

748 :param node: The node whose edges we want to get. 

749 

750 :return: The set of the tuples (node, neighbour, edgeWeight) representing (weighted) edges. 

751 :rtype: set of tuples 

752 """ 

753 return {(node, n, self.weights[(node, n)]) for n in self.node_mapping[node]} 

754 

755 def add_node(self, node): 

756 """ Adds a node to the graph if the node does not already exist. 

757 

758 :param node: The node to add. 

759 """ 

760 if node not in self.node_mapping: 

761 self.node_mapping[node] = set() 

762 

763 def add_edge(self, node1, node2, weight): 

764 """ Adds an edge to the graph. If the nodes do not exist in the graph, those are also added. 

765 

766 :param node1: The departure node of the edge. 

767 :param node2: The arrival node of the edge. 

768 :param weight: The weight of the edge. 

769 """ 

770 self.add_node(node1) 

771 self.add_node(node2) 

772 self.node_mapping[node1].add(node2) 

773 self.weights[(node1, node2)] = weight 

774 

775 def edges(self): 

776 """ Returns the set of all the edges of the graph. 

777 

778 :return: A set of tuples (node, neighbour, weight) representing (weighted) edges. 

779 :rtype: set of tuples 

780 """ 

781 return {(n1, n2, self.weights[(n1, n2)]) for n1 in self.node_mapping for n2 in self.node_mapping[n1]} 

782 

783 def nodes(self): 

784 """ Returns the set of all the nodes of the graph. 

785 

786 :return: The set of all the nodes of the graph. 

787 :rtype: set 

788 """ 

789 return self.node_mapping.keys() 

790 

791 def __str__(self): 

792 """ Returns the string used when printing the graph """ 

793 return "Graph with {} vertices and {} edges :\n".format(len(self.node_mapping), len(self.edges())) 

794 

795 

796class MatchingInstance(PrefLibInstance, WeightedDiGraph): 

797 """ This is the class representing a PrefLib instance for matching preferences. It basically contains the data and 

798 information written within a PrefLib file. 

799 

800 """ 

801 

802 def __init__(self, file_path = ""): 

803 PrefLibInstance.__init__(self) 

804 WeightedDiGraph.__init__(self) 

805 self.num_edges = 0 

806 self.file_path = file_path 

807 

808 # If a filePath is given as argument, we parse it immediately 

809 if len(file_path) > 0: 

810 self.parse_file(file_path) 

811 

812 def type_validator(self, data_type): 

813 return data_type == "wmd" 

814 

815 def parse(self, lines, autocorrect=False): 

816 """ Parses the strings, assuming that the latter describes a graph. 

817 

818 :param lines: A list of string, each string being one line of the instance to parse. 

819 :type lines: list 

820 :param autocorrect: A boolean indicating whether we should try to automatically correct the potential errors 

821 in the file. Default is False. 

822 :type autocorrect: bool 

823 """ 

824 # The first few lines contain the metadata 

825 i = 0 

826 for i in range(len(lines)): 

827 line = lines[i].strip() 

828 if line.startswith('#'): 

829 if line.startswith("# NUMBER EDGES"): 

830 self.num_edges = int(line[15:].strip()) 

831 else: 

832 self.parse_metadata(line, autocorrect=autocorrect) 

833 else: 

834 break 

835 self.num_voters = self.num_alternatives 

836 

837 for line in lines[i:]: 

838 (vertex1, vertex2, weight) = line.strip().split(",") 

839 self.add_edge(int(vertex1), int(vertex2), float(weight)) 

840 self.num_edges = sum(len(edge_set) for edge_set in self.node_mapping.values()) 

841 

842 def parse_old(self, lines, autocorrect=False): 

843 """ Parses the strings, assuming that the latter describes a graph. 

844 

845 :param lines: A list of string, each string being one line of the instance to parse. 

846 :type lines: list 

847 :param autocorrect: A boolean indicating whether we should try to automatically correct the potential errors 

848 in the file. Default is False. 

849 :type autocorrect: bool 

850 """ 

851 

852 self.num_alternatives = int(lines[0].strip().split(",")[0]) 

853 self.num_voters = int(lines[0].strip().split(",")[1]) 

854 for i in range(1, self.num_alternatives + 1): 

855 self.alternatives_name[i] = lines[i].split(",")[1].strip() 

856 

857 # Skip the lines that describe the data 

858 graph_first_line = self.num_alternatives + 1 

859 for line in lines[graph_first_line:]: 

860 (vertex1, vertex2, weight) = line.strip().split(",") 

861 weight = float(weight) 

862 vertex1 = int(vertex1) 

863 vertex2 = int(vertex2) 

864 self.add_edge(vertex1, vertex2, weight) 

865 self.num_edges = sum(len(edge_set) for edge_set in self.node_mapping.values()) 

866 

867 def write(self, filepath): 

868 """ Writes the instance into a file whose destination has been given as argument, assuming the instance 

869 represents a graph. If no file extension is provided the data type of the instance is used. 

870 

871 :param filepath: The destination where to write the instance. 

872 :type filepath: str 

873 """ 

874 

875 if len(path.splitext(filepath)[1]) == 0: 

876 filepath += "." + str(self.data_type) 

877 file = open(filepath, "w", encoding="utf-8") 

878 # Writing metadata in the file header 

879 self.write_metadata(file) 

880 file.write("# NUMBER ALTERNATIVES: {}\n# NUMBER EDGES: {}\n".format( 

881 self.num_alternatives, self.num_edges, 

882 )) 

883 for alt, name in self.alternatives_name.items(): 

884 file.write("# ALTERNATIVE NAME {}: {}\n".format(alt, name)) 

885 

886 # Writing the actual graph 

887 nodes = sorted(list(self.nodes())) 

888 for n in nodes: 

889 out_edges = sorted(list(self.outgoing_edges(n)), key=lambda x: x[1]) 

890 for (vertex1, vertex2, weight) in out_edges: 

891 file.write("{},{},{}\n".format(vertex1, vertex2, weight)) 

892 file.close() 

893 

894 def __str__(self): 

895 return "Matching-Instance: {} <{},{}>".format(self.file_name, self.num_voters, self.num_alternatives) 

896 

897 

898def get_parsed_instance(file_path): 

899 """ Infers from the extension of the file given as input the correct instance to use. Parses the file and return 

900 the instance. 

901 

902 :param file_path: The path to the file to be parsed. 

903 :type file_path: str 

904 

905 :return: The instance with the file already parsed. 

906 :rtype: :class:`preflibtools.instances.preflibinstance.PrefLibInstance` 

907 """ 

908 extension = os.path.splitext(file_path)[1] 

909 if extension in ["soc", "soi", "toc", "toi"]: 

910 return OrdinalInstance(file_path) 

911 elif extension == "cat": 

912 return CategoricalInstance(file_path) 

913 elif extension == "wmd": 

914 return MatchingInstance(file_path)