Coverage for /home/simon/Git/preflibtools/preflibtools/instances/preflibinstance.py: 70%
446 statements
« prev ^ index » next coverage.py v6.4.4, created at 2022-09-22 11:34 +0200
« prev ^ index » next coverage.py v6.4.4, created at 2022-09-22 11:34 +0200
1""" This module describes the main class to deal with PrefLib instances..
2"""
3import os.path
5from .sampling import *
7from copy import deepcopy
8from os import path
10import urllib
11import re
14class PrefLibInstance(object):
15 """ This class provide a general template to implement specific classes representing PrefLib instances. It should
16 mainly be used as an abstract class.
18 :ivar file_path: The path to the file the instance is taken from.
19 :ivar file_name: The name of the file the instance is taken from.
20 :ivar data_type: The data type of the instance. Whenever a function only applies to certain types of data
21 (strict and complete orders for instance), we do so by checking this value.
22 :ivar modification_type: The modification type of the file: original if it represents original data; induced
23 or imbued it is derived from some other data; synthetic if it is synthetically generated.
24 :ivar relates_to: The data file this instance relates to, typically the original data file for induced
25 preferences.
26 :ivar related_files: The data files that are to the instance.
27 :ivar title: The title of the instance.
28 :ivar description: A description of the instance.
29 :ivar publication_date: Date at which the corresponding file has been added to PrefLib.com.
30 :ivar modification_date: Last date the file has been modified on PrefLib.com.
31 :ivar num_alternatives: The number of alternatives in the instance.
32 :ivar alternatives_name: A dictionary mapping alternative (int) to their name (str).
33 :ivar num_voters: The number of voters in the instance.
34 """
36 def __init__(self):
37 self.file_path = ""
38 self.file_name = ""
39 self.data_type = ""
40 self.modification_type = ""
41 self.relates_to = ""
42 self.related_files = ""
43 self.title = ""
44 self.description = ""
45 self.publication_date = ""
46 self.modification_date = ""
47 self.num_alternatives = 0
48 self.alternatives_name = {}
49 self.num_voters = 0
50 self.alt_name_pattern = re.compile(r'# ALTERNATIVE NAME (\d+): (.*)')
52 def type_validator(self, data_type):
53 """ Returns a boolean indicating whether the data_type given as argument is a valid one for the python class.
55 :param data_type: A strong representing a data type.
56 :type data_type: str
57 :return: True if the data type is valid for the class and False otherwise.
58 :rtype: bool
59 """
60 pass
62 def parse(self, lines, autocorrect=False):
63 pass
65 def parse_lines(self, lines, autocorrect=False):
66 """ Parses the lines provided as argument. The parser to be used is deducted from the instance's inner value of
67 data_type.
69 :param lines: A list of string, each string being one line of the instance to parse.
70 :type lines: list
71 :param autocorrect: A boolean indicating whether we should try to automatically correct the potential errors
72 in the file. Default is False.
73 :type autocorrect: bool
74 """
76 if self.type_validator(self.data_type):
77 self.parse(lines, autocorrect=autocorrect)
78 else:
79 raise TypeError("File extension " + str(self.data_type) + " is not valid for an ordinal PrefLib instance." +
80 " This file cannot be parsed.")
82 def parse_file(self, filepath, autocorrect=False):
83 """ Parses the file whose path is provided as argument and populates the PreflibInstance object accordingly.
84 The parser to be used is deduced from the file extension.
86 :param filepath: The path to the file to be parsed.
87 :type filepath: str
88 :param autocorrect: A boolean indicating whether we should try to automatically correct the potential errors
89 in the file. Default is False.
90 :type autocorrect: bool
91 """
93 # Populating basic properties of the instance
94 self.file_path = filepath
95 self.file_name = path.split(filepath)[1]
96 self.data_type = path.splitext(filepath)[1][1:]
98 # Read the file
99 file = open(filepath, "r", encoding="utf-8")
100 lines = file.readlines()
101 file.close()
103 self.parse_lines(lines, autocorrect=autocorrect)
105 def parse_str(self, string, data_type, file_name="", autocorrect=False):
106 """ Parses the string provided as argument and populates the PreflibInstance object accordingly.
107 The parser to be used is deduced from the file extension passed as argument.
109 :param string: The string to parse.
110 :type string: str
111 :param data_type: The data type represented by the string.
112 :type data_type: str
113 :param file_name: The value to store in the file_name member of the instance. Default is the empty string.
114 :type file_name: str
115 :param autocorrect: A boolean indicating whether we should try to automatically correct the potential errors
116 in the file. Default is False.
117 :type autocorrect: bool
118 """
120 self.file_path = "parsed_from_string"
121 self.file_name = file_name
122 self.data_type = data_type
124 self.parse_lines(string.splitlines(), autocorrect=autocorrect)
126 def parse_url(self, url, autocorrect=False):
127 """ Parses the file located at the provided URL and populates the PreflibInstance object accordingly.
128 The parser to be used (whether the file describes a graph or an order for instance) is deduced based
129 on the file extension.
131 :param url: The target URL.
132 :type url: str
133 :param autocorrect: A boolean indicating whether we should try to automatically correct the potential errors
134 in the file. Default is False.
135 :type autocorrect: bool
136 """
138 data = urllib.request.urlopen(url)
139 lines = [line.decode("utf-8").strip() for line in data]
140 data.close()
142 self.file_path = url
143 self.file_name = url.split('/')[-1].split('.')[0]
144 self.data_type = url.split('.')[-1]
146 self.parse_lines(lines, autocorrect=autocorrect)
148 def parse_metadata(self, line, autocorrect=False):
149 """ A helper function that parses metadata.
151 :param line: The line to parse.
152 :type line: str
153 :param autocorrect: A boolean indicating whether we should try to automatically correct the potential errors
154 in the file. Default is False.
155 :type autocorrect: bool
156 """
157 if line.startswith("# FILE NAME"):
158 self.file_name = line[12:].strip()
159 elif line.startswith("# TITLE"):
160 self.title = line[8:].strip()
161 elif line.startswith("# DESCRIPTION"):
162 self.description = line[14:].strip()
163 elif line.startswith("# DATA TYPE"):
164 self.data_type = line[12:].strip()
165 elif line.startswith("# MODIFICATION TYPE"):
166 self.modification_type = line[20:].strip()
167 elif line.startswith("# RELATES TO"):
168 self.relates_to = line[13:].strip()
169 elif line.startswith("# RELATED FILES"):
170 self.related_files = line[16:].strip()
171 elif line.startswith("# PUBLICATION DATE"):
172 self.publication_date = line[19:].strip()
173 elif line.startswith("# MODIFICATION DATE"):
174 self.modification_date = line[20:].strip()
175 elif line.startswith("# NUMBER ALTERNATIVES"):
176 self.num_alternatives = int(line[22:].strip())
177 elif line.startswith("# NUMBER VOTERS"):
178 self.num_voters = int(line[16:].strip())
179 elif line.startswith("# ALTERNATIVE NAME"):
180 match = re.match(self.alt_name_pattern, line)
181 if match:
182 alt = int(match.group(1))
183 alt_name = match.group(2)
184 if autocorrect and alt_name in self.alternatives_name.values():
185 tmp = 1
186 while alt_name + "__" + str(tmp) in self.alternatives_name.values():
187 tmp += 1
188 self.alternatives_name[alt] = alt_name + "__" + str(tmp)
189 else:
190 self.alternatives_name[alt] = alt_name
192 def write(self, filepath):
193 pass
195 def write_metadata(self, file):
196 """ A helper function that writes the metadata in the file.
198 :param file: The file to write into as a file object.
200 """
201 file.write("# FILE NAME: {}\n# TITLE: {}\n# DESCRIPTION: {}\n# DATA TYPE: {}\n# MODIFICATION TYPE: {}\n".format(
202 self.file_name, self.title, self.description, self.data_type, self.modification_type))
203 file.write("# RELATES TO: {}\n# RELATED FILES: {}\n# PUBLICATION DATE: {}\n# MODIFICATION DATE: {}\n".format(
204 self.relates_to, self.related_files, self.publication_date, self.modification_date))
206 def __str__(self):
207 return "PrefLib-Instance: {} <{},{}>".format(self.file_name, self.num_voters, self.num_alternatives)
210class OrdinalInstance(PrefLibInstance):
211 """ This is the class representing a PrefLib instance of ordinal preferences. It basically contains the data and
212 information written within a PrefLib file.
214 :param file_path: The path to the file the instance is taken from. If a path is provided as a parameter,
215 the file is immediately parsed and the instance populated accordingly.
216 :type file_path: str, optional
218 :ivar num_unique_orders: The number of unique orders in the instance.
219 :ivar multiplicity: A dictionary mapping each order to the number of voters who submitted that order.
220 :ivar orders: The list of all the distinct orders in the instance.
221 """
223 def __init__(self, file_path=""):
224 PrefLibInstance.__init__(self)
225 self.file_path = file_path
226 self.num_unique_orders = 0
227 self.multiplicity = {}
228 self.orders = []
230 # If a filePath is given as argument, we parse it immediately
231 if len(file_path) > 0:
232 self.parse_file(file_path)
234 def type_validator(self, data_type):
235 return data_type in ['soc', 'soi', 'toc', 'toi']
237 def parse(self, lines, autocorrect=False):
238 """ Parses the strings provided as argument, assuming that the latter describes an order.
240 :param lines: A list of string, each string being one line of the instance to parse.
241 :type lines: list
242 :param autocorrect: A boolean indicating whether we should try to automatically correct the potential errors
243 in the file. Default is False.
244 :type autocorrect: bool
245 """
247 # The first few lines contain the metadata
248 i = 0
249 for i in range(len(lines)):
250 line = lines[i].strip()
251 if line.startswith('#'):
252 if line.startswith("# NUMBER UNIQUE ORDERS"):
253 self.num_unique_orders = int(line[23:].strip())
254 else:
255 self.parse_metadata(line, autocorrect=autocorrect)
256 else:
257 break
259 # The rest of the lines are about the preferences
260 order_pattern = re.compile(r'{[\d,]+?}|[\d,]+')
261 for line in lines[i:]:
262 # The first element indicates the multiplicity of the order
263 multiplicity, order_str = line.strip().split(":")
264 multiplicity = int(multiplicity)
265 order = []
266 for group in re.findall(order_pattern, order_str):
267 if group.startswith('{'):
268 group = group[1:-1]
269 order.append(tuple(int(alt.strip()) for alt in group.split(',') if len(alt) > 0))
270 else:
271 for alt in group.split(','):
272 if len(alt) > 0:
273 order.append((int(alt.strip()),))
274 order = tuple(order)
275 self.orders.append(order)
276 if autocorrect and order in self.multiplicity:
277 self.multiplicity[tuple(order)] += multiplicity
278 else:
279 self.multiplicity[tuple(order)] = multiplicity
281 if autocorrect:
282 self.orders = list(set(self.orders))
283 self.num_alternatives = len(self.alternatives_name)
284 self.num_unique_orders = len(self.orders)
285 self.num_voters = sum(self.multiplicity.values())
287 def parse_old(self, lines, autocorrect=False):
288 """ Parses the strings provided as argument, assuming that the latter describes an order, in the old PrefLib
289 format.
291 :param lines: A list of string, each string being one line of the instance to parse.
292 :type lines: list
293 :param autocorrect: A boolean indicating whether we should try to automatically correct the potential errors
294 in the file. Default is False.
295 :type autocorrect: bool
296 """
298 # The first line gives us the number of alternatives, then comes the names of the alternatives
299 self.num_alternatives = int(lines[0])
300 for i in range(1, self.num_alternatives + 1):
301 alt_name = lines[i].split(",")[1].strip()
302 if autocorrect and alt_name in self.alternatives_name.values():
303 tmp = 1
304 while alt_name + "__" + str(tmp) in self.alternatives_name.values():
305 tmp += 1
306 self.alternatives_name[i] = alt_name + "__" + str(tmp)
307 else:
308 self.alternatives_name[i] = alt_name
310 # We've reached the description of the preferences. We start by some numbers...
311 self.num_voters = int(lines[self.num_alternatives + 1].split(",")[0])
312 self.num_unique_orders = int(lines[self.num_alternatives + 1].split(",")[2])
314 # ... and finally comes the preferences
315 for line in lines[self.num_alternatives + 2:]:
316 # The first element indicates the multiplicity of the order
317 elements = line.strip().split(",")
318 multiplicity = int(elements[0])
320 # Then we deal with the rest
321 in_braces = False
322 order = []
323 indif_class = []
324 for w in elements[1:]:
325 # If there is something in w
326 if w != "{}" and len(w) > 0:
327 # If we are entering a series of ties (grouped by {})
328 if w.startswith("{"):
329 # If w also ends with a }, we remove it
330 if w.endswith("}"):
331 w = w[:-1]
332 in_braces = True
333 indif_class.append(int(w[1:])) # The first element of w is {, so we go beyond that
334 # If we finished reading a series of ties (grouped by {})
335 elif w.endswith("}"):
336 in_braces = False
337 indif_class.append(int(w[:-1])) # The first element of w is }, so we go beyond that
338 order.append(tuple(indif_class))
339 indif_class = []
340 # Otherwise, we are just reading numbers
341 else:
342 # If we are facing ties, we add in the indifference class
343 if in_braces:
344 if int(w) not in indif_class:
345 indif_class.append(int(w))
346 # Otherwise, we add the strict preference.
347 else:
348 if (int(w),) not in order:
349 order.append((int(w),))
350 order = tuple(order)
351 self.orders.append(order)
352 if autocorrect and order in self.multiplicity:
353 self.multiplicity[tuple(order)] += multiplicity
354 else:
355 self.multiplicity[tuple(order)] = multiplicity
357 if autocorrect:
358 self.orders = list(set(self.orders))
360 def write(self, filepath):
361 """ Writes the instance into a file whose destination has been given as argument. If no file extension is
362 provided the data type of the instance is used.
364 :param filepath: The destination where to write the instance.
365 :type filepath: str
366 """
367 if len(path.splitext(filepath)[1]) == 0:
368 filepath += "." + str(self.data_type)
369 file = open(filepath, "w", encoding="utf-8")
370 # Writing metadata in the file header
371 self.write_metadata(file)
372 file.write("# NUMBER ALTERNATIVES: {}\n# NUMBER VOTERS: {}\n# NUMBER UNIQUE ORDERS: {}\n".format(
373 self.num_alternatives, self.num_voters, self.num_unique_orders
374 ))
375 for alt, name in self.alternatives_name.items():
376 file.write("# ALTERNATIVE NAME {}: {}\n".format(alt, name))
377 # Writing the actual ballots with their multiplicity
378 orders = deepcopy(self.orders)
379 orders.sort(key=lambda o: (-self.multiplicity[o], -len(o)))
380 for order in orders:
381 order_str = ""
382 for indif_class in order:
383 if len(indif_class) == 1:
384 order_str += str(indif_class[0]) + ","
385 else:
386 order_str += "{" + ",".join((str(alt) for alt in indif_class)) + "},"
387 file.write("{}: {}\n".format(self.multiplicity[order], order_str[:-1]))
388 file.close()
390 def vote_map(self):
391 """ Returns the instance described as a vote map, i.e., a dictionary whose keys are orders, mapping
392 to the number of voters with the given order as their preferences. This format can be useful for some
393 applications. It also ensures interoperability with the old preflibtools (vote maps were the main object).
395 :return: A vote map representing the preferences in the instance.
396 :rtype: dict of (tuples, int)
397 """
398 vote_map = {}
399 for order in self.orders:
400 vote_map[order] = self.multiplicity[order]
401 return vote_map
403 def full_profile(self):
404 """ Returns a list containing all the orders appearing in the preferences, with each order appearing as
405 many times as their multiplicity.
407 :return: A list of preferences (lists of alternatives).
408 :rtype: list
409 """
410 res = []
411 for order in self.orders:
412 res += [order] * self.multiplicity[order]
413 return res
415 def flatten_strict(self):
416 """ Strict orders are represented as orders with indifference classes of size 1. This is somewhat heavy when
417 working with strict preferences. This function flattens strict preferences by removing the indifference
418 class.
420 :return: A list of tuples of preference order and multiplicity.
421 :rtype: list
422 """
423 res = []
424 for order in self.orders:
425 if len(order) != self.num_alternatives:
426 print("WARNING: You are flattening a non-strict order.")
427 res.append((tuple(indif_class[0] for indif_class in order), self.multiplicity[order]))
428 return res
430 def infer_type(self):
431 """ Loops through the orders of the instance to infer whether the preferences strict and/or complete,.
433 :return: The data type of the instance.
434 :rtype: str
435 """
436 strict = True
437 complete = True
438 for order in self.orders:
439 if max(len(indif_class) for indif_class in order) != 1:
440 strict = False
441 if len([alt for indif_class in order for alt in indif_class]) != self.num_alternatives:
442 complete = False
443 if not strict and not complete:
444 return "toi"
445 if strict and complete:
446 return "soc"
447 if strict and not complete:
448 return "soi"
449 if not strict and complete:
450 return "toc"
452 def recompute_cardinality_param(self):
453 """ Recomputes the basic cardinality parameters based on the order list in the instance. Numbers that are
454 recomputed are the number of voters, the sum of voter count and the number of unique orders.
455 """
456 num_voters = 0
457 for order in self.orders:
458 num_voters += self.multiplicity[order]
459 self.num_voters = num_voters
460 self.num_unique_orders = len(set(self.orders))
462 def append_order_list(self, orders):
463 """ Appends a vote map to the instance. That function incorporates the new orders into the instance and
464 updates the set of alternatives if needed.
466 :param orders: A list of tuples of tuples, each tuple representing a preference order.
467 :type orders: list
468 """
469 alternatives = set(alt for order in orders for indif_class in order for alt in indif_class)
470 for alt in alternatives:
471 if alt not in self.alternatives_name:
472 self.alternatives_name[alt] = "Alternative " + str(alt)
473 self.num_alternatives = len(self.alternatives_name)
475 self.num_voters += len(orders)
477 for order in orders:
478 multiplicity = len([o for o in orders if o == order])
479 if order in self.multiplicity:
480 self.multiplicity[order] += multiplicity
481 else:
482 self.multiplicity[order] = multiplicity
484 self.num_unique_orders = len(self.multiplicity)
485 self.orders += deepcopy(orders)
487 self.data_type = self.infer_type()
489 def append_vote_map(self, vote_map):
490 """ Appends a vote map to the instance. That function incorporates the new orders into the instance and
491 updates the set of alternatives if needed.
493 :param vote_map: A vote map representing preferences. A vote map is a dictionary whose keys represent
494 orders (tuples of tuples of int) that are mapped to the number of voters with the given order as
495 their preferences. We re-map the orders to tuple of tuples to be sure we are dealing with the correct
496 type.
497 :type vote_map: dict of (tuple, int)
498 """
499 for ballot, multiplicity in vote_map.items():
500 order = tuple(tuple(indif_class) for indif_class in ballot)
501 if order not in self.orders:
502 self.orders.append(order)
503 self.multiplicity[order] = multiplicity
504 else:
505 self.multiplicity[order] += multiplicity
506 self.num_voters += multiplicity
508 for indif_class in ballot:
509 for alt in indif_class:
510 if alt not in self.alternatives_name:
511 self.alternatives_name[alt] = "Alternative " + str(alt)
513 self.num_alternatives = len(self.alternatives_name)
514 self.num_unique_orders = len(self.multiplicity)
516 self.data_type = self.infer_type()
518 def populate_IC(self, num_voters, num_alternatives):
519 """ Populates the instance with a random profile of strict preferences taken from the impartial culture
520 distribution. Uses :math:`preflibtools.instances.sampling.urnModel` for sampling.
522 :param num_voters: Number of orders to sample.
523 :type num_voters: int
524 :param num_alternatives: Number of alternatives for the sampled orders.
525 :type num_alternatives: int
526 """
527 self.append_vote_map(generate_IC(num_voters, list(range(num_alternatives))))
529 def populate_IC_anon(self, num_voters, num_alternatives):
530 """ Populates the instance with a random profile of strict preferences taken from the impartial anonymous
531 culture distribution. Uses :class:`preflibtools.instances.sampling` for sampling.
533 :param num_voters: Number of orders to sample.
534 :type num_voters: int
535 :param num_alternatives: Number of alternatives for the sampled orders.
536 :type num_alternatives: int
537 """
538 self.append_vote_map(generate_IC_anon(num_voters, list(range(num_alternatives))))
540 def populate_urn(self, num_voters, num_alternatives, replace):
541 """ Populates the instance with a random profile of strict preferences taken from the urn distribution.
542 Uses :class:`preflibtools.instances.sampling` for sampling.
544 :param num_voters: Number of orders to sample.
545 :type num_voters: int
546 :param num_alternatives: Number of alternatives for the sampled orders.
547 :type num_alternatives: int
548 :param replace: The number of replacements for the urn model.
549 :type replace: int
550 """
551 self.append_vote_map(generate_urn(num_voters, list(range(num_alternatives)), replace))
553 def populate_mallows(self, num_voters, num_alternatives, mixture, dispersions, references):
554 """ Populates the instance with a random profile of strict preferences taken from a mixture of Mallows'
555 models. Uses :class:`preflibtools.instances.sampling` for sampling.
557 :param num_voters: Number of orders to sample.
558 :type num_voters: int
559 :param num_alternatives: Number of alternatives for the sampled orders.
560 :type num_alternatives: int
561 :param mixture: A list of the weights of each element of the mixture.
562 :type mixture: list of positive numbers
563 :param dispersions: A list of the dispersion coefficient of each element of the mixture.
564 :type dispersions: list of float
565 :param references: A list of the reference orders for each element of the mixture.
566 :type references: list of tuples of tuples of int
567 """
568 self.append_vote_map(generate_mallows(num_voters, num_alternatives, mixture, dispersions, references))
570 def populate_mallows_mix(self, num_voters, num_alternatives, num_references):
571 """ Populates the instance with a random profile of strict preferences taken from a mixture of Mallows'
572 models for which reference points and dispersion coefficients are independently and identically
573 distributed. Uses :class:`preflibtools.instances.sampling` for sampling.
575 :param num_voters: Number of orders to sample.
576 :type num_voters: int
577 :param num_alternatives: Number of alternatives for the sampled orders.
578 :type num_alternatives: int
579 :param num_references: Number of element
580 :type num_references: int
581 """
582 self.append_vote_map(generate_mallows_mix(num_voters, list(range(num_alternatives)), num_references))
584 def __str__(self):
585 return "Ordinal-Instance: {} <{},{}>".format(self.file_name, self.num_voters, self.num_alternatives)
587class ComparisonInstance(PrefLibInstance):
588 """ To be implemented.
590 """
592 def __init__(self):
593 PrefLibInstance.__init__(self)
596class CategoricalInstance(PrefLibInstance):
597 """ This is the class representing a PrefLib instance of categorical preferences. It basically contains the data and
598 information written within a PrefLib file.
599 """
601 def __init__(self, file_path=""):
602 PrefLibInstance.__init__(self)
603 self.file_path = file_path
604 self.num_unique_preferences = 0
605 self.multiplicity = {}
606 self.num_categories = 0
607 self.categories_name = {}
608 self.preferences = []
610 # If a filePath is given as argument, we parse it immediately
611 if len(file_path) > 0:
612 self.parse_file(file_path)
614 def type_validator(self, data_type):
615 return data_type == "cat"
617 def parse(self, lines, autocorrect=False):
618 """ Parses the strings provided as argument, assuming that the latter describes categorical preferences.
620 :param lines: A list of string, each string being one line of the instance to parse.
621 :type lines: list
622 :param autocorrect: A boolean indicating whether we should try to automatically correct the potential errors
623 in the file. Default is False.
624 :type autocorrect: bool
625 """
627 # The first few lines contain the metadata
628 i = 0
629 cat_name_pattern = re.compile(r'# CATEGORY NAME (\d+): (.*)')
630 for i in range(len(lines)):
631 line = lines[i].strip()
632 if line.startswith('#'):
633 if line.startswith("# NUMBER UNIQUE PREFERENCES"):
634 self.num_unique_preferences = int(line[28:].strip())
635 if line.startswith("# NUMBER CATEGORIES"):
636 self.num_categories = int(line[20:].strip())
637 elif line.startswith("# CATEGORY NAME"):
638 match = re.match(cat_name_pattern, line)
639 if match:
640 cat = int(match.group(1))
641 cat_name = match.group(2)
642 if autocorrect and cat_name in self.categories_name.values():
643 tmp = 1
644 while cat_name + "__" + str(tmp) in self.categories_name.values():
645 tmp += 1
646 self.categories_name[cat] = cat_name + "__" + str(tmp)
647 else:
648 self.categories_name[cat] = cat_name
649 else:
650 self.parse_metadata(line, autocorrect=autocorrect)
651 else:
652 break
654 # The rest of the lines are about the preferences
655 pref_pattern = re.compile(r'{[\d,]+?}|[\d,]+|{}')
656 for line in lines[i:]:
657 # The first element indicates the multiplicity of the order
658 multiplicity, pref_str = line.strip().split(":")
659 multiplicity = int(multiplicity)
660 pref = []
661 for group in re.findall(pref_pattern, pref_str):
662 if group == '{}':
663 pref.append(tuple())
664 elif group.startswith('{'):
665 group = group[1:-1]
666 pref.append(tuple(int(alt.strip()) for alt in group.split(',') if len(alt) > 0))
667 else:
668 for alt in group.split(','):
669 if len(alt) > 0:
670 pref.append((int(alt.strip()),))
671 pref = tuple(pref)
672 self.preferences.append(pref)
673 if autocorrect and pref in self.multiplicity:
674 self.multiplicity[tuple(pref)] += multiplicity
675 else:
676 self.multiplicity[tuple(pref)] = multiplicity
678 if autocorrect:
679 self.preferences = list(set(self.preferences))
680 self.num_alternatives = len(self.alternatives_name)
681 self.num_unique_preferences = len(self.preferences)
682 self.num_voters = sum(self.multiplicity.values())
684 def write(self, filepath):
685 """ Writes the instance into a file whose destination has been given as argument. If no file extension is
686 provided the data type of the instance is used.
688 :param filepath: The destination where to write the instance.
689 :type filepath: str
690 """
691 if len(path.splitext(filepath)[1]) == 0:
692 filepath += "." + str(self.data_type)
693 file = open(filepath, "w", encoding="utf-8")
694 # Writing metadata in the file header
695 self.write_metadata(file)
696 file.write("# NUMBER ALTERNATIVES: {}\n# NUMBER VOTERS: {}\n# NUMBER UNIQUE PREFERENCES: {}\n".format(
697 self.num_alternatives, self.num_voters, self.num_unique_preferences
698 ))
699 file.write("# NUMBER CATEGORIES: {}\n".format(self.num_categories))
700 for cat, name in self.categories_name.items():
701 file.write("# CATEGORY NAME {}: {}\n".format(cat, name))
702 for alt, name in self.alternatives_name.items():
703 file.write("# ALTERNATIVE NAME {}: {}\n".format(alt, name))
704 # Writing the actual ballots with their multiplicity
705 preferences = deepcopy(self.preferences)
706 preferences.sort(key=lambda o: (-self.multiplicity[o], -len(o)))
707 for pref in preferences:
708 pref_str = ""
709 for category in pref:
710 if len(category) == 0:
711 pref_str += '{},'
712 elif len(category) == 1:
713 pref_str += str(category[0]) + ","
714 else:
715 pref_str += "{" + ",".join((str(alt) for alt in category)) + "},"
716 file.write("{}: {}\n".format(self.multiplicity[pref], pref_str[:-1]))
717 file.close()
719 def __str__(self):
720 return "Categorical-Instance: {} <{},{}>".format(self.file_name, self.num_voters, self.num_alternatives)
723class WeightedDiGraph(object):
724 """ This class is used to represent weighted directed graphs.
726 :ivar dict: The dictionary representing the graph mapping each node to its neighbourhood (set of nodes
727 to which it is connected). A node can be of any hashable type.
728 :ivar weight: The dictionary mapping every node to its weight.
729 """
731 def __init__(self):
732 self.node_mapping = dict()
733 self.weights = dict()
735 def neighbours(self, node):
736 """ Returns all the neighbours of a given node.
738 :param node: The node whose neighbours we want to know.
740 :return: The set of the neighbours of the node.
741 :rtype: set
742 """
743 return self.node_mapping[node]
745 def outgoing_edges(self, node):
746 """ Returns all the edges leaving a given node.
748 :param node: The node whose edges we want to get.
750 :return: The set of the tuples (node, neighbour, edgeWeight) representing (weighted) edges.
751 :rtype: set of tuples
752 """
753 return {(node, n, self.weights[(node, n)]) for n in self.node_mapping[node]}
755 def add_node(self, node):
756 """ Adds a node to the graph if the node does not already exist.
758 :param node: The node to add.
759 """
760 if node not in self.node_mapping:
761 self.node_mapping[node] = set()
763 def add_edge(self, node1, node2, weight):
764 """ Adds an edge to the graph. If the nodes do not exist in the graph, those are also added.
766 :param node1: The departure node of the edge.
767 :param node2: The arrival node of the edge.
768 :param weight: The weight of the edge.
769 """
770 self.add_node(node1)
771 self.add_node(node2)
772 self.node_mapping[node1].add(node2)
773 self.weights[(node1, node2)] = weight
775 def edges(self):
776 """ Returns the set of all the edges of the graph.
778 :return: A set of tuples (node, neighbour, weight) representing (weighted) edges.
779 :rtype: set of tuples
780 """
781 return {(n1, n2, self.weights[(n1, n2)]) for n1 in self.node_mapping for n2 in self.node_mapping[n1]}
783 def nodes(self):
784 """ Returns the set of all the nodes of the graph.
786 :return: The set of all the nodes of the graph.
787 :rtype: set
788 """
789 return self.node_mapping.keys()
791 def __str__(self):
792 """ Returns the string used when printing the graph """
793 return "Graph with {} vertices and {} edges :\n".format(len(self.node_mapping), len(self.edges()))
796class MatchingInstance(PrefLibInstance, WeightedDiGraph):
797 """ This is the class representing a PrefLib instance for matching preferences. It basically contains the data and
798 information written within a PrefLib file.
800 """
802 def __init__(self, file_path = ""):
803 PrefLibInstance.__init__(self)
804 WeightedDiGraph.__init__(self)
805 self.num_edges = 0
806 self.file_path = file_path
808 # If a filePath is given as argument, we parse it immediately
809 if len(file_path) > 0:
810 self.parse_file(file_path)
812 def type_validator(self, data_type):
813 return data_type == "wmd"
815 def parse(self, lines, autocorrect=False):
816 """ Parses the strings, assuming that the latter describes a graph.
818 :param lines: A list of string, each string being one line of the instance to parse.
819 :type lines: list
820 :param autocorrect: A boolean indicating whether we should try to automatically correct the potential errors
821 in the file. Default is False.
822 :type autocorrect: bool
823 """
824 # The first few lines contain the metadata
825 i = 0
826 for i in range(len(lines)):
827 line = lines[i].strip()
828 if line.startswith('#'):
829 if line.startswith("# NUMBER EDGES"):
830 self.num_edges = int(line[15:].strip())
831 else:
832 self.parse_metadata(line, autocorrect=autocorrect)
833 else:
834 break
835 self.num_voters = self.num_alternatives
837 for line in lines[i:]:
838 (vertex1, vertex2, weight) = line.strip().split(",")
839 self.add_edge(int(vertex1), int(vertex2), float(weight))
840 self.num_edges = sum(len(edge_set) for edge_set in self.node_mapping.values())
842 def parse_old(self, lines, autocorrect=False):
843 """ Parses the strings, assuming that the latter describes a graph.
845 :param lines: A list of string, each string being one line of the instance to parse.
846 :type lines: list
847 :param autocorrect: A boolean indicating whether we should try to automatically correct the potential errors
848 in the file. Default is False.
849 :type autocorrect: bool
850 """
852 self.num_alternatives = int(lines[0].strip().split(",")[0])
853 self.num_voters = int(lines[0].strip().split(",")[1])
854 for i in range(1, self.num_alternatives + 1):
855 self.alternatives_name[i] = lines[i].split(",")[1].strip()
857 # Skip the lines that describe the data
858 graph_first_line = self.num_alternatives + 1
859 for line in lines[graph_first_line:]:
860 (vertex1, vertex2, weight) = line.strip().split(",")
861 weight = float(weight)
862 vertex1 = int(vertex1)
863 vertex2 = int(vertex2)
864 self.add_edge(vertex1, vertex2, weight)
865 self.num_edges = sum(len(edge_set) for edge_set in self.node_mapping.values())
867 def write(self, filepath):
868 """ Writes the instance into a file whose destination has been given as argument, assuming the instance
869 represents a graph. If no file extension is provided the data type of the instance is used.
871 :param filepath: The destination where to write the instance.
872 :type filepath: str
873 """
875 if len(path.splitext(filepath)[1]) == 0:
876 filepath += "." + str(self.data_type)
877 file = open(filepath, "w", encoding="utf-8")
878 # Writing metadata in the file header
879 self.write_metadata(file)
880 file.write("# NUMBER ALTERNATIVES: {}\n# NUMBER EDGES: {}\n".format(
881 self.num_alternatives, self.num_edges,
882 ))
883 for alt, name in self.alternatives_name.items():
884 file.write("# ALTERNATIVE NAME {}: {}\n".format(alt, name))
886 # Writing the actual graph
887 nodes = sorted(list(self.nodes()))
888 for n in nodes:
889 out_edges = sorted(list(self.outgoing_edges(n)), key=lambda x: x[1])
890 for (vertex1, vertex2, weight) in out_edges:
891 file.write("{},{},{}\n".format(vertex1, vertex2, weight))
892 file.close()
894 def __str__(self):
895 return "Matching-Instance: {} <{},{}>".format(self.file_name, self.num_voters, self.num_alternatives)
898def get_parsed_instance(file_path):
899 """ Infers from the extension of the file given as input the correct instance to use. Parses the file and return
900 the instance.
902 :param file_path: The path to the file to be parsed.
903 :type file_path: str
905 :return: The instance with the file already parsed.
906 :rtype: :class:`preflibtools.instances.preflibinstance.PrefLibInstance`
907 """
908 extension = os.path.splitext(file_path)[1]
909 if extension in ["soc", "soi", "toc", "toi"]:
910 return OrdinalInstance(file_path)
911 elif extension == "cat":
912 return CategoricalInstance(file_path)
913 elif extension == "wmd":
914 return MatchingInstance(file_path)