IVSparse  v1.0
A sparse matrix compression library.
IVCSC_Constructors.hpp
1 
9 #pragma once
10 
11 namespace IVSparse {
12 
13  // Destructor
14  template <typename T, typename indexT, uint8_t compressionLevel, bool columnMajor>
16  // delete the meta data
17  if (metadata != nullptr) { delete[] metadata; }
18 
19  if (data != nullptr) {
20  // free the data
21  for (size_t i = 0; i < outerDim; i++) { if (data[i] != nullptr) { free(data[i]); } }
22  free(data);
23  }
24 
25  if (endPointers != nullptr) { free(endPointers); }
26  }
27 
28  // Row and Column Constructor
29  template <typename T, typename indexT, uint8_t compressionLevel, bool columnMajor>
31 
32  // set class variables
33  if constexpr (columnMajor) {
34  innerDim = num_cols;
35  outerDim = num_rows;
36  } else {
37  innerDim = num_rows;
38  outerDim = num_cols;
39  }
40  numRows = num_rows;
41  numCols = num_cols;
42  encodeValueType();
43  index_t = sizeof(indexT);
44  nnz = 0;
45 
46  // allocate memory for the data
47  try {
48  data = (void **)malloc(outerDim * sizeof(void *));
49  endPointers = (void **)malloc(outerDim * sizeof(void *));
50  } catch (const std::exception &e) {
51  std::cerr << e.what() << '\n';
52  }
53 
54  // set all data and endpointers to the nullptr
55  for (size_t i = 0; i < outerDim; i++) {
56  data[i] = nullptr;
57  endPointers[i] = nullptr;
58  }
59 
60  // set the metadata
61  metadata = new uint32_t[NUM_META_DATA];
62  metadata[0] = compressionLevel;
63  metadata[1] = innerDim;
64  metadata[2] = outerDim;
65  metadata[3] = nnz;
66  metadata[4] = val_t;
67  metadata[5] = index_t;
68 
69  // calculate the compression size
70  calculateCompSize();
71 
72  // run the user checks
73  #ifdef CSF_DEBUG
74  userChecks();
75  #endif
76  }
77 
78  // Eigen Constructor
79  template <typename T, typename indexT, uint8_t compressionLevel, bool columnMajor>
81  // check if the matrix is empty
82  if (mat.nonZeros() == 0) {
83  val_t = 0;
84  index_t = 0;
85 
86  data = nullptr;
87  endPointers = nullptr;
88  metadata = nullptr;
89  return;
90  }
91 
92  // make sure the matrix is compressed
93  mat.makeCompressed();
94 
95  // get the number of rows and columns
96  numRows = mat.rows();
97  numCols = mat.cols();
98 
99  outerDim = columnMajor ? numCols : numRows;
100  innerDim = columnMajor ? numRows : numCols;
101 
102  // get the number of non-zero elements
103  nnz = mat.nonZeros();
104 
105  // call the compression function
106  compressCSC(mat.valuePtr(), mat.innerIndexPtr(), mat.outerIndexPtr());
107  }
108 
109  // Eigen Row Major Constructor
110  template <typename T, typename indexT, uint8_t compressionLevel, bool columnMajor>
111  SparseMatrix<T, indexT, compressionLevel, columnMajor>::SparseMatrix(Eigen::SparseMatrix<T, Eigen::RowMajor> &mat) {
112  // check if the matrix is empty
113  if (mat.nonZeros() == 0) {
114  val_t = 0;
115  index_t = 0;
116 
117  data = nullptr;
118  endPointers = nullptr;
119  metadata = nullptr;
120  return;
121  }
122 
123  // make sure the matrix is compressed
124  mat.makeCompressed();
125 
126  // get the number of rows and columns
127  numRows = mat.rows();
128  numCols = mat.cols();
129 
130  outerDim = numRows;
131  innerDim = numCols;
132 
133  // get the number of non-zero elements
134  nnz = mat.nonZeros();
135 
136  // call the compression function
137  compressCSC(mat.valuePtr(), mat.innerIndexPtr(), mat.outerIndexPtr());
138  }
139 
140  // Deep Copy Constructor
141  template <typename T, typename indexT, uint8_t compressionLevel, bool columnMajor>
143 
144  // Conversion Constructor
145  template <typename T, typename indexT, uint8_t compressionLevel, bool columnMajor>
146  template <uint8_t otherCompressionLevel>
148 
149  // if already the right compression level
150  if constexpr (otherCompressionLevel == compressionLevel) {
151  *this = other;
152  return;
153  }
154 
155  // make a temporary matrix of the correct compression level
157 
158  // convert other to the right compression level
159  if constexpr (otherCompressionLevel == 1) { temp = other.toIVCSC(); }
160  else if constexpr (otherCompressionLevel == 2) { temp = other.toIVCSC(); }
161 
162  // other should be the same compression level as this now
163  *this = temp;
164 
165  // run the user checks
166  #ifdef CSF_DEBUG
167  userChecks();
168  #endif
169  }
170 
171  // Raw CSC Constructor
172  template <typename T, typename indexT, uint8_t compressionLevel, bool columnMajor>
173  template <typename T2, typename indexT2>
174  SparseMatrix<T, indexT, compressionLevel, columnMajor>::SparseMatrix(T2 *vals, indexT2 *innerIndices, indexT2 *outerPtr, uint32_t num_rows, uint32_t num_cols, uint32_t nnz) {
175  // see if the matrix is empty
176  if (nnz == 0) [[unlikely]] {
177  val_t = 0;
178  index_t = 0;
179  data = nullptr;
180  endPointers = nullptr;
181  metadata = nullptr;
182  return;
183  }
184 
185  // set class variables
186  if (columnMajor) {
187  innerDim = num_rows;
188  outerDim = num_cols;
189  } else {
190  innerDim = num_cols;
191  outerDim = num_rows;
192  }
193  numRows = num_rows;
194  numCols = num_cols;
195  this->nnz = nnz;
196 
197  // call the compression function
198  compressCSC(vals, innerIndices, outerPtr);
199  }
200 
201  // COO Constructor
202  template <typename T, typename indexT, uint8_t compressionLevel, bool columnMajor>
203  template <typename T2, typename indexT2>
204  SparseMatrix<T, indexT, compressionLevel, columnMajor>::SparseMatrix(std::vector<std::tuple<indexT2, indexT2, T2>> &entries, uint32_t num_rows, uint32_t num_cols, uint32_t nnz) {
205 
206  // see if the matrix is empty
207  if (nnz == 0) [[unlikely]] {
208  *this = SparseMatrix<T, indexT, compressionLevel, columnMajor>(num_rows, num_cols);
209  return;
210  }
211 
212  // set class variables
213  if (columnMajor) {
214  innerDim = num_rows;
215  outerDim = num_cols;
216  } else {
217  innerDim = num_cols;
218  outerDim = num_rows;
219  }
220 
221  numRows = num_rows;
222  numCols = num_cols;
223  this->nnz = nnz;
224  encodeValueType();
225  index_t = sizeof(indexT);
226 
227  metadata = new uint32_t[NUM_META_DATA];
228  metadata[0] = compressionLevel;
229  metadata[1] = innerDim;
230  metadata[2] = outerDim;
231  metadata[3] = nnz;
232  metadata[4] = val_t;
233  metadata[5] = index_t;
234 
235  // allocate memory for the data
236  try {
237  data = (void **)malloc(outerDim * sizeof(void *));
238  endPointers = (void **)malloc(outerDim * sizeof(void *));
239  } catch (std::bad_alloc &e) {
240  std::cerr << "Error: Could not allocate memory for IVSparse matrix" << std::endl;
241  exit(1);
242  }
243 
244  // set all data and endpointers to the nullptr
245  for (size_t i = 0; i < outerDim; i++) {
246  data[i] = nullptr;
247  endPointers[i] = nullptr;
248  }
249 
250  // sort the tuples by first by column then by row
251  std::sort(entries.begin(), entries.end(), [](const std::tuple<indexT2, indexT2, T2> &a, const std::tuple<indexT2, indexT2, T2> &b)
252  {
253  if (std::get<1>(a) == std::get<1>(b)) {
254  return std::get<0>(a) < std::get<0>(b);
255  } else {
256  return std::get<1>(a) < std::get<1>(b);
257  } });
258 
259  std::map<T2, std::vector<indexT2>> maps[outerDim];
260 
261  // loop through the tuples
262  for (size_t i = 0; i < nnz; i++) {
263  // get the column
264  indexT2 row = std::get<0>(entries[i]);
265  indexT2 col = std::get<1>(entries[i]);
266  T2 val = std::get<2>(entries[i]);
267 
268  // check if the value is already in the map
269  if (maps[col].find(val) != maps[col].end()) {
270 
271  // value found positive delta encode it
272  maps[col][val].push_back(row - maps[col][val][1]);
273 
274  // update the last index
275  maps[col][val][1] = row;
276 
277  // update the maximum delta
278  if (maps[col][val][maps[col][val].size() - 1] > maps[col][val][0])
279  maps[col][val][0] = maps[col][val][maps[col][val].size() - 1];
280  } else {
281 
282  // value not found
283  maps[col][val] = std::vector<indexT2>{row};
284 
285  // add maximum delta and last index placeholders
286  maps[col][val].push_back(row);
287  maps[col][val].push_back(row);
288  }
289  } // end of loop through tuples
290 
291  // loop through the maps
292  #ifdef CSF_PARALLEL
293  #pragma omp parallel for
294  #endif
295  for (uint32_t i = 0; i < outerDim; i++) {
296 
297  size_t outerByteSize = 0;
298 
299  for (auto &pair : maps[i]) {
300  // change first value to be byte width of the maximum delta
301  pair.second[0] = byteWidth(pair.second[0]);
302 
303  // add the size of the run to the size of the column
304  //* value + index width + indices * index width + delimiter (index width)
305  outerByteSize += sizeof(T) + 1 + (pair.second[0] * (pair.second.size() - 2)) + pair.second[0];
306  }
307 
308  // if column is empty set the data and endpointer to nullptr
309  if (outerByteSize == 0) {
310  data[i] = nullptr;
311  endPointers[i] = nullptr;
312  continue;
313  }
314 
315  // allocate space for the column
316  try { data[i] = malloc(outerByteSize); }
317  catch (std::bad_alloc &e) {
318  std::cout << "Error: " << e.what() << std::endl;
319  exit(1);
320  }
321 
322  // get a help pointer for moving through raw memory
323  void *helpPtr = data[i];
324 
325  // loop through the dictionary and write to memory
326  for (auto &pair : maps[i]) {
327  // Write the value to memory
328  *(T *)helpPtr = (T)pair.first;
329  helpPtr = (T *)helpPtr + 1;
330 
331  // also write the index width
332  *(uint8_t *)helpPtr = (uint8_t)pair.second[0];
333  helpPtr = (uint8_t *)helpPtr + 1;
334 
335  // loop through the indices and write them to memory
336  for (size_t k = 0; k < pair.second.size(); k++) {
337 
338  // if compression level 3 skip the first two indices and cast the index
339  if (k == 0 || k == 1) { continue; }
340 
341  // create a type of the correct width
342  switch (pair.second[0]) {
343  case 1:
344  *(uint8_t *)helpPtr = (uint8_t)pair.second[k];
345  helpPtr = (uint8_t *)helpPtr + 1;
346  break;
347  case 2:
348  *(uint16_t *)helpPtr = (uint16_t)pair.second[k];
349  helpPtr = (uint16_t *)helpPtr + 1;
350  break;
351  case 4:
352  *(uint32_t *)helpPtr = (uint32_t)pair.second[k];
353  helpPtr = (uint32_t *)helpPtr + 1;
354  break;
355  case 8:
356  *(uint64_t *)helpPtr = (uint64_t)pair.second[k];
357  helpPtr = (uint64_t *)helpPtr + 1;
358  break;
359  }
360 
361  } // End of index loop
362 
363  // write a delimiter of the correct width
364  switch (pair.second[0]) {
365  case 1:
366  *(uint8_t *)helpPtr = (uint8_t)DELIM;
367  helpPtr = (uint8_t *)helpPtr + 1;
368  break;
369  case 2:
370  *(uint16_t *)helpPtr = (uint16_t)DELIM;
371  helpPtr = (uint16_t *)helpPtr + 1;
372  break;
373  case 4:
374  *(uint32_t *)helpPtr = (uint32_t)DELIM;
375  helpPtr = (uint32_t *)helpPtr + 1;
376  break;
377  case 8:
378  *(uint64_t *)helpPtr = (uint64_t)DELIM;
379  helpPtr = (uint64_t *)helpPtr + 1;
380  break;
381  }
382  // Set a pointer to the end of the data
383  endPointers[i] = helpPtr;
384 
385  } // End of dictionary loop
386  }
387 
388  // calculate the compression size
389  calculateCompSize();
390  }
391 
392  // IVSparse Vector Constructor
393  template <typename T, typename indexT, uint8_t compressionLevel, bool columnMajor>
395  // if it is the matrix is empty and we need to construct it
396  if (columnMajor) {
397  numRows = vec.getLength();
398  numCols = 1;
399  innerDim = numRows;
400  outerDim = numCols;
401  } else {
402  numRows = 1;
403  numCols = vec.getLength();
404  innerDim = numCols;
405  outerDim = numRows;
406  }
407 
408  nnz = vec.nonZeros();
409 
410  encodeValueType();
411  index_t = sizeof(indexT);
412 
413  // malloc the data
414  try {
415  data = (void **)malloc(sizeof(void *));
416  endPointers = (void **)malloc(sizeof(void *));
417  } catch (std::bad_alloc &e) {
418  throw std::bad_alloc();
419  }
420 
421  // malloc the vector
422  try {
423  // if vector is empty set the data to null
424  if (vec.begin() == vec.end()) {
425  data[0] = nullptr;
426  endPointers[0] = nullptr;
427  } else {
428  data[0] = malloc(vec.byteSize());
429  endPointers[0] = (char *)data[0] + vec.byteSize();
430  }
431  } catch (std::bad_alloc &e) {
432  throw std::bad_alloc();
433  }
434 
435  // copy the vector into the matrix if the vector is not empty
436  if (vec.begin() != vec.end())
437  memcpy(data[0], vec.begin(), vec.byteSize());
438 
439  // set the metadata
440  metadata = new uint32_t[NUM_META_DATA];
441  metadata[0] = compressionLevel;
442  metadata[1] = innerDim;
443  metadata[2] = outerDim;
444  metadata[3] = nnz;
445  metadata[4] = val_t;
446  metadata[5] = index_t;
447 
448  // run the user checks and calculate the compression size
449  calculateCompSize();
450  #ifdef CSF_DEBUG
451  userChecks();
452  #endif
453  }
454 
455  // Array of Vectors Constructor
456  template <typename T, typename indexT, uint8_t compressionLevel, bool columnMajor>
458 
460 
461  for (size_t i = 1; i < vecs.size(); i++) { temp.append(vecs[i]); }
462 
463  *this = temp;
464 
465  calculateCompSize();
466 
467  // run the user checks
468  #ifdef CSF_DEBUG
469  userChecks();
470  #endif
471  }
472 
473  // File Constructor
474  template <typename T, typename indexT, uint8_t compressionLevel, bool columnMajor>
476  FILE *fp = fopen(filename, "rb");
477 
478  if (fp == nullptr) { throw std::runtime_error("Error: Could not open file"); }
479 
480  // read the metadata
481  metadata = new uint32_t[NUM_META_DATA];
482  fread(metadata, sizeof(uint32_t), NUM_META_DATA, fp);
483 
484  // set the matrix info
485  innerDim = metadata[1];
486  outerDim = metadata[2];
487  nnz = metadata[3];
488  val_t = metadata[4];
489  index_t = metadata[5];
490 
491  numRows = columnMajor ? innerDim : outerDim;
492  numCols = columnMajor ? outerDim : innerDim;
493 
494  // if the compression level of the file is different than the compression level of the class
495  if (metadata[0] != compressionLevel) {
496  // throw an error
497  throw std::runtime_error("Error: Compression level of file does not match compression level of class");
498  }
499 
500  // allocate the memory
501  try {
502  data = (void **)malloc(outerDim * sizeof(void *));
503  endPointers = (void **)malloc(outerDim * sizeof(void *));
504  } catch (std::bad_alloc &e) {
505  std::cerr << "Error: Could not allocate memory for IVSparse matrix" << std::endl;
506  exit(1);
507  }
508 
509  // get the vector sizes and allocate memory
510  for (size_t i = 0; i < outerDim; i++) {
511  // get the size of the column
512  uint64_t size;
513  fread(&size, sizeof(uint64_t), 1, fp);
514 
515  // if the size is 0, set the data and endpointer to nullptr
516  if (size == 0) {
517  data[i] = nullptr;
518  endPointers[i] = nullptr;
519  continue;
520  }
521 
522  // malloc the column
523  try {
524  data[i] = malloc(size);
525  endPointers[i] = (char *)data[i] + size;
526  } catch (std::bad_alloc &e) {
527  throw std::bad_alloc();
528  }
529  }
530 
531  // read the data
532  for (size_t i = 0; i < outerDim; i++) { fread(data[i], 1, (uint8_t *)endPointers[i] - (uint8_t *)data[i], fp); }
533 
534  // close the file
535  fclose(fp);
536 
537  // calculate the compresssion size
538  calculateCompSize();
539 
540  // run the user checks
541  #ifdef CSF_DEBUG
542  userChecks();
543  #endif
544 
545  } // end of file constructor
546 
547  //* Private Constructors *//
548 
549  // Private Tranpose Constructor
550  template <typename T, typename indexT, uint8_t compressionLevel, bool columnMajor>
551  SparseMatrix<T, indexT, compressionLevel, columnMajor>::SparseMatrix(std::unordered_map<T, std::vector<indexT>> maps[], uint32_t num_rows, uint32_t num_cols) {
552  // set class variables
553  if constexpr (columnMajor) {
554  innerDim = num_cols;
555  outerDim = num_rows;
556  } else {
557  innerDim = num_rows;
558  outerDim = num_cols;
559  }
560 
561  numRows = num_cols;
562  numCols = num_rows;
563  encodeValueType();
564  index_t = sizeof(indexT);
565 
566  // allocate memory for the data
567  try {
568  data = (void **)malloc(outerDim * sizeof(void *));
569  endPointers = (void **)malloc(outerDim * sizeof(void *));
570  } catch (const std::exception &e) {
571  std::cerr << e.what() << '\n';
572  }
573 
574  // set all data and endpointers to the nullptr
575  for (size_t i = 0; i < outerDim; i++) {
576  data[i] = nullptr;
577  endPointers[i] = nullptr;
578  }
579 
580  //* logic here
581 
582  // loop through the array
583  #ifdef CSF_PARALLEL
584  #pragma omp parallel for
585  #endif
586  for (size_t i = 0; i < outerDim; i++) {
587 
588  // check if the column is empty
589  if (maps[i].empty()) [[unlikely]] {
590  data[i] = nullptr;
591  endPointers[i] = nullptr;
592  continue;
593  }
594 
595  size_t byteSize = 0;
596 
597  // loop through the vectors of the map
598  for (auto &val : maps[i]) {
599  // add the size of the vector to the byteSize
600  byteSize += sizeof(T) + 1 + (val.second[val.second.size() - 1] * (val.second.size() - 1) + val.second[val.second.size() - 1]);
601  }
602 
603  // allocate memory for the vector
604  try { data[i] = malloc(byteSize); }
605  catch (const std::exception &e) { std::cerr << e.what() << '\n'; }
606 
607  // set the end pointer
608  endPointers[i] = (char *)data[i] + byteSize;
609 
610  // compressCSC the column
611  void *helpPtr = data[i];
612 
613  for (auto &val : maps[i]) {
614 
615  nnz += val.second.size() - 1;
616 
617  // set the value
618  *(T *)helpPtr = val.first;
619  helpPtr = (char *)helpPtr + sizeof(T);
620  *(uint8_t *)helpPtr = (uint8_t)val.second[val.second.size() - 1];
621  helpPtr = (uint8_t *)helpPtr + 1;
622 
623  // write the indices
624  for (size_t k = 0; k < val.second.size(); k++) {
625 
626  if (k == val.second.size() - 1)
627  break;
628 
629  switch (val.second[val.second.size() - 1]) {
630  case 1:
631  *(uint8_t *)helpPtr = (uint8_t)val.second[k];
632  helpPtr = (uint8_t *)helpPtr + 1;
633  break;
634  case 2:
635  *(uint16_t *)helpPtr = (uint16_t)val.second[k];
636  helpPtr = (uint16_t *)helpPtr + 1;
637  break;
638  case 4:
639  *(uint32_t *)helpPtr = (uint32_t)val.second[k];
640  helpPtr = (uint32_t *)helpPtr + 1;
641  break;
642  case 8:
643  *(uint64_t *)helpPtr = (uint64_t)val.second[k];
644  helpPtr = (uint64_t *)helpPtr + 1;
645  break;
646  }
647  }
648 
649  // write a delimiter of the correct width
650  switch (val.second[val.second.size() - 1]) {
651  case 1:
652  *(uint8_t *)helpPtr = (uint8_t)DELIM;
653  helpPtr = (uint8_t *)helpPtr + 1;
654  break;
655  case 2:
656  *(uint16_t *)helpPtr = (uint16_t)DELIM;
657  helpPtr = (uint16_t *)helpPtr + 1;
658  break;
659  case 4:
660  *(uint32_t *)helpPtr = (uint32_t)DELIM;
661  helpPtr = (uint32_t *)helpPtr + 1;
662  break;
663  case 8:
664  *(uint64_t *)helpPtr = (uint64_t)DELIM;
665  helpPtr = (uint64_t *)helpPtr + 1;
666  break;
667  }
668  }
669  }
670 
671  //* end logic
672 
673  metadata = new uint32_t[NUM_META_DATA];
674 
675  // Set the meta data
676  metadata[0] = compressionLevel;
677  metadata[1] = innerDim;
678  metadata[2] = outerDim;
679  metadata[3] = nnz;
680  metadata[4] = val_t;
681  metadata[5] = index_t;
682 
683  // calculate the compression size
684  calculateCompSize();
685 
686  // run the user checks
687  #ifdef CSF_DEBUG
688  userChecks();
689  #endif
690 
691  } // end of private transpose constructor
692 
693 } // end of IVSparse namespace
Definition: IVCSC_Vector.hpp:27
uint32_t getLength()
Definition: IVCSC_Vector_Methods.hpp:183
void * end()
Definition: IVCSC_Vector_Methods.hpp:171
size_t byteSize()
Definition: IVCSC_Vector_Methods.hpp:175
uint32_t nonZeros()
Definition: IVCSC_Vector_Methods.hpp:163
void * begin()
Definition: IVCSC_Vector_Methods.hpp:167
Definition: IVCSC_SparseMatrix.hpp:27
void append(typename SparseMatrix< T, indexT, compressionLevel, columnMajor >::Vector &vec)
Definition: IVCSC_Methods.hpp:202
SparseMatrix()
Definition: IVCSC_SparseMatrix.hpp:93
~SparseMatrix()
Destroy the Sparse Matrix object.
Definition: IVCSC_Constructors.hpp:15