IVSparse  v1.0
A sparse matrix compression library.
IVCSC_Private_Methods.hpp
Go to the documentation of this file.
1 
9 #pragma once
10 
11 namespace IVSparse {
12 
13 // Encodes the value type of the matrix in a uint32_t
14 template <typename T, typename indexT, uint8_t compressionLevel, bool columnMajor>
15 void SparseMatrix<T, indexT, compressionLevel, columnMajor>::encodeValueType() {
16  uint8_t byte0 = sizeof(T);
17  uint8_t byte1 = std::is_floating_point<T>::value ? 1 : 0;
18  uint8_t byte2 = std::is_signed_v<T> ? 1 : 0;
19  uint8_t byte3 = columnMajor ? 1 : 0;
20 
21  val_t = (byte3 << 24) | (byte2 << 16) | (byte1 << 8) | byte0;
22 }
23 
24 // Checks if the value type is correct for the matrix
25 template <typename T, typename indexT, uint8_t compressionLevel, bool columnMajor>
26 void SparseMatrix<T, indexT, compressionLevel, columnMajor>::checkValueType() {
27  uint8_t byte0 = val_t & 0xFF;
28  uint8_t byte1 = (val_t >> 8) & 0xFF;
29  uint8_t byte2 = (val_t >> 16) & 0xFF;
30  uint8_t byte3 = (val_t >> 24) & 0xFF;
31  assert(byte0 == sizeof(T) && "Value type size does not match");
32  assert(byte1 == std::is_floating_point_v<T> &&
33  "Value type is not floating point");
34  assert(byte2 == std::is_signed_v<T> && "Value type is not signed");
35  assert(byte3 == columnMajor && "Major direction does not match");
36 }
37 
38 // performs some simple user checks on the matrices metadata
39 template <typename T, typename indexT, uint8_t compressionLevel, bool columnMajor>
40 void SparseMatrix<T, indexT, compressionLevel, columnMajor>::userChecks() {
41  assert((innerDim >= 1 || outerDim >= 1) &&
42  "The matrix must have at least one row, column, and nonzero value");
43  assert(std::is_floating_point<indexT>::value == false &&
44  "The index type must be a non-floating point type");
45  assert((compressionLevel == 3) && "The compression level must be 3");
46  assert((std::is_arithmetic<T>::value && std::is_arithmetic<indexT>::value) &&
47  "The value and index types must be numeric types");
48  assert((std::is_same<indexT, bool>::value == false) &&
49  "The index type must not be bool");
50  assert((innerDim < std::numeric_limits<indexT>::max() &&
51  outerDim < std::numeric_limits<indexT>::max()) &&
52  "The number of rows and columns must be less than the maximum value "
53  "of the index type");
54  checkValueType();
55 }
56 
57 // Calculates the current byte size of the matrix in memory
58 template <typename T, typename indexT, uint8_t compressionLevel, bool columnMajor>
59 void SparseMatrix<T, indexT, compressionLevel, columnMajor>::calculateCompSize() {
60  // set compSize to zero
61  compSize = 0;
62 
63  // add the size of the metadata
64  compSize += META_DATA_SIZE;
65 
66  // add the size of the data pointers
67  compSize += (sizeof(void *) * outerDim) * 2;
68 
69  // add the size of the data itself
70  for (uint32_t i = 0; i < outerDim; i++) {
71  compSize += *((uint8_t **)endPointers + i) - *((uint8_t **)data + i);
72  }
73 }
74 
75 // Compression Algorithm for going from CSC to IVCSC
76 template <typename T, typename indexT, uint8_t compressionLevel, bool columnMajor>
77 template <typename T2, typename indexT2> void SparseMatrix<T, indexT, compressionLevel, columnMajor>::compressCSC(
78  T2 *vals, indexT2 *innerIndices, indexT2 *outerPointers) {
79 
80  // ---- Stage 1: Setup the Matrix ---- //
81 
82  // set the value and index types of the matrix
83  encodeValueType();
84  index_t = sizeof(indexT);
85 
86  // allocate space for metadata
87  metadata = new uint32_t[NUM_META_DATA];
88  metadata[0] = compressionLevel;
89  metadata[1] = innerDim;
90  metadata[2] = outerDim;
91  metadata[3] = nnz;
92  metadata[4] = val_t;
93  metadata[5] = index_t;
94 
95  // run the user checks on the metadata
96  #ifdef IVSPARSE_DEBUG
97  userChecks();
98  #endif
99 
100  // allocate space for the data
101  try {
102  data = (void **)malloc(outerDim * sizeof(void *));
103  endPointers = (void **)malloc(outerDim * sizeof(void *));
104  } catch (std::bad_alloc &e) {
105  std::cout << "Error: " << e.what() << std::endl;
106  exit(1);
107  }
108 
109  // ---- Stage 2: Construct the Dictionary For Each Column ---- //
110 
111  // Loop through each column and construct a middle data structre for the matrix
112 
113  // include guard for parallel processing
114  #ifdef IVSPARSE_HAS_OPENMP
115  #pragma omp parallel for
116  #endif
117  for (uint32_t i = 0; i < outerDim; i++) {
118  // create the data structure to temporarily hold the data
119  std::map<T2, std::vector<indexT2>>
120  dict; // Key = value, Value = vector of indices
121 
122  // check if the current column is empty
123  if (outerPointers[i] == outerPointers[i + 1]) {
124  data[i] = nullptr;
125  endPointers[i] = nullptr;
126  continue;
127  }
128 
129  // loop through each value in the column and add it to dict
130  for (indexT2 j = outerPointers[i]; j < outerPointers[i + 1]; j++) {
131  // check if the value is already in the dictionary or not
132  if (dict.find(vals[j]) != dict.end()) {
133  // add the index to the vector
134 
135  // positive delta encode (PDE)
136  dict[vals[j]].push_back(innerIndices[j] - dict[vals[j]][1]);
137 
138  // update the last index (stored in the second index of the vector)
139  dict[vals[j]][1] = innerIndices[j];
140 
141  // update the maximum delta (stored in the first index of the vector)
142  if (dict[vals[j]][dict[vals[j]].size() - 1] > dict[vals[j]][0]) {
143  dict[vals[j]][0] = dict[vals[j]][dict[vals[j]].size() - 1];
144  }
145  } else {
146  // if value not already in the dictionary add it
147 
148  // create a new vector for the indices
149  dict[vals[j]] = std::vector<indexT2>{innerIndices[j]};
150 
151  // if compression level 3 add the maximum delta and the last index
152  dict[vals[j]].push_back(innerIndices[j]);
153  dict[vals[j]].push_back(innerIndices[j]);
154  }
155 
156  } // end of value loop
157 
158  // ---- Stage 3: Find and Allocate Size of Column Data ---- //
159 
160  // create a variable to hold the size of the column
161  size_t outerByteSize = 0;
162 
163  // loop through dictionary finding the byte size of the total column data
164  for (auto &pair : dict) {
165  // change first value to be byte width of the maximum delta
166  pair.second[0] = byteWidth(pair.second[0]);
167 
168  // add the size of the run to the size of the column
169  //* value + index width + indices * index width + delimiter (index width)
170  outerByteSize += sizeof(T) + 1 +
171  (pair.second[0] * (pair.second.size() - 2)) +
172  pair.second[0];
173  }
174 
175  // allocate space for the column
176  try {
177  data[i] = malloc(outerByteSize);
178  } catch (std::bad_alloc &e) {
179  std::cout << "Error: " << e.what() << std::endl;
180  exit(1);
181  }
182 
183  // ---- Stage 4: Write the Data To Memory ---- //
184 
185  // get a help pointer for moving through raw memory
186  void *helpPtr = data[i];
187 
188  // loop through the dictionary and write to memory
189  for (auto &pair : dict) {
190  // Write the value to memory
191  *(T *)helpPtr = (T)pair.first;
192  helpPtr = (T *)helpPtr + 1;
193 
194  // also write the index width
195  *(uint8_t *)helpPtr = (uint8_t)pair.second[0];
196  helpPtr = (uint8_t *)helpPtr + 1;
197 
198  // loop through the indices and write them to memory
199  for (size_t k = 0; k < pair.second.size(); k++) {
200  // if compression level 3 skip the first two indices and cast the index
201  if (k == 0 || k == 1) {
202  continue;
203  }
204 
205  // create a type of the correct width
206  switch (pair.second[0]) {
207  case 1:
208  *(uint8_t *)helpPtr = (uint8_t)pair.second[k];
209  helpPtr = (uint8_t *)helpPtr + 1;
210  break;
211  case 2:
212  *(uint16_t *)helpPtr = (uint16_t)pair.second[k];
213  helpPtr = (uint16_t *)helpPtr + 1;
214  break;
215  case 4:
216  *(uint32_t *)helpPtr = (uint32_t)pair.second[k];
217  helpPtr = (uint32_t *)helpPtr + 1;
218  break;
219  case 8:
220  *(uint64_t *)helpPtr = (uint64_t)pair.second[k];
221  helpPtr = (uint64_t *)helpPtr + 1;
222  break;
223  }
224 
225  } // End of index loop
226 
227  // write a delimiter of the correct width
228  switch (pair.second[0]) {
229  case 1:
230  *(uint8_t *)helpPtr = (uint8_t)DELIM;
231  helpPtr = (uint8_t *)helpPtr + 1;
232  break;
233  case 2:
234  *(uint16_t *)helpPtr = (uint16_t)DELIM;
235  helpPtr = (uint16_t *)helpPtr + 1;
236  break;
237  case 4:
238  *(uint32_t *)helpPtr = (uint32_t)DELIM;
239  helpPtr = (uint32_t *)helpPtr + 1;
240  break;
241  case 8:
242  *(uint64_t *)helpPtr = (uint64_t)DELIM;
243  helpPtr = (uint64_t *)helpPtr + 1;
244  break;
245  }
246  // Set a pointer to the end of the data
247  endPointers[i] = helpPtr;
248 
249  } // End of dictionary loop
250 
251  } // end of column loop
252 
253  calculateCompSize();
254 
255 } // end of compressCSC
256 
257 } // end of namespace IVSparse