IVSparse  v1.0
A sparse matrix compression library.
VCSC_Private_Methods.hpp
1 
9 #pragma once
10 
11 namespace IVSparse
12 {
13 
14  // Encodes the value type of the matrix in a uint32_t
15  template <typename T, typename indexT, bool columnMajor>
16  void SparseMatrix<T, indexT, 2, columnMajor>::encodeValueType()
17  {
18  uint8_t byte0 = sizeof(T);
19  uint8_t byte1 = std::is_floating_point<T>::value ? 1 : 0;
20  uint8_t byte2 = std::is_signed_v<T> ? 1 : 0;
21  uint8_t byte3 = columnMajor ? 1 : 0;
22 
23  val_t = (byte3 << 24) | (byte2 << 16) | (byte1 << 8) | byte0;
24  }
25 
26  // Checks if the value type is correct for the matrix
27  template <typename T, typename indexT, bool columnMajor>
28  void SparseMatrix<T, indexT, 2, columnMajor>::checkValueType()
29  {
30  uint8_t byte0 = val_t & 0xFF;
31  uint8_t byte1 = (val_t >> 8) & 0xFF;
32  uint8_t byte2 = (val_t >> 16) & 0xFF;
33  uint8_t byte3 = (val_t >> 24) & 0xFF;
34  assert(byte0 == sizeof(T) && "Value type size does not match");
35  assert(byte1 == std::is_floating_point_v<T> && "Value type is not floating point");
36  assert(byte2 == std::is_signed_v<T> && "Value type is not signed");
37  assert(byte3 == columnMajor && "Major direction does not match");
38  }
39 
40  // performs some simple user checks on the matrices metadata
41  template <typename T, typename indexT, bool columnMajor>
42  void SparseMatrix<T, indexT, 2, columnMajor>::userChecks()
43  {
44  assert((innerDim > 1 || outerDim > 1 || nnz > 1) && "The matrix must have at least one row, column, and nonzero value");
45  assert(std::is_floating_point<indexT>::value == false && "The index type must be a non-floating point type");
46  assert((std::is_arithmetic<T>::value && std::is_arithmetic<indexT>::value) && "The value and index types must be numeric types");
47  assert((std::is_same<indexT, bool>::value == false) && "The index type must not be bool");
48  assert((innerDim < std::numeric_limits<indexT>::max() && outerDim < std::numeric_limits<indexT>::max()) && "The number of rows and columns must be less than the maximum value of the index type");
49  checkValueType();
50  }
51 
52  // Calculates the current byte size of the matrix in memory
53  template <typename T, typename indexT, bool columnMajor>
54  void SparseMatrix<T, indexT, 2, columnMajor>::calculateCompSize()
55  {
56  uint64_t TotalvalueSizes = 0;
57  uint64_t TotalindexSizes = 0;
58  uint64_t TotalcountSizes = 0;
59 
60  // set compSize to zero
61  compSize = 0;
62 
63  // add the size of the metadata
64  compSize += META_DATA_SIZE;
65 
66  // add the performance vectors
67  compSize += (sizeof(T *) * outerDim); // values
68  compSize += (sizeof(indexT *) * outerDim); // counts
69  compSize += (sizeof(indexT *) * outerDim); // indices
70 
71  compSize += (sizeof(indexT) * outerDim); // valueSizes
72  compSize += (sizeof(indexT) * outerDim); // indexSizes
73  for (uint32_t i = 0; i < outerDim; i++)
74  {
75  compSize += (sizeof(T) * valueSizes[i]); // values
76  compSize += (sizeof(indexT) * valueSizes[i]); // counts
77  compSize += (sizeof(indexT) * indexSizes[i]); // indices
78  }
79  }
80 
81  // Compression Algorithm for going from CSC to IVCSC
82  template <typename T, typename indexT, bool columnMajor>
83  template <typename T2, typename indexT2>
84  void SparseMatrix<T, indexT, 2, columnMajor>::compressCSC(T2 *vals, indexT2 *innerIndices, indexT2 *outerPointers)
85  {
86  // ---- Stage 1: Setup the Matrix ---- //
87 
88  // set the value and index types of the matrix
89  encodeValueType();
90  index_t = sizeof(indexT);
91 
92  // allocate space for metadata
93  metadata = new uint32_t[NUM_META_DATA];
94  metadata[0] = 2;
95  metadata[1] = innerDim;
96  metadata[2] = outerDim;
97  metadata[3] = nnz;
98  metadata[4] = val_t;
99  metadata[5] = index_t;
100 
101  // run the user checks on the metadata
102  #ifdef CSF_DEBUG
103  userChecks();
104  #endif
105 
106  // allocate space for the 2D Run lenngth encoded CSC matrix
107  try
108  {
109  values = (T **)malloc(sizeof(T *) * outerDim);
110  counts = (indexT **)malloc(sizeof(indexT *) * outerDim);
111  indices = (indexT **)malloc(sizeof(indexT *) * outerDim);
112 
113  valueSizes = (indexT *)malloc(sizeof(indexT) * outerDim);
114  indexSizes = (indexT *)malloc(sizeof(indexT) * outerDim);
115  }
116  catch (std::bad_alloc &e)
117  {
118  std::cerr << "Error: Could not allocate memory for the matrix" << std::endl;
119  exit(1);
120  }
121 
122  // ---- Stage 2: Construct the Dictionary For Each Column ---- //
123 
124  // Loop through each column and construct a middle data structre for the matrix
125  #ifdef CSF_PARALLEL
126  #pragma omp parallel for
127  #endif
128  for (uint32_t i = 0; i < outerDim; i++)
129  {
130  // create the data structure to temporarily hold the data
131  std::map<T2, std::vector<indexT2>> dict; // Key = value, Value = vector of indices
132 
133  // check if the current column is empty
134  if (outerPointers[i] == outerPointers[i + 1])
135  {
136  valueSizes[i] = 0;
137  indexSizes[i] = 0;
138 
139  values[i] = nullptr;
140  counts[i] = nullptr;
141  indices[i] = nullptr;
142  continue;
143  }
144 
145  // create a variable to hold the size of the column
146  size_t numIndices = 0;
147 
148  // loop through each value in the column and add it to dict
149  for (indexT2 j = outerPointers[i]; j < outerPointers[i + 1]; j++)
150  {
151 
152  // check if the value is already in the dictionary or not
153  if (dict.find(vals[j]) != dict.end())
154  {
155  // add the index
156  dict[vals[j]].push_back(innerIndices[j]);
157 
158  numIndices++;
159  }
160  else
161  {
162  // if value not already in the dictionary add it
163 
164  // create a new vector for the indices
165  dict[vals[j]] = std::vector<indexT2>{innerIndices[j]};
166 
167  numIndices++;
168  }
169 
170  } // end value loop
171 
172  // ---- Stage 3: Allocate Size of Column Data ---- //
173 
174  try
175  {
176  values[i] = (T *)malloc(sizeof(T) * dict.size());
177  counts[i] = (indexT *)malloc(sizeof(indexT) * dict.size());
178  indices[i] = (indexT *)malloc(sizeof(indexT) * numIndices);
179  }
180  catch (std::bad_alloc &e)
181  {
182  std::cerr << "Error: Could not allocate memory for the matrix" << std::endl;
183  exit(1);
184  }
185 
186  // set the size of the column
187  valueSizes[i] = dict.size();
188  indexSizes[i] = numIndices;
189  size_t performanceVecSize = 0;
190  size_t indexSize = 0;
191 
192  // ---- Stage 4: Populate the Column Data ---- //
193 
194  for (auto &pair : dict)
195  {
196  values[i][performanceVecSize] = pair.first;
197  counts[i][performanceVecSize] = pair.second.size();
198 
199  for (indexT2 j = 0; j < pair.second.size(); j++)
200  {
201  indices[i][indexSize] = pair.second[j];
202  indexSize++;
203  }
204  performanceVecSize++;
205  }
206 
207  } // end column loop
208 
209  calculateCompSize();
210 
211  } // end compressCSC
212 
213 } // end namespace IVSparse