15 template <
typename T,
typename indexT,
bool columnMajor>
16 void SparseMatrix<T, indexT, 2, columnMajor>::encodeValueType()
18 uint8_t byte0 =
sizeof(T);
19 uint8_t byte1 = std::is_floating_point<T>::value ? 1 : 0;
20 uint8_t byte2 = std::is_signed_v<T> ? 1 : 0;
21 uint8_t byte3 = columnMajor ? 1 : 0;
23 val_t = (byte3 << 24) | (byte2 << 16) | (byte1 << 8) | byte0;
27 template <
typename T,
typename indexT,
bool columnMajor>
28 void SparseMatrix<T, indexT, 2, columnMajor>::checkValueType()
30 uint8_t byte0 = val_t & 0xFF;
31 uint8_t byte1 = (val_t >> 8) & 0xFF;
32 uint8_t byte2 = (val_t >> 16) & 0xFF;
33 uint8_t byte3 = (val_t >> 24) & 0xFF;
34 assert(byte0 ==
sizeof(T) &&
"Value type size does not match");
35 assert(byte1 == std::is_floating_point_v<T> &&
"Value type is not floating point");
36 assert(byte2 == std::is_signed_v<T> &&
"Value type is not signed");
37 assert(byte3 == columnMajor &&
"Major direction does not match");
41 template <
typename T,
typename indexT,
bool columnMajor>
42 void SparseMatrix<T, indexT, 2, columnMajor>::userChecks()
44 assert((innerDim > 1 || outerDim > 1 || nnz > 1) &&
"The matrix must have at least one row, column, and nonzero value");
45 assert(std::is_floating_point<indexT>::value ==
false &&
"The index type must be a non-floating point type");
46 assert((std::is_arithmetic<T>::value && std::is_arithmetic<indexT>::value) &&
"The value and index types must be numeric types");
47 assert((std::is_same<indexT, bool>::value ==
false) &&
"The index type must not be bool");
48 assert((innerDim < std::numeric_limits<indexT>::max() && outerDim < std::numeric_limits<indexT>::max()) &&
"The number of rows and columns must be less than the maximum value of the index type");
53 template <
typename T,
typename indexT,
bool columnMajor>
54 void SparseMatrix<T, indexT, 2, columnMajor>::calculateCompSize()
56 uint64_t TotalvalueSizes = 0;
57 uint64_t TotalindexSizes = 0;
58 uint64_t TotalcountSizes = 0;
64 compSize += META_DATA_SIZE;
67 compSize += (
sizeof(T *) * outerDim);
68 compSize += (
sizeof(indexT *) * outerDim);
69 compSize += (
sizeof(indexT *) * outerDim);
71 compSize += (
sizeof(indexT) * outerDim);
72 compSize += (
sizeof(indexT) * outerDim);
73 for (uint32_t i = 0; i < outerDim; i++)
75 compSize += (
sizeof(T) * valueSizes[i]);
76 compSize += (
sizeof(indexT) * valueSizes[i]);
77 compSize += (
sizeof(indexT) * indexSizes[i]);
82 template <
typename T,
typename indexT,
bool columnMajor>
83 template <
typename T2,
typename indexT2>
84 void SparseMatrix<T, indexT, 2, columnMajor>::compressCSC(T2 *vals, indexT2 *innerIndices, indexT2 *outerPointers)
90 index_t =
sizeof(indexT);
93 metadata =
new uint32_t[NUM_META_DATA];
95 metadata[1] = innerDim;
96 metadata[2] = outerDim;
99 metadata[5] = index_t;
109 values = (T **)malloc(
sizeof(T *) * outerDim);
110 counts = (indexT **)malloc(
sizeof(indexT *) * outerDim);
111 indices = (indexT **)malloc(
sizeof(indexT *) * outerDim);
113 valueSizes = (indexT *)malloc(
sizeof(indexT) * outerDim);
114 indexSizes = (indexT *)malloc(
sizeof(indexT) * outerDim);
116 catch (std::bad_alloc &e)
118 std::cerr <<
"Error: Could not allocate memory for the matrix" << std::endl;
126 #pragma omp parallel for
128 for (uint32_t i = 0; i < outerDim; i++)
131 std::map<T2, std::vector<indexT2>> dict;
134 if (outerPointers[i] == outerPointers[i + 1])
141 indices[i] =
nullptr;
146 size_t numIndices = 0;
149 for (indexT2 j = outerPointers[i]; j < outerPointers[i + 1]; j++)
153 if (dict.find(vals[j]) != dict.end())
156 dict[vals[j]].push_back(innerIndices[j]);
165 dict[vals[j]] = std::vector<indexT2>{innerIndices[j]};
176 values[i] = (T *)malloc(
sizeof(T) * dict.size());
177 counts[i] = (indexT *)malloc(
sizeof(indexT) * dict.size());
178 indices[i] = (indexT *)malloc(
sizeof(indexT) * numIndices);
180 catch (std::bad_alloc &e)
182 std::cerr <<
"Error: Could not allocate memory for the matrix" << std::endl;
187 valueSizes[i] = dict.size();
188 indexSizes[i] = numIndices;
189 size_t performanceVecSize = 0;
190 size_t indexSize = 0;
194 for (
auto &pair : dict)
196 values[i][performanceVecSize] = pair.first;
197 counts[i][performanceVecSize] = pair.second.size();
199 for (indexT2 j = 0; j < pair.second.size(); j++)
201 indices[i][indexSize] = pair.second[j];
204 performanceVecSize++;