14 template <
typename T,
typename indexT,
bool columnMajor>
15 void SparseMatrix<T, indexT, 2, columnMajor>::encodeValueType() {
16 uint8_t byte0 =
sizeof(T);
17 uint8_t byte1 = std::is_floating_point<T>::value ? 1 : 0;
18 uint8_t byte2 = std::is_signed_v<T> ? 1 : 0;
19 uint8_t byte3 = columnMajor ? 1 : 0;
21 val_t = (byte3 << 24) | (byte2 << 16) | (byte1 << 8) | byte0;
25 template <
typename T,
typename indexT,
bool columnMajor>
26 void SparseMatrix<T, indexT, 2, columnMajor>::checkValueType() {
27 uint8_t byte0 = val_t & 0xFF;
28 uint8_t byte1 = (val_t >> 8) & 0xFF;
29 uint8_t byte2 = (val_t >> 16) & 0xFF;
30 uint8_t byte3 = (val_t >> 24) & 0xFF;
31 assert(byte0 ==
sizeof(T) &&
"Value type size does not match");
32 assert(byte1 == std::is_floating_point_v<T> &&
33 "Value type is not floating point");
34 assert(byte2 == std::is_signed_v<T> &&
"Value type is not signed");
35 assert(byte3 == columnMajor &&
"Major direction does not match");
// Sanity checks on the matrix dimensions and on the template types T / indexT.
// Every check is an assert(); assuming the standard <cassert> macro, none of
// them are evaluated when NDEBUG is defined.
// NOTE(review): this listing stops partway through the final assert's message,
// so whatever follows it in the function is not visible here.
39 template <
typename T,
typename indexT,
bool columnMajor>
40 void SparseMatrix<T, indexT, 2, columnMajor>::userChecks() {
// NOTE(review): this condition passes as soon as ANY of innerDim, outerDim or
// nnz is greater than 1, whereas the message asks for at least one row, column
// AND nonzero value — confirm which of the two is intended.
41 assert((innerDim > 1 || outerDim > 1 || nnz > 1) &&
42 "The matrix must have at least one row, column, and nonzero value");
// indexT must not be a floating point type.
43 assert(std::is_floating_point<indexT>::value ==
false &&
44 "The index type must be a non-floating point type");
// Both T and indexT must be arithmetic types.
45 assert((std::is_arithmetic<T>::value && std::is_arithmetic<indexT>::value) &&
46 "The value and index types must be numeric types");
// bool is an arithmetic type, so it is rejected as an index type separately.
47 assert((std::is_same<indexT, bool>::value ==
false) &&
48 "The index type must not be bool");
// Both dimensions must be strictly below the largest value indexT can hold.
49 assert((innerDim < std::numeric_limits<indexT>::max() &&
50 outerDim < std::numeric_limits<indexT>::max()) &&
51 "The number of rows and columns must be less than the maximum value "
// Adds up, in bytes, the pieces of storage that make up the matrix and
// accumulates the total into compSize.
// NOTE(review): some original lines are absent from this listing (the embedded
// numbering jumps from 58 to 63), so any initialisation of compSize before the
// first += is not visible here — confirm it is reset before accumulating.
57 template <
typename T,
typename indexT,
bool columnMajor>
58 void SparseMatrix<T, indexT, 2, columnMajor>::calculateCompSize() {
// Metadata block.
63 compSize += META_DATA_SIZE;
// Three pointer arrays with one entry per outer index: one of T* and two of
// indexT*. These match the shapes of the values / counts / indices arrays
// allocated in compressCSC.
66 compSize += (
sizeof(T *) * outerDim);
67 compSize += (
sizeof(indexT *) * outerDim);
68 compSize += (
sizeof(indexT *) * outerDim);
// Two indexT arrays with one entry per outer index; these match the shapes of
// valueSizes and indexSizes as allocated in compressCSC.
70 compSize += (
sizeof(indexT) * outerDim);
71 compSize += (
sizeof(indexT) * outerDim);
// Per-outer-index payload: valueSizes[i] elements of T, valueSizes[i] elements
// of indexT, and indexSizes[i] elements of indexT.
72 for (uint32_t i = 0; i < outerDim; i++) {
73 compSize += (
sizeof(T) * valueSizes[i]);
74 compSize += (
sizeof(indexT) * valueSizes[i]);
75 compSize += (
sizeof(indexT) * indexSizes[i]);
80 template <
typename T,
typename indexT,
bool columnMajor>
81 template <
typename T2,
typename indexT2>
82 void SparseMatrix<T, indexT, 2, columnMajor>::compressCSC(T2 *vals, indexT2 *innerIndices, indexT2 *outerPointers) {
88 index_t =
sizeof(indexT);
91 metadata =
new uint32_t[NUM_META_DATA];
93 metadata[1] = innerDim;
94 metadata[2] = outerDim;
97 metadata[5] = index_t;
100 #ifdef IVSPARSE_DEBUG
106 values = (T **)malloc(
sizeof(T *) * outerDim);
107 counts = (indexT **)malloc(
sizeof(indexT *) * outerDim);
108 indices = (indexT **)malloc(
sizeof(indexT *) * outerDim);
110 valueSizes = (indexT *)malloc(
sizeof(indexT) * outerDim);
111 indexSizes = (indexT *)malloc(
sizeof(indexT) * outerDim);
112 }
catch (std::bad_alloc &e) {
113 std::cerr <<
"Error: Could not allocate memory for the matrix" << std::endl;
120 #ifdef IVSPARSE_HAS_OPENMP
121 #pragma omp parallel for
123 for (uint32_t i = 0; i < outerDim; i++) {
125 std::map<T2, std::vector<indexT2>>
129 if (outerPointers[i] == outerPointers[i + 1]) {
135 indices[i] =
nullptr;
140 size_t numIndices = 0;
143 for (indexT2 j = outerPointers[i]; j < outerPointers[i + 1]; j++) {
145 if (dict.find(vals[j]) != dict.end()) {
147 dict[vals[j]].push_back(innerIndices[j]);
154 dict[vals[j]] = std::vector<indexT2>{innerIndices[j]};
164 values[i] = (T *)malloc(
sizeof(T) * dict.size());
165 counts[i] = (indexT *)malloc(
sizeof(indexT) * dict.size());
166 indices[i] = (indexT *)malloc(
sizeof(indexT) * numIndices);
167 }
catch (std::bad_alloc &e) {
168 std::cerr <<
"Error: Could not allocate memory for the matrix"
174 valueSizes[i] = dict.size();
175 indexSizes[i] = numIndices;
176 size_t performanceVecSize = 0;
177 size_t indexSize = 0;
181 for (
auto &pair : dict) {
182 values[i][performanceVecSize] = pair.first;
183 counts[i][performanceVecSize] = pair.second.size();
185 for (uint32_t j = 0; j < pair.second.size(); j++) {
186 indices[i][indexSize] = pair.second[j];
189 performanceVecSize++;