Skip to content

Commit 027530d

Browse files
committed
feat: Parallelize masked sparse matrix-matrix product with pthreads
1 parent 80bc753 commit 027530d

File tree

5 files changed

+283
-6
lines changed

5 files changed

+283
-6
lines changed

CMakeLists.txt

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@ option(ENABLE_OPENMP "Enable OpenMP support?" OFF)
1212

1313
add_library(coo2csc OBJECT coo2csc.c coo2csc.h)
1414
add_library(mmio OBJECT mmio.h mmio.c)
15-
add_library(graph OBJECT Graph.cpp Graph.hpp)
1615

1716
find_package(PkgConfig)
1817

@@ -24,18 +23,25 @@ if (ENABLE_OPENCILK)
2423
include(FindOpenCilk.cmake)
2524
endif ()
2625

26+
if (ENABLE_PTHREADS)
27+
include(FindPthreads.cmake)
28+
endif ()
29+
2730
if (ENABLE_OPENMP)
2831
set(CMAKE_C_COMPILER gcc)
2932
set(CMAKE_CXX_COMPILER g++)
3033
add_executable(triangle_v3_openmp triangle_v3_openmp.cpp)
34+
add_executable(triangle_v4_openmp triangle_v4_openmp.cpp)
3135
find_package(OpenMP)
3236
if (OpenMP_CXX_FOUND)
3337
target_link_libraries(triangle_v3_openmp PUBLIC OpenMP::OpenMP_CXX mmio coo2csc)
38+
target_link_libraries(triangle_v4_openmp PUBLIC OpenMP::OpenMP_CXX mmio coo2csc)
39+
add_compile_options(-ftree-vectorize -O3)
3440
endif ()
3541
endif ()
3642

37-
add_compile_options(-g)
43+
add_compile_options(-O3)
3844
add_executable(triangle_v3_serial triangle_v3_serial.cpp)
3945
target_link_libraries(triangle_v3_serial PUBLIC mmio coo2csc)
4046
add_executable(triangle_v4_serial triangle_v4_serial.cpp)
41-
target_link_libraries(triangle_v4_serial PUBLIC mmio coo2csc graph)
47+
target_link_libraries(triangle_v4_serial PUBLIC mmio coo2csc)

FindPthreads.cmake

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
#.rst:
# FindPthreads
# ------------
# Locates the system threading library and declares the pthreads build of
# the masked sparse matrix-matrix product.
#
# This will define the following targets::
#
#   Threads::Threads      - imported target provided by CMake's FindThreads module
#   Graph                 - object library built from Graph.cpp / Graph.hpp
#   triangle_v4_pthreads  - executable built from triangle_v4_pthreads.cpp

# Prefer the compiler's -pthread flag over linking the library by name.
set(THREADS_PREFER_PTHREAD_FLAG ON)
find_package(Threads REQUIRED)

# add_compile_options() only initialises targets created AFTER the call, so it
# has to precede the targets below for them to be built with optimisation.
add_compile_options(-O3)

add_library(Graph OBJECT Graph.cpp Graph.hpp)
add_executable(triangle_v4_pthreads triangle_v4_pthreads.cpp)
target_link_libraries(triangle_v4_pthreads PUBLIC Threads::Threads mmio coo2csc Graph)

Graph.cpp

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
#include "Graph.hpp"
2+
3+
void *Graph::adjMatMul() {
4+
for (int i = this->start; i < this->end; ++i) {
5+
for (int j = 0; j < this->cscColumn.at(i + 1) - this->cscColumn.at(i); ++j) {
6+
int a_row = this->cscRow.at(cscColumn.at(i) + j);
7+
int a_col = i;
8+
9+
std::vector<uint32_t> k = std::vector<uint32_t>(this->cscColumn.at(a_row + 1) - this->cscColumn.at(a_row));
10+
std::vector<uint32_t> l = std::vector<uint32_t>(this->cscColumn.at(a_col + 1) - this->cscColumn.at(a_col));
11+
12+
int s;
13+
for (s = 0; s < k.size(); ++s) {
14+
k[s] = this->cscRow.at(this->cscColumn.at(a_row) + s);
15+
}
16+
for (s = 0; s < l.size(); ++s) {
17+
l[s] = this->cscRow.at(this->cscColumn.at(a_col) + s);
18+
}
19+
20+
int m = 0;
21+
int n = 0;
22+
int mul_value = 0;
23+
24+
while (m != k.size() && n != l.size()) {
25+
if (k[m] == l[n]) {
26+
++mul_value;
27+
++m;
28+
++n;
29+
} else if (k[m] > l[n]) {
30+
++n;
31+
} else {
32+
++m;
33+
}
34+
}
35+
36+
if (mul_value) {
37+
c_values.at(this->cscColumn.at(i) + j) = mul_value;
38+
}
39+
}
40+
}
41+
}
42+
43+
/**
 * Builds a work item that owns its own copies of the CSC arrays.
 *
 * @param cscRow     row index of every stored non-zero
 * @param cscColumn  per-column offsets into cscRow
 * @param c_values   output buffer, one slot per stored non-zero
 * @param start      first column processed by adjMatMul (inclusive)
 * @param end        last column processed by adjMatMul (exclusive)
 * @param nnz        non-zero count supplied by the caller (stored only)
 * @param id         identifier supplied by the caller (stored only)
 *
 * The vectors arrive by value, so they are moved into the members rather
 * than copied a second time.
 */
Graph::Graph(std::vector<uint32_t> cscRow, std::vector<uint32_t> cscColumn,
             std::vector<uint32_t> c_values, int start, int end, int nnz, int id)
    : cscRow(std::move(cscRow)),
      cscColumn(std::move(cscColumn)),
      c_values(std::move(c_values)),
      nnz(nnz),
      start(start),
      end(end),
      id(id) {}
53+
54+

Graph.hpp

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
#ifndef VERTEXWISE_TRIANGLE_COUNTING_GRAPH_H
2+
#define VERTEXWISE_TRIANGLE_COUNTING_GRAPH_H
3+
4+
#include <cstdint>
#include <iostream>
#include <utility>
#include <vector>
6+
7+
/**
 * Work item for one thread of the masked sparse matrix-matrix product.
 *
 * Holds a matrix in compressed-sparse-column form together with the
 * half-open column range [start, end) that adjMatMul() processes.
 */
class Graph {
public:
    std::vector<uint32_t> cscRow;     // row index of every stored non-zero
    std::vector<uint32_t> cscColumn;  // per-column offsets into cscRow
    std::vector<uint32_t> c_values;   // per-non-zero results written by adjMatMul()
    // Default member initialisers keep a default-constructed Graph in a
    // well-defined (empty range) state instead of holding indeterminate ints.
    int nnz{0};
    int start{0};
    int end{0};
    int id{0};

    // Fills c_values for the columns in [start, end).
    void *adjMatMul();

    Graph(std::vector<uint32_t> cscRow, std::vector<uint32_t> cscColumn,
          std::vector<uint32_t> c_values, int start, int end, int nnz, int id);
    Graph() = default;
    // No destructor is declared: the implicit one is sufficient, and leaving
    // it implicit keeps the compiler-generated move operations available.

    // Trampoline with a thread start-routine signature: `context` must point
    // to a live Graph, whose adjMatMul() result is returned.
    static void *statAdjMatMul(void *context) {
        return static_cast<Graph *>(context)->adjMatMul();
    }
};
27+
28+
29+
30+
#endif //VERTEXWISE_TRIANGLE_COUNTING_GRAPH_H

triangle_v4_pthreads.cpp

Lines changed: 169 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,170 @@
1-
//
2-
// Created by pkarakal on 12/6/20.
3-
//
1+
#include <iostream>
2+
#include <cstdlib>
3+
#include <chrono>
4+
#include <vector>
5+
#include <pthread.h>
6+
#include <algorithm>
7+
#include "Graph.hpp"
48

9+
extern "C" {
10+
#include "mmio.h"
11+
#include "coo2csc.h"
12+
}
13+
14+
15+
int main(int argc, char** argv){
16+
int ret_code;
17+
MM_typecode matcode;
18+
FILE *f;
19+
int M, N, nnz;
20+
uint32_t i;
21+
std::vector<uint32_t> I, J;
22+
std::vector<double> val;
23+
24+
if (argc < 2)
25+
{
26+
fprintf(stderr, "Usage: %s [martix-market-filename] [0 for binary or 1 for non binary]\n", argv[0]);
27+
exit(1);
28+
}
29+
else
30+
{
31+
if ((f = fopen(argv[1], "r")) == nullptr)
32+
exit(1);
33+
}
34+
35+
if (mm_read_banner(f, &matcode) != 0)
36+
{
37+
printf("Could not process Matrix Market banner.\n");
38+
exit(1);
39+
}
40+
41+
int threads{};
42+
if(argc == 3){
43+
threads= atoi(argv[2]);
44+
} else {
45+
threads = 4;
46+
}
47+
48+
49+
/* This is how one can screen matrix types if their application */
50+
/* only supports a subset of the Matrix Market data types. */
51+
52+
if (mm_is_complex(matcode) && mm_is_matrix(matcode) &&
53+
mm_is_sparse(matcode) )
54+
{
55+
printf("Sorry, this application does not support ");
56+
printf("Market Market type: [%s]\n", mm_typecode_to_str(matcode));
57+
exit(1);
58+
}
59+
60+
/* find out size of sparse matrix .... */
61+
62+
if ((ret_code = mm_read_mtx_crd_size(f, &M, &N, &nnz)) != 0)
63+
exit(1);
64+
65+
66+
I = std::vector<uint32_t>(nnz);
67+
J = std::vector<uint32_t>(nnz);
68+
val = std::vector<double>(nnz);
69+
70+
std::vector<uint32_t> cscRow = std::vector<uint32_t>(2*nnz);
71+
std::vector<uint32_t> cscColumn = std::vector<uint32_t>(N+1);
72+
std::vector<uint32_t> c_values = std::vector<uint32_t>(0);
73+
74+
/* NOTE: when reading in doubles, ANSI C requires the use of the "l" */
75+
/* specifier as in "%lg", "%lf", "%le", otherwise errors will occur */
76+
/* (ANSI C X3.159-1989, Sec. 4.9.6.2, p. 136 lines 13-15) */
77+
78+
/* Replace missing val column with 1s and change the fscanf to match pattern matrices*/
79+
80+
if (!mm_is_pattern(matcode)) {
81+
for (i = 0; i < nnz; i++) {
82+
fscanf(f, "%d %d %lg\n", &I[i], &J[i], &val[i]);
83+
I[i]--; /* adjust from 1-based to 0-based */
84+
J[i]--;
85+
}
86+
} else {
87+
for (i = 0; i < nnz; i++) {
88+
fscanf(f, "%d %d\n", &I[i], &J[i]);
89+
val[i] = 1;
90+
I[i]--; /* adjust from 1-based to 0-based */
91+
J[i]--;
92+
}
93+
}
94+
95+
if (f !=stdin) fclose(f);
96+
97+
if(M != N) {
98+
printf("COO matrix' columns and rows are not the same");
99+
}
100+
101+
// create symmetric values
102+
std::vector<uint32_t> temp1 = std::vector<uint32_t>(I.begin(), I.end());
103+
I.insert(std::end(I), std::begin(J), std::end(J));
104+
J.insert(std::end(J), std::begin(temp1), std::end(temp1));
105+
temp1.clear();
106+
107+
if (I[0] < J[0]) {
108+
coo2csc(&cscRow[0], &cscColumn[0], &I[0], &J[0], 2 * nnz, M, 0);
109+
} else {
110+
coo2csc(&cscRow[0], &cscColumn[0], &J[0], &I[0], 2 * nnz, N, 0);
111+
}
112+
113+
std::sort(cscColumn.begin(), cscColumn.end());
114+
115+
std::vector<int>c3 = std::vector<int>(0);
116+
std::vector<int>ones = std::vector<int>(N, 1);
117+
std::vector<int>result_vector = std::vector<int>(N, 0);
118+
c_values = std::vector<uint32_t>(2*nnz);
119+
120+
auto start = std::chrono::high_resolution_clock::now();
121+
122+
Graph graphs[threads];
123+
124+
pthread_t *pthreads;
125+
pthreads = (pthread_t *)malloc(threads*sizeof(pthread_t));
126+
127+
int chunk = 1;
128+
if(threads > 0) {
129+
chunk = N / (threads);
130+
}
131+
132+
for(i = 0; i < threads-1; i++) {
133+
graphs[i] = Graph(cscRow, cscColumn, c_values, i*chunk, (i+1)*chunk, nnz, i);
134+
pthread_create(&pthreads[i], NULL, Graph::statAdjMatMul, &graphs[i]);
135+
}
136+
// The last thread is left out so as to calculate the mod of the chunk division!
137+
138+
graphs[threads -1] = Graph(cscRow,cscColumn, c_values, (threads-1)*chunk, (threads)*chunk + (N%threads), nnz, threads -1 );
139+
140+
pthread_create(&pthreads[threads - 1], NULL, Graph::statAdjMatMul, &graphs[threads - 1]);
141+
142+
for(i = 0; i < threads; i++) {
143+
pthread_join(pthreads[i], NULL);
144+
}
145+
146+
for(i = 0; i < N; i++) {
147+
for(int j = 0; j < cscColumn.at(i+1) - cscColumn.at(i); j++) {
148+
int row = cscRow.at(cscColumn.at(i) + j);
149+
int col = i;
150+
int value = c_values.at(cscColumn.at(i) + j);
151+
result_vector.at(row) += value * ones.at(col);
152+
}
153+
}
154+
155+
for(int res: result_vector){
156+
c3.push_back(res/2);
157+
}
158+
159+
auto stop = std::chrono::high_resolution_clock::now();
160+
161+
std::chrono::duration<double> elapsed = stop - start;
162+
std::cout<<"Took "<< elapsed.count() <<std::endl;
163+
164+
// for(int item: c3){
165+
// std::cout<< item << " ";
166+
// }
167+
std::cout<<std::endl;
168+
169+
return 0;
170+
}

0 commit comments

Comments
 (0)