// Suzaku Workpool pattern version 2 -- Application: Matrix Multiplication // B. Wilkinson March 23, 2016 #include #include "suzaku.h" #define N 4 // size of matrices #define T N * N // required Suzaku constant, number of tasks, max = INT_MAX - 1 double A[N][N], B[N][N], C[N][N], D[N][N]; // required workpool functions void init(int *tasks) { *tasks = T; return; } void diffuse(int taskID) { // uses same approach as Seeds sample but inefficient copying arrays // taskID used in computation int i; int a, b; double rowA[N],colB[N]; a = taskID / N; // row b = taskID % N; // column for (i = 0; i < N; i++) { rowA[i] = A[a][i]; // copy row of A. Strictly do not need to as can do SZ_Put("rowA",A[a]); // but will be needed in block multiplication colB[i] = B[i][b]; // copy one column of B into output } SZ_Put("rowA",rowA); SZ_Put("colB",colB); return; } void compute(int taskID) { int i; double out; double rowA[N],colB[N]; SZ_Get("rowA",rowA); SZ_Get("colB",colB); out = 0; for (i = 0; i < N; i++) { out += rowA[i] * colB[i]; } SZ_Put("out",&out); return; } void gather(int taskID) { int a,b; double out; SZ_Get("out",&out); a = taskID / N; b = taskID % N; C[a][b]= out; return; } // additional routines void print_array(double array[N][N]) { // print out an array int i,j; for (i = 0; i < N; i++){ printf("\n"); for(j = 0; j < N; j++) { printf("%5.2f ", array[i][j]); } } printf("\n"); return; } int main(int argc, char *argv[]) { // All variables declared here are in every process int p; // number of processes, set by SZ_Init() int i,j,k; double sum; double time1, time2; // for timing in master SZ_Init(p); // initialize MPI environment, sets P to number of processes for (i = 0; i < N; i++) { // set some initial values for A and B for (j = 0; j < N; j++) { A[i][j] = i + j*N; B[i][j] = j + i*N; } } // sequential matrix multiplication, answer in D time1 = SZ_Wtime(); //start time measurement for (i = 0; i < N; i++) { for (j = 0; j < N; j++) { sum = 0; for (k=0; k < N; k++) { sum += A[i][k]*B[k][j]; } D[i][j] = sum; } } time2 = SZ_Wtime(); //end time measurement printf("Time of sequential computation: %f seconds\n", time2-time1); time1 = SZ_Wtime(); // record time stamp SZ_Parallel_begin // start of parallel section SZ_Workpool2(init,diffuse,compute,gather); // workpool matrix multiplication,answer in C SZ_Parallel_end; // end of parallel time2 = SZ_Wtime(); // record time stamp printf("Time of parallel computation: %f seconds\n", time2-time1); printf("Array A"); print_array(A); printf("Array B"); print_array(B); printf("Array C"); print_array(C); // check sequential and parallel versions give same answers int error = 0; for (i = 0; i < N; i++) { for (j = 0; j < N; j++) { if (C[i][j] != D[i][j]) error = -1; } } if (error == -1) printf("ERROR, sequential and parallel versions give different answers\n"); else printf("Sequential and parallel versions give same answers\n"); SZ_Finalize(); return 0; }