In the first workshop for the DPS915 course (Parallel Programming Fundamentals), we had to profile a simple application.
In a previous blog post, I listed the steps to profile an application on OS X.

The application we had to profile was:

 
 // Profile a Serial Application – Workshop 1  
 // w1.cpp

 #include <iostream>
 #include <iomanip>
 #include <cstdlib>
 #include <ctime>
 using namespace std;

 // Fills the n x n matrix `a` with pseudo-random values.
 //
 // a - array of n row pointers, each pointing at n floats (written in place)
 // n - number of rows and columns
 //
 // Each cell receives the next rand() result scaled by 1 / RAND_MAX; cells
 // are visited row by row, so the sequence depends on the current srand() seed.
 void init(float** a, int n) {
     const float scale = 1.0f / RAND_MAX;
     for (int row = 0; row < n; ++row) {
         float* cells = a[row];
         for (int col = 0; col < n; ++col) {
             cells[col] = rand() * scale;
         }
     }
 }

 // Computes c = a + 3 * b element by element for n x n matrices.
 //
 // a, b - input matrices (arrays of n row pointers to n floats each)
 // c    - output matrix with the same layout; every cell is overwritten
 // n    - number of rows and columns
 void add(float** a, float** b, float** c, int n) {
     for (int row = 0; row < n; ++row) {
         const float* lhs = a[row];
         const float* rhs = b[row];
         float* out = c[row];
         for (int col = 0; col < n; ++col) {
             out[col] = lhs[col] + 3.0f * rhs[col];
         }
     }
 }

 // Computes the matrix product c = a * b for n x n matrices.
 //
 // a, b - input matrices (arrays of n row pointers to n floats each)
 // c    - output matrix with the same layout; every cell is overwritten
 // n    - number of rows and columns
 //
 // Each output cell is the dot product of a row of `a` with a column of `b`,
 // accumulated in a local float from k = 0 upwards.
 void multiply(float** a, float** b, float** c, int n) {
     for (int row = 0; row < n; ++row) {
         const float* lhs = a[row];
         float* out = c[row];
         for (int col = 0; col < n; ++col) {
             float dot = 0.0f;
             for (int k = 0; k < n; ++k) {
                 dot += lhs[k] * b[k][col];
             }
             out[col] = dot;
         }
     }
 }

 // Entry point: runs add() followed by multiply() `nr` times on n x n
 // matrices and reports the elapsed wall-clock time.
 //
 // Usage: w1 <n> <nr>
 //   argv[1]  n  - size of the square matrices (must be positive)
 //   argv[2]  nr - number of add/multiply runs (must be positive)
 //
 // Returns 1 (after printing a message to cerr) when the argument count is
 // wrong or either value is not a positive integer; otherwise prints
 // "Elapsed time : <seconds>" and returns 0.
 int main(int argc, char* argv[]) {
     // start timing
     time_t ts, te;
     ts = time(nullptr);

     // interpret command-line arguments
     if (argc != 3) {
         cerr << "**invalid number of arguments**" << endl;
         return 1;
     }
     int n = atoi(argv[1]);  // size of matrices
     int nr = atoi(argv[2]); // number of runs

     // atoi yields 0 for non-numeric text, and a negative n would make the
     // array allocations below fail, so reject bad values up front.
     if (n <= 0 || nr <= 0) {
         cerr << "**invalid argument values**" << endl;
         return 1;
     }

     // allocate the three n x n matrices as arrays of row pointers
     float** a = new float*[n];
     for (int i = 0; i < n; i++)
         a[i] = new float[n];
     float** b = new float*[n];
     for (int i = 0; i < n; i++)
         b[i] = new float[n];
     float** c = new float*[n];
     for (int i = 0; i < n; i++)
         c[i] = new float[n];

     // seed the generator from the clock, then fill both inputs
     srand(time(nullptr));
     init(a, n);
     init(b, n);

     // the workload being profiled; multiply() overwrites what add() stored in c
     for (int i = 0; i < nr; i++) {
         add(a, b, c, n);
         multiply(a, b, c, n);
     }

     // release rows first, then the row-pointer arrays
     for (int i = 0; i < n; i++)
         delete [] a[i];
     delete [] a;
     for (int i = 0; i < n; i++)
         delete [] b[i];
     delete [] b;
     for (int i = 0; i < n; i++)
         delete [] c[i];
     delete [] c;

     // elapsed time
     te = time(nullptr);
     // fixed is required here: in the default floating-point format a
     // precision of 0 is treated as one significant digit, so an elapsed
     // time of 12 seconds would be printed as 1e+01.
     cout << fixed << setprecision(0);
     cout << "Elapsed time : " << difftime(te, ts) << endl;
 }

We had to run the application with 12 different combinations of matrix size and number of runs to see how much time the program spent executing the “add” and “multiply” functions.

Here are the profile results:

To ease the process of generating the profile data, I created a bash script to automate the runs:

 
 #!/bin/bash
 #
 # Builds w1.cpp with profiling enabled and profiles it once for every
 # combination of matrix size (N) and number of runs (NR). One log per
 # combination is written to results/<os>/<N>x<NR>.log, using the
 # Instruments Time Profiler on macOS and gprof everywhere else.

 # Matrix sizes and run counts. Every run count is paired with every size,
 # giving 12 sets in the order 80x50, 160x50, 320x50, 80x100, ...
 SIZES=(80 160 320)
 RUNS=(50 100 200 400)

 if [ "$(uname)" = "Darwin" ]
 then
   OS="mac"
   CC="g++-4.7"
 else
   OS="linux"
   CC="g++"
 fi

 echo "OS $OS"

 OPTIONS="-std=c++0x -O2 -g -pg"
 OBJ="w1"
 SRC="w1.cpp"
 RESULTS_DIR="results/$OS"

 INSTRUMENT_TEMPLATE="/Applications/Xcode.app/Contents/Applications/Instruments.app/Contents/Resources/templates/Time Profiler.tracetemplate"

 # compile workshop; stop on failure so a stale or missing binary is never profiled
 # ($OPTIONS is left unquoted on purpose so it splits into separate flags)
 $CC $OPTIONS -o "$OBJ" "$SRC" || exit 1

 # the log redirection below fails if the output directory does not exist
 mkdir -p "$RESULTS_DIR" || exit 1

 # generate profile info
 i=0
 for nr in "${RUNS[@]}"
 do
   for n in "${SIZES[@]}"
   do
     log="$RESULTS_DIR/${n}x${nr}.log"
     echo "Running ${i}th set"
     if [ "$OS" = "mac" ]
     then
       echo "Running on MacOS"
       instruments -t "$INSTRUMENT_TEMPLATE" -D "$log" "$OBJ" "$n" "$nr"
     else
       echo "Running some linux distro."
       # running the -pg build produces the profile data that gprof reads
       "./$OBJ" "$n" "$nr"
       gprof -p "$OBJ" > "$log"
     fi
     i=$((i + 1))
   done
 done

The script works on both Mac and Linux.
When it runs on a Mac, it uses the Instruments Time Profiler; on a Linux distro, it uses gprof.

I’m committing all my coursework to GitHub.

Any suggestions are more than welcome :)