numactl --interleave=all ./testing_dgetrf -N 100 -N 1000 --range 10:90:10 --range 100:900:100 --range 1000:9000:1000 --range 10000:20000:2000
MAGMA 1.6.0  compiled for CUDA capability >= 3.5
CUDA runtime 7000, driver 7000. OpenMP threads 16. MKL 11.2.3, MKL threads 16. 
device 0: Tesla K40c, 745.0 MHz clock, 11519.6 MB memory, capability 3.5
device 1: Tesla K40c, 745.0 MHz clock, 11519.6 MB memory, capability 3.5
device 2: Tesla K40c, 745.0 MHz clock, 11519.6 MB memory, capability 3.5
Usage: ./testing_dgetrf [options] [-h|--help]

ngpu 1
    M     N   CPU GFlop/s (sec)   GPU GFlop/s (sec)   |PA-LU|/(N*|A|)
=========================================================================
  100   100     ---   (  ---  )      0.60 (   0.00)     ---   
 1000  1000     ---   (  ---  )     58.50 (   0.01)     ---   
   10    10     ---   (  ---  )      0.03 (   0.00)     ---   
   20    20     ---   (  ---  )      0.21 (   0.00)     ---   
   30    30     ---   (  ---  )      0.00 (   0.02)     ---   
   40    40     ---   (  ---  )      0.89 (   0.00)     ---   
   50    50     ---   (  ---  )      1.29 (   0.00)     ---   
   60    60     ---   (  ---  )      1.82 (   0.00)     ---   
   70    70     ---   (  ---  )      1.71 (   0.00)     ---   
   80    80     ---   (  ---  )      2.47 (   0.00)     ---   
   90    90     ---   (  ---  )      2.87 (   0.00)     ---   
  100   100     ---   (  ---  )      3.62 (   0.00)     ---   
  200   200     ---   (  ---  )      4.03 (   0.00)     ---   
  300   300     ---   (  ---  )      8.82 (   0.00)     ---   
  400   400     ---   (  ---  )     14.86 (   0.00)     ---   
  500   500     ---   (  ---  )     21.87 (   0.00)     ---   
  600   600     ---   (  ---  )     29.41 (   0.00)     ---   
  700   700     ---   (  ---  )     37.04 (   0.01)     ---   
  800   800     ---   (  ---  )     45.54 (   0.01)     ---   
  900   900     ---   (  ---  )     52.82 (   0.01)     ---   
 1000  1000     ---   (  ---  )     62.48 (   0.01)     ---   
 2000  2000     ---   (  ---  )    154.80 (   0.03)     ---   
 3000  3000     ---   (  ---  )    251.08 (   0.07)     ---   
 4000  4000     ---   (  ---  )    320.89 (   0.13)     ---   
 5000  5000     ---   (  ---  )    410.68 (   0.20)     ---   
 6000  6000     ---   (  ---  )    483.75 (   0.30)     ---   
 7000  7000     ---   (  ---  )    575.18 (   0.40)     ---   
 8000  8000     ---   (  ---  )    638.29 (   0.53)     ---   
 9000  9000     ---   (  ---  )    677.18 (   0.72)     ---   
10000 10000     ---   (  ---  )    724.13 (   0.92)     ---   
12000 12000     ---   (  ---  )    800.38 (   1.44)     ---   
14000 14000     ---   (  ---  )    848.85 (   2.15)     ---   
16000 16000     ---   (  ---  )    888.31 (   3.07)     ---   
18000 18000     ---   (  ---  )    912.50 (   4.26)     ---   
20000 20000     ---   (  ---  )    940.37 (   5.67)     ---   

numactl --interleave=all ./testing_dgetrf_gpu -N 100 -N 1000 --range 10:90:10 --range 100:900:100 --range 1000:9000:1000 --range 10000:20000:2000
MAGMA 1.6.0  compiled for CUDA capability >= 3.5
CUDA runtime 7000, driver 7000. OpenMP threads 16. MKL 11.2.3, MKL threads 16. 
device 0: Tesla K40c, 745.0 MHz clock, 11519.6 MB memory, capability 3.5
device 1: Tesla K40c, 745.0 MHz clock, 11519.6 MB memory, capability 3.5
device 2: Tesla K40c, 745.0 MHz clock, 11519.6 MB memory, capability 3.5
Usage: ./testing_dgetrf_gpu [options] [-h|--help]

    M     N   CPU GFlop/s (sec)   GPU GFlop/s (sec)   |PA-LU|/(N*|A|)
=========================================================================
  100   100     ---   (  ---  )      0.55 (   0.00)     ---  
 1000  1000     ---   (  ---  )     62.50 (   0.01)     ---  
   10    10     ---   (  ---  )      0.01 (   0.00)     ---  
   20    20     ---   (  ---  )      0.10 (   0.00)     ---  
   30    30     ---   (  ---  )      0.21 (   0.00)     ---  
   40    40     ---   (  ---  )      0.46 (   0.00)     ---  
   50    50     ---   (  ---  )      0.76 (   0.00)     ---  
   60    60     ---   (  ---  )      1.14 (   0.00)     ---  
   70    70     ---   (  ---  )      1.23 (   0.00)     ---  
   80    80     ---   (  ---  )      1.69 (   0.00)     ---  
   90    90     ---   (  ---  )      2.04 (   0.00)     ---  
  100   100     ---   (  ---  )      2.50 (   0.00)     ---  
  200   200     ---   (  ---  )      2.67 (   0.00)     ---  
  300   300     ---   (  ---  )      6.80 (   0.00)     ---  
  400   400     ---   (  ---  )     12.39 (   0.00)     ---  
  500   500     ---   (  ---  )     19.27 (   0.00)     ---  
  600   600     ---   (  ---  )     27.91 (   0.01)     ---  
  700   700     ---   (  ---  )     37.40 (   0.01)     ---  
  800   800     ---   (  ---  )     47.21 (   0.01)     ---  
  900   900     ---   (  ---  )     57.21 (   0.01)     ---  
 1000  1000     ---   (  ---  )     67.08 (   0.01)     ---  
 2000  2000     ---   (  ---  )    184.44 (   0.03)     ---  
 3000  3000     ---   (  ---  )    315.04 (   0.06)     ---  
 4000  4000     ---   (  ---  )    397.68 (   0.11)     ---  
 5000  5000     ---   (  ---  )    515.53 (   0.16)     ---  
 6000  6000     ---   (  ---  )    632.60 (   0.23)     ---  
 7000  7000     ---   (  ---  )    708.18 (   0.32)     ---  
 8000  8000     ---   (  ---  )    783.29 (   0.44)     ---  
 9000  9000     ---   (  ---  )    801.62 (   0.61)     ---  
10000 10000     ---   (  ---  )    847.98 (   0.79)     ---  
12000 12000     ---   (  ---  )    910.15 (   1.27)     ---  
14000 14000     ---   (  ---  )    970.27 (   1.89)     ---  
16000 16000     ---   (  ---  )   1003.93 (   2.72)     ---  
18000 18000     ---   (  ---  )   1025.04 (   3.79)     ---  
20000 20000     ---   (  ---  )   1047.88 (   5.09)     ---  
