= mri_ca_register timing info =

 * tests conducted by NJS from 17-20 march 2012
 * using subject 'ernie'
 * using 'dev' build(s)
 * commandline:
{{{
mri_ca_register \
  -nobigventricles \
  -T transforms/talairach.lta \
  -align-after \
  -mask brainmask.mgz \
  norm.mgz \
  /autofs/cluster/freesurfer/centos6_x86_64/dev/average/RB_all_2008-03-26.gca \
  transforms/talairach.m3z
}}}
 * the opteron was a 'seychelles' cluster node (node0355), running CentOS4.8
 * the 2.66GHz intel was machine 'namic', running CentOS6.2
 * the 3GHz intel was a 'launchpad' cluster node, running CentOS5
 * the 3.3GHz intel was machine 'monster', which has 8 processors, running CentOS6.0

||'''processor'''||'''gcc v'''||'''flags'''||'''OMP threads'''||'''mri_ca_register runtime'''||
||2GHz AMD Opteron 246||3.4.6||-O3 -msse2 -mfpmath=sse||NA||12 hours, 46 minutes||
||2.66GHz Intel Xeon E5430 (Core)||3.4.6||-O3 -msse2 -mfpmath=sse||NA|| 6 hours, 3 minutes||
||3GHz Intel Xeon E5472 (Core)||3.4.6||-O3 -msse2 -mfpmath=sse||NA|| 5 hours, 46 minutes||
||3.3GHz Intel Xeon W5590 (Nehalem)||3.4.6||-O3 -msse2 -mfpmath=sse||NA||3 hours, 8 minutes||
||3.3GHz Intel Xeon W5590 (Nehalem)||4.1.2||-O3 -msse2 -mfpmath=sse||NA||3 hours, 10 minutes||
||3.3GHz Intel Xeon W5590 (Nehalem)||4.4.5||-O3 -msse2 -mfpmath=sse||NA||1 hour, 56 minutes||
||3.3GHz Intel Xeon W5590 (Nehalem)||4.4.5||-fopenmp -O3 -ftree-vectorize -msse4.1 -mfpmath=sse||1||1 hour, 58 minutes||
||3.3GHz Intel Xeon W5590 (Nehalem)||4.4.5||-fopenmp -O3 -ftree-vectorize -msse4.1 -mfpmath=sse||2||1 hour, 14 minutes||
||3.3GHz Intel Xeon W5590 (Nehalem)||4.4.5||-fopenmp -O3 -ftree-vectorize -msse4.1 -mfpmath=sse||3||0 hours, 57 minutes||
||3.3GHz Intel Xeon W5590 (Nehalem)||4.4.5||-fopenmp -O3 -ftree-vectorize -msse4.1 -mfpmath=sse||4||0 hours, 50 minutes||
||3.3GHz Intel Xeon W5590 (Nehalem)||4.4.5||-fopenmp -O3 -ftree-vectorize -msse4.1 -mfpmath=sse||5||0 hours, 44 minutes||
||3.3GHz Intel Xeon W5590 (Nehalem)||4.4.5||-fopenmp -O3 -ftree-vectorize -msse4.1 -mfpmath=sse||6||0 hours, 41 minutes||
||3.3GHz Intel Xeon W5590 (Nehalem)||4.4.5||-fopenmp -O3 -ftree-vectorize -msse4.1 -mfpmath=sse||7||0 hours, 40 minutes||
||3.3GHz Intel Xeon W5590 (Nehalem)||4.4.5||-fopenmp -O3 -ftree-vectorize -msse4.1 -mfpmath=sse||8||0 hours, 38 minutes||
||GPU: Tesla C2050|| || || ||0 hours, 19 minutes||

== observations ==
 * asegstatsdiff comparisons show minimal differences in results
 * nehalem architecture makes a difference (compared to amd opteron 200 series)
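 * gcc 4.4.5 alone cuts more than 1 hour off the runtime (3 hours, 10 minutes with gcc 4.1.2 vs. 1 hour, 56 minutes with gcc 4.4.5, same flags and processor)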
 * the -ftree-vectorize -msse4.1 flags do not make any difference over -msse2 (1 hour, 58 minutes with one OMP thread vs. 1 hour, 56 minutes)
 * omp threads plot:
{{attachment:runtimes.jpg}}