<p><b>dwj07@fsu.edu</b> 2011-10-13 11:29:57 -0600 (Thu, 13 Oct 2011)</p><p><br>
        -- BRANCH COMMIT --<br>
<br>
        Finializing module_timer, and updating a few of the timer calls to reflect the new options.<br>
<br>
        Also updating the makefile to better handle the PAPI libraries.<br>
        Added three new options on the trunk's makefile.<br>
                ifort-serial<br>
                ifort-papi<br>
                ifort-papi-serial<br>
<br>
        ifort-serial using ifort without any mpi calls and system clock timers<br>
        ifort-papi uses ifort with mpi calls and the papi timers<br>
        ifort-papi-serial uses ifort without mpi calls with the papi timers<br>
<br>
        standard ifort uses mpi calls with mpi timers<br>
</p><hr noshade><pre><font color="gray">Modified: branches/ocean_projects/performance/Makefile
===================================================================
--- branches/ocean_projects/performance/Makefile        2011-10-12 23:39:32 UTC (rev 1070)
+++ branches/ocean_projects/performance/Makefile        2011-10-13 17:29:57 UTC (rev 1071)
@@ -102,11 +102,25 @@
         &quot;CFLAGS = -O3 -m64&quot; \
         &quot;LDFLAGS = -O3&quot; \
         &quot;CORE = $(CORE)&quot; \
-        &quot;CPPFLAGS = -DRKIND=8 $(MODEL_FORMULATION) -D_PAPI -D_MPI -DUNDERSCORE -m64 $(FILE_OFFSET) $(ZOLTAN_DEFINE)&quot; )
+        &quot;CPPFLAGS = -DRKIND=8 $(MODEL_FORMULATION) -D_PAPI -D_MPI -DUNDERSCORE -m64 $(FILE_OFFSET) $(ZOLTAN_DEFINE)&quot; \
+        &quot;PAPILIBS = -L$(PAPI)/lib -lpapi&quot; )
 
+ifort-papi-serial:
+        ( make all \
+        &quot;FC = ifort&quot; \
+        &quot;CC = gcc&quot; \
+        &quot;SFC = ifort&quot; \
+        &quot;SCC = gcc&quot; \
+        &quot;FFLAGS = -real-size 64 -O3 -convert big_endian -FR&quot; \
+        &quot;CFLAGS = -O3 -m64&quot; \
+        &quot;LDFLAGS = -O3&quot; \
+        &quot;CORE = $(CORE)&quot; \
+        &quot;CPPFLAGS = -DRKIND=8 $(MODEL_FORMULATION) -D_PAPI -DUNDERSCORE -m64 $(FILE_OFFSET) $(ZOLTAN_DEFINE)&quot; \
+        &quot;PAPILIBS = -L$(PAPI)/lib -lpapi&quot; )
+
 ifort:
         ( make all \
-        &quot;FC = mpif90&quot; \
+        &quot;FC = ifort&quot; \
         &quot;CC = gcc&quot; \
         &quot;SFC = ifort&quot; \
         &quot;SCC = gcc&quot; \
@@ -114,7 +128,7 @@
         &quot;CFLAGS = -O3 -m64&quot; \
         &quot;LDFLAGS = -O3&quot; \
         &quot;CORE = $(CORE)&quot; \
-        &quot;CPPFLAGS = -DRKIND=8 $(MODEL_FORMULATION) -D_MPI -DUNDERSCORE -m64 $(FILE_OFFSET) $(ZOLTAN_DEFINE)&quot; )
+        &quot;CPPFLAGS = -DRKIND=8 $(MODEL_FORMULATION) -DUNDERSCORE -m64 $(FILE_OFFSET) $(ZOLTAN_DEFINE)&quot; )
 
 gfortran:
         ( make all \
@@ -167,7 +181,7 @@
 
 CPPINCLUDES = -I../inc -I$(NETCDF)/include -I$(PAPI)/include
 FCINCLUDES = -I../inc -I$(NETCDF)/include -I$(PAPI)/include
-LIBS = -L$(NETCDF)/lib -lnetcdf -L$(PAPI)/lib -lpapi
+LIBS = -L$(NETCDF)/lib -lnetcdf $(PAPILIBS)
 
 RM = rm -f
 CPP = cpp -C -P -traditional

Modified: branches/ocean_projects/performance/src/core_ocean/mpas_ocn_mpas_core.F
===================================================================
--- branches/ocean_projects/performance/src/core_ocean/mpas_ocn_mpas_core.F        2011-10-12 23:39:32 UTC (rev 1070)
+++ branches/ocean_projects/performance/src/core_ocean/mpas_ocn_mpas_core.F        2011-10-13 17:29:57 UTC (rev 1071)
@@ -34,6 +34,8 @@
    integer, parameter :: restartAlarmID = 2
    integer, parameter :: statsAlarmID = 3
 
+   type (timer_node), pointer :: submod_inits, vlev_init, core_init, diag_init
+
    contains
 
    subroutine mpas_core_init(domain, startTimeStamp)!{{{
@@ -53,6 +55,7 @@
       integer :: err
 
       ! Initialize submodules before initializing blocks.
+      call timer_start(&quot;submodule init&quot;, .false., submod_inits)
       call ocn_timestep_init(err)
 
       call ocn_vel_pressure_grad_init(err)
@@ -68,7 +71,9 @@
       call ocn_vmix_init(err)
 
       call ocn_equation_of_state_init(err)
+      call timer_stop(&quot;submodule init&quot;, submod_inits)
 
+      call timer_start(&quot;vertical level init&quot;, .false., vlev_init)
       if (.not. config_do_restart) call setup_sw_test_case(domain)
 
       call compute_maxLevel(domain)
@@ -90,10 +95,12 @@
            ' config_new_btr_variables_from==last_subcycle'
          call dmpar_abort(dminfo)
       endif
+      call timer_stop(&quot;vertical level init&quot;, vlev_init)
 
       !
       ! Initialize core
       !
+      call timer_start(&quot;init core&quot;, .false., core_init)
       dt = config_dt
 
       call simulation_clock_init(domain, dt, startTimeStamp)
@@ -120,6 +127,7 @@
 
       restart_frame = 1
       current_outfile_frames = 0
+      call timer_stop(&quot;init core&quot;, core_init)
 
    end subroutine mpas_core_init!}}}
 
@@ -195,9 +203,9 @@
       integer :: i, iEdge, iCell, k
    
    
-      call timer_start(&quot;init diagnostics&quot;)
+      call timer_start(&quot;init diagnostics&quot;, .false., diag_init)
       call ocn_diagnostic_solve(dt, block % state % time_levs(1) % state, mesh)
-      call timer_stop(&quot;init diagnostics&quot;)
+      call timer_stop(&quot;init diagnostics&quot;, diag_init)
 
       call compute_mesh_scaling(mesh)
  

Modified: branches/ocean_projects/performance/src/core_ocean/mpas_ocn_tendency.F
===================================================================
--- branches/ocean_projects/performance/src/core_ocean/mpas_ocn_tendency.F        2011-10-12 23:39:32 UTC (rev 1070)
+++ branches/ocean_projects/performance/src/core_ocean/mpas_ocn_tendency.F        2011-10-13 17:29:57 UTC (rev 1071)
@@ -67,7 +67,9 @@
    !
    !--------------------------------------------------------------------
 
+   type (timer_node), pointer :: tracer_tend_hadv_timer
 
+
 !***********************************************************************
 
 contains
@@ -491,9 +493,9 @@
       ! and then change maxLevelEdgeTop to maxLevelEdgeBot in the following section.
       ! tracer_edge at the boundary will also need to be defined for flux boundaries.
 
-      call timer_start(&quot;tracer tend horiz adv&quot;)
+      call timer_start(&quot;tracer tend horiz adv&quot;, .false., tracer_tend_hadv_timer)
       call ocn_tracer_hadv_tend(grid, u, h_edge, tracers, tend_tr, err)
-      call timer_stop(&quot;tracer tend horiz adv&quot;)
+      call timer_stop(&quot;tracer tend horiz adv&quot;, tracer_tend_hadv_timer)
 
       !
       ! tracer tendency: vertical advection term -d/dz( h \phi w)

Modified: branches/ocean_projects/performance/src/core_ocean/mpas_ocn_thick_hadv.F
===================================================================
--- branches/ocean_projects/performance/src/core_ocean/mpas_ocn_thick_hadv.F        2011-10-12 23:39:32 UTC (rev 1070)
+++ branches/ocean_projects/performance/src/core_ocean/mpas_ocn_thick_hadv.F        2011-10-13 17:29:57 UTC (rev 1071)
@@ -107,7 +107,7 @@
       integer, dimension(:), pointer :: maxLevelEdgeTop
       integer, dimension(:,:), pointer :: cellsOnEdge
 
-      real (kind=RKIND) :: flux
+      real (kind=RKIND) :: flux, invArea1, invArea2
       real (kind=RKIND), dimension(:), pointer :: dvEdge, areaCell
 
       !-----------------------------------------------------------------
@@ -134,32 +134,29 @@
          do iEdge=1,nEdges
             cell1 = cellsOnEdge(1,iEdge)
             cell2 = cellsOnEdge(2,iEdge)
+
+            invArea1 = 1.0 / areaCell(cell1)
+            invArea2 = 1.0 / areaCell(cell2)
             do k=1,nVertLevels
                flux = u(k,iEdge) * dvEdge(iEdge) * h_edge(k,iEdge)
-               tend(k,cell1) = tend(k,cell1) - flux
-               tend(k,cell2) = tend(k,cell2) + flux
+               tend(k,cell1) = tend(k,cell1) - flux*invArea1
+               tend(k,cell2) = tend(k,cell2) + flux*invArea2
             end do
          end do
-         do iCell=1,nCells
-            do k=1,nVertLevels
-               tend(k,iCell) = tend(k,iCell) / areaCell(iCell)
-            end do
-         end do
-
       elseif (config_vert_grid_type.eq.'zlevel') then
 
          do iEdge=1,nEdges
             cell1 = cellsOnEdge(1,iEdge)
             cell2 = cellsOnEdge(2,iEdge)
+
+            invArea1 = 1.0 / areaCell(cell1)
+            invArea2 = 1.0 / areaCell(cell2)
             do k=1,min(1,maxLevelEdgeTop(iEdge))
                flux = u(k,iEdge) * dvEdge(iEdge) * h_edge(k,iEdge)
-               tend(k,cell1) = tend(k,cell1) - flux
-               tend(k,cell2) = tend(k,cell2) + flux
+               tend(k,cell1) = tend(k,cell1) - flux * invArea1
+               tend(k,cell2) = tend(k,cell2) + flux * invArea2
             end do
          end do
-         do iCell=1,nCells
-           tend(1,iCell) = tend(1,iCell) / areaCell(iCell)
-         end do
 
       endif ! config_vert_grid_type
 

Modified: branches/ocean_projects/performance/src/core_ocean/mpas_ocn_vel_hmix_del4.F
===================================================================
--- branches/ocean_projects/performance/src/core_ocean/mpas_ocn_vel_hmix_del4.F        2011-10-12 23:39:32 UTC (rev 1070)
+++ branches/ocean_projects/performance/src/core_ocean/mpas_ocn_vel_hmix_del4.F        2011-10-13 17:29:57 UTC (rev 1071)
@@ -123,7 +123,7 @@
       integer, dimension(:,:), pointer :: cellsOnEdge, verticesOnEdge
 
 
-      real (kind=RKIND) :: u_diffusion, r
+      real (kind=RKIND) :: u_diffusion, invArea1, invArea2, realTmp1, realTmp2
       real (kind=RKIND), dimension(:), pointer :: dcEdge, dvEdge, areaTriangle, &amp;
             meshScalingDel4, areaCell
 
@@ -178,38 +178,39 @@
       do iEdge=1,nEdges
          vertex1 = verticesOnEdge(1,iEdge)
          vertex2 = verticesOnEdge(2,iEdge)
+
+         invArea1 = 1.0 / areaTriangle(vertex1)
+         invArea2 = 1.0 / areaTriangle(vertex2)
          do k=1,maxLevelEdgeTop(iEdge)
-            delsq_circulation(k,vertex1) = delsq_circulation(k,vertex1) &amp;
-               - dcEdge(iEdge) * delsq_u(k,iEdge)
-            delsq_circulation(k,vertex2) = delsq_circulation(k,vertex2) &amp;
-               + dcEdge(iEdge) * delsq_u(k,iEdge)
+
+            realTmp1 =  - dcEdge(iEdge) * delsq_u(k,iEdge)
+            realTmp2 = dcEdge(iEdge) * delsq_u(k,iEdge)
+
+            delsq_circulation(k,vertex1) = delsq_circulation(k,vertex1) + realTmp1
+            delsq_circulation(k,vertex2) = delsq_circulation(k,vertex2) + realTmp2
+
+            delsq_vorticity(k, vertex1) = delsq_vorticity(k,vertex1) + realTmp1 * invArea1
+            delsq_vorticity(k, vertex2) = delsq_vorticity(k,vertex2) + realTmp2 * invArea2
          end do
       end do
-      do iVertex=1,nVertices
-         r = 1.0 / areaTriangle(iVertex)
-         do k=1,maxLevelVertexBot(iVertex)
-            delsq_vorticity(k,iVertex) = delsq_circulation(k,iVertex) * r
-         end do
-      end do
 
       ! Divergence using </font>
<font color="red">abla^2 u
       delsq_divergence(:,:) = 0.0
       do iEdge=1,nEdges
          cell1 = cellsOnEdge(1,iEdge)
          cell2 = cellsOnEdge(2,iEdge)
+
+         invArea1 = 1.0 / areaCell(cell1)
+         invArea2 = 1.0 / areaCell(cell2)
+
          do k=1,maxLevelEdgeTop(iEdge)
-           delsq_divergence(k,cell1) = delsq_divergence(k,cell1) &amp;
-             + delsq_u(k,iEdge)*dvEdge(iEdge)
-           delsq_divergence(k,cell2) = delsq_divergence(k,cell2) &amp;
-             - delsq_u(k,iEdge)*dvEdge(iEdge)
+           realTmp1 = delsq_u(k, iEdge) * dvEdge(iEdge)
+           realTmp2 = - delsq_u(k, iEdge) * dvEdge(iEdge)
+
+           delsq_divergence(k,cell1) = delsq_divergence(k,cell1) + realTmp1 * invArea1
+           delsq_divergence(k,cell2) = delsq_divergence(k,cell2) + realTmp2 * invArea2
          end do
       end do
-      do iCell = 1,nCells
-         r = 1.0 / areaCell(iCell)
-         do k = 1,maxLevelCell(iCell)
-            delsq_divergence(k,iCell) = delsq_divergence(k,iCell) * r
-         end do
-      end do
 
       ! Compute - \kappa </font>
<font color="black">abla^4 u 
       ! as  </font>
<font color="black">abla div(</font>
<font color="black">abla^2 u) + k \times </font>
<font color="black">abla ( k \cross curl(</font>
<font color="gray">abla^2 u) )
@@ -220,16 +221,15 @@
          vertex2 = verticesOnEdge(2,iEdge)
 
          do k=1,maxLevelEdgeTop(iEdge)
-            delsq_u(k,iEdge) = &amp; 
-               ( divergence(k,cell2)  - divergence(k,cell1) ) / dcEdge(iEdge)  &amp;
-              -( vorticity(k,vertex2) - vorticity(k,vertex1)) / dvEdge(iEdge)
 
-            u_diffusion = (  delsq_divergence(k,cell2) &amp;
-                           - delsq_divergence(k,cell1) ) / dcEdge(iEdge)  &amp;
-                         -viscVortCoef &amp;
-                         *(  delsq_vorticity(k,vertex2) &amp;
-                           - delsq_vorticity(k,vertex1) ) / dvEdge(iEdge)
+            realTmp1 = (divergence(k, cell2) - divergence(k,cell1)) / dcEdge(iEdge)
+            realTmp2 = (vorticity(k, vertex2) - vorticity(k, vertex1)) / dvEdge(iEdge)
+            delsq_u(k,iEdge) = realTmp1 - realTmp2
 
+            realTmp1 = (delsq_divergence(k, cell2) - delsq_divergence(k,cell1)) / dcEdge(iEdge)
+            realTmp2 = (delsq_vorticity(k, vertex2) - delsq_vorticity(k, vertex1)) / dvEdge(iEdge)
+            u_diffusion = realTmp1 - viscVortCoef * realTmp2
+
             u_diffusion = meshScalingDel4(iEdge) * eddyVisc4 * u_diffusion
 
             tend(k,iEdge) = tend(k,iEdge) - u_diffusion

Modified: branches/ocean_projects/performance/src/driver/module_mpas_subdriver.F
===================================================================
--- branches/ocean_projects/performance/src/driver/module_mpas_subdriver.F        2011-10-12 23:39:32 UTC (rev 1070)
+++ branches/ocean_projects/performance/src/driver/module_mpas_subdriver.F        2011-10-13 17:29:57 UTC (rev 1071)
@@ -8,7 +8,9 @@
    type (io_output_object) :: output_obj
    integer :: output_frame
 
+   type (timer_node), pointer :: total_timer, init_timer, run_timer, final_timer
 
+
    contains
 
 
@@ -19,8 +21,8 @@
       real (kind=RKIND) :: dt
       character(len=32) :: timeStamp
 
-      call timer_start(&quot;total time&quot;)
-      call timer_start(&quot;initialize&quot;)
+      call timer_start(&quot;total time&quot;, .false., total_timer)
+      call timer_start(&quot;initialize&quot;, .false., init_timer)
 
       !
       ! Initialize infrastructure
@@ -46,17 +48,17 @@
          call output_state_init(output_obj, domain, &quot;OUTPUT&quot;)         
       end if
 
-      call timer_stop(&quot;initialize&quot;)
+      call timer_stop(&quot;initialize&quot;, init_timer)
    end subroutine mpas_init
 
 
    subroutine mpas_run()
 
       implicit none
-      call timer_start(&quot;run&quot;)
+      call timer_start(&quot;run&quot;, .false., run_timer)
 
       call mpas_core_run(domain, output_obj, output_frame)
-      call timer_stop(&quot;run&quot;)
+      call timer_stop(&quot;run&quot;, run_timer)
 
    end subroutine mpas_run
 
@@ -65,7 +67,7 @@
    
       implicit none
 
-      call timer_start(&quot;finalize&quot;)
+      call timer_start(&quot;finalize&quot;, .false., final_timer)
 
       !
       ! Finalize output streams
@@ -78,8 +80,8 @@
       !
       call mpas_core_finalize(domain)
 
-      call timer_stop(&quot;finalize&quot;)
-      call timer_stop(&quot;total time&quot;)
+      call timer_stop(&quot;finalize&quot;, final_timer)
+      call timer_stop(&quot;total time&quot;, total_timer)
       call timer_write()
 
 

Modified: branches/ocean_projects/performance/src/framework/module_timer.F
===================================================================
--- branches/ocean_projects/performance/src/framework/module_timer.F        2011-10-12 23:39:32 UTC (rev 1070)
+++ branches/ocean_projects/performance/src/framework/module_timer.F        2011-10-13 17:29:57 UTC (rev 1071)
@@ -64,6 +64,30 @@
             end do timer_search
           endif
 
+          if(present(timer_ptr)) then
+            timer_found = .true.
+            if(.not.associated(timer_ptr)) then
+              current =&gt; all_timers
+              find_end_ptr: do while((.not.timer_added) .and. (associated(current%next)))
+                current =&gt; current%next
+              end do find_end_ptr
+
+              allocate(timer_ptr)
+
+              current%next =&gt; timer_ptr
+              current =&gt; timer_ptr
+              nullify(timer_ptr%next)
+              current%levels = levels
+              current%timer_name = timer_name
+              current%running = .false.
+              current%total_time = 0.0
+              current%max_time = 0.0
+              current%min_time = 100000000.0
+              current%avg_time = 0.0
+              current%calls = 0
+            endif
+          endif
+
           if(.not.timer_found) then
             current =&gt; all_timers
             find_end: do while((.not.timer_added) .and. (associated(current%next)))

</font>
</pre>