<p><b>dwj07@fsu.edu</b> 2011-10-13 11:29:57 -0600 (Thu, 13 Oct 2011)</p><p><br>
        -- BRANCH COMMIT --<br>
<br>
        Finializing module_timer, and updating a few of the timer calls to reflect the new options.<br>
<br>
        Also updating the makefile to better handle the PAPI libraries.<br>
        Added three new options on the trunk's makefile.<br>
                ifort-serial<br>
                ifort-papi<br>
                ifort-papi-serial<br>
<br>
        ifort-serial using ifort without any mpi calls and system clock timers<br>
        ifort-papi uses ifort with mpi calls and the papi timers<br>
        ifort-papi-serial uses ifort without mpi calls with the papi timers<br>
<br>
        standard ifort uses mpi calls with mpi timers<br>
</p><hr noshade><pre><font color="gray">Modified: branches/ocean_projects/performance/Makefile
===================================================================
--- branches/ocean_projects/performance/Makefile        2011-10-12 23:39:32 UTC (rev 1070)
+++ branches/ocean_projects/performance/Makefile        2011-10-13 17:29:57 UTC (rev 1071)
@@ -102,11 +102,25 @@
        "CFLAGS = -O3 -m64" \
        "LDFLAGS = -O3" \
        "CORE = $(CORE)" \
-        "CPPFLAGS = -DRKIND=8 $(MODEL_FORMULATION) -D_PAPI -D_MPI -DUNDERSCORE -m64 $(FILE_OFFSET) $(ZOLTAN_DEFINE)" )
+        "CPPFLAGS = -DRKIND=8 $(MODEL_FORMULATION) -D_PAPI -D_MPI -DUNDERSCORE -m64 $(FILE_OFFSET) $(ZOLTAN_DEFINE)" \
+        "PAPILIBS = -L$(PAPI)/lib -lpapi" )
+ifort-papi-serial:
+        ( make all \
+        "FC = ifort" \
+        "CC = gcc" \
+        "SFC = ifort" \
+        "SCC = gcc" \
+        "FFLAGS = -real-size 64 -O3 -convert big_endian -FR" \
+        "CFLAGS = -O3 -m64" \
+        "LDFLAGS = -O3" \
+        "CORE = $(CORE)" \
+        "CPPFLAGS = -DRKIND=8 $(MODEL_FORMULATION) -D_PAPI -DUNDERSCORE -m64 $(FILE_OFFSET) $(ZOLTAN_DEFINE)" \
+        "PAPILIBS = -L$(PAPI)/lib -lpapi" )
+
ifort:
        ( make all \
-        "FC = mpif90" \
+        "FC = ifort" \
        "CC = gcc" \
        "SFC = ifort" \
        "SCC = gcc" \
@@ -114,7 +128,7 @@
        "CFLAGS = -O3 -m64" \
        "LDFLAGS = -O3" \
        "CORE = $(CORE)" \
-        "CPPFLAGS = -DRKIND=8 $(MODEL_FORMULATION) -D_MPI -DUNDERSCORE -m64 $(FILE_OFFSET) $(ZOLTAN_DEFINE)" )
+        "CPPFLAGS = -DRKIND=8 $(MODEL_FORMULATION) -DUNDERSCORE -m64 $(FILE_OFFSET) $(ZOLTAN_DEFINE)" )
gfortran:
        ( make all \
@@ -167,7 +181,7 @@
CPPINCLUDES = -I../inc -I$(NETCDF)/include -I$(PAPI)/include
FCINCLUDES = -I../inc -I$(NETCDF)/include -I$(PAPI)/include
-LIBS = -L$(NETCDF)/lib -lnetcdf -L$(PAPI)/lib -lpapi
+LIBS = -L$(NETCDF)/lib -lnetcdf $(PAPILIBS)
RM = rm -f
CPP = cpp -C -P -traditional
Modified: branches/ocean_projects/performance/src/core_ocean/mpas_ocn_mpas_core.F
===================================================================
--- branches/ocean_projects/performance/src/core_ocean/mpas_ocn_mpas_core.F        2011-10-12 23:39:32 UTC (rev 1070)
+++ branches/ocean_projects/performance/src/core_ocean/mpas_ocn_mpas_core.F        2011-10-13 17:29:57 UTC (rev 1071)
@@ -34,6 +34,8 @@
integer, parameter :: restartAlarmID = 2
integer, parameter :: statsAlarmID = 3
+ type (timer_node), pointer :: submod_inits, vlev_init, core_init, diag_init
+
contains
subroutine mpas_core_init(domain, startTimeStamp)!{{{
@@ -53,6 +55,7 @@
integer :: err
! Initialize submodules before initializing blocks.
+ call timer_start("submodule init", .false., submod_inits)
call ocn_timestep_init(err)
call ocn_vel_pressure_grad_init(err)
@@ -68,7 +71,9 @@
call ocn_vmix_init(err)
call ocn_equation_of_state_init(err)
+ call timer_stop("submodule init", submod_inits)
+ call timer_start("vertical level init", .false., vlev_init)
if (.not. config_do_restart) call setup_sw_test_case(domain)
call compute_maxLevel(domain)
@@ -90,10 +95,12 @@
' config_new_btr_variables_from==last_subcycle'
call dmpar_abort(dminfo)
endif
+ call timer_stop("vertical level init", vlev_init)
!
! Initialize core
!
+ call timer_start("init core", .false., core_init)
dt = config_dt
call simulation_clock_init(domain, dt, startTimeStamp)
@@ -120,6 +127,7 @@
restart_frame = 1
current_outfile_frames = 0
+ call timer_stop("init core", core_init)
end subroutine mpas_core_init!}}}
@@ -195,9 +203,9 @@
integer :: i, iEdge, iCell, k
- call timer_start("init diagnostics")
+ call timer_start("init diagnostics", .false., diag_init)
call ocn_diagnostic_solve(dt, block % state % time_levs(1) % state, mesh)
- call timer_stop("init diagnostics")
+ call timer_stop("init diagnostics", diag_init)
call compute_mesh_scaling(mesh)
Modified: branches/ocean_projects/performance/src/core_ocean/mpas_ocn_tendency.F
===================================================================
--- branches/ocean_projects/performance/src/core_ocean/mpas_ocn_tendency.F        2011-10-12 23:39:32 UTC (rev 1070)
+++ branches/ocean_projects/performance/src/core_ocean/mpas_ocn_tendency.F        2011-10-13 17:29:57 UTC (rev 1071)
@@ -67,7 +67,9 @@
!
!--------------------------------------------------------------------
+ type (timer_node), pointer :: tracer_tend_hadv_timer
+
!***********************************************************************
contains
@@ -491,9 +493,9 @@
! and then change maxLevelEdgeTop to maxLevelEdgeBot in the following section.
! tracer_edge at the boundary will also need to be defined for flux boundaries.
- call timer_start("tracer tend horiz adv")
+ call timer_start("tracer tend horiz adv", .false., tracer_tend_hadv_timer)
call ocn_tracer_hadv_tend(grid, u, h_edge, tracers, tend_tr, err)
- call timer_stop("tracer tend horiz adv")
+ call timer_stop("tracer tend horiz adv", tracer_tend_hadv_timer)
!
! tracer tendency: vertical advection term -d/dz( h \phi w)
Modified: branches/ocean_projects/performance/src/core_ocean/mpas_ocn_thick_hadv.F
===================================================================
--- branches/ocean_projects/performance/src/core_ocean/mpas_ocn_thick_hadv.F        2011-10-12 23:39:32 UTC (rev 1070)
+++ branches/ocean_projects/performance/src/core_ocean/mpas_ocn_thick_hadv.F        2011-10-13 17:29:57 UTC (rev 1071)
@@ -107,7 +107,7 @@
integer, dimension(:), pointer :: maxLevelEdgeTop
integer, dimension(:,:), pointer :: cellsOnEdge
- real (kind=RKIND) :: flux
+ real (kind=RKIND) :: flux, invArea1, invArea2
real (kind=RKIND), dimension(:), pointer :: dvEdge, areaCell
!-----------------------------------------------------------------
@@ -134,32 +134,29 @@
do iEdge=1,nEdges
cell1 = cellsOnEdge(1,iEdge)
cell2 = cellsOnEdge(2,iEdge)
+
+ invArea1 = 1.0 / areaCell(cell1)
+ invArea2 = 1.0 / areaCell(cell2)
do k=1,nVertLevels
flux = u(k,iEdge) * dvEdge(iEdge) * h_edge(k,iEdge)
- tend(k,cell1) = tend(k,cell1) - flux
- tend(k,cell2) = tend(k,cell2) + flux
+ tend(k,cell1) = tend(k,cell1) - flux*invArea1
+ tend(k,cell2) = tend(k,cell2) + flux*invArea2
end do
end do
- do iCell=1,nCells
- do k=1,nVertLevels
- tend(k,iCell) = tend(k,iCell) / areaCell(iCell)
- end do
- end do
-
elseif (config_vert_grid_type.eq.'zlevel') then
do iEdge=1,nEdges
cell1 = cellsOnEdge(1,iEdge)
cell2 = cellsOnEdge(2,iEdge)
+
+ invArea1 = 1.0 / areaCell(cell1)
+ invArea2 = 1.0 / areaCell(cell2)
do k=1,min(1,maxLevelEdgeTop(iEdge))
flux = u(k,iEdge) * dvEdge(iEdge) * h_edge(k,iEdge)
- tend(k,cell1) = tend(k,cell1) - flux
- tend(k,cell2) = tend(k,cell2) + flux
+ tend(k,cell1) = tend(k,cell1) - flux * invArea1
+ tend(k,cell2) = tend(k,cell2) + flux * invArea2
end do
end do
- do iCell=1,nCells
- tend(1,iCell) = tend(1,iCell) / areaCell(iCell)
- end do
endif ! config_vert_grid_type
Modified: branches/ocean_projects/performance/src/core_ocean/mpas_ocn_vel_hmix_del4.F
===================================================================
--- branches/ocean_projects/performance/src/core_ocean/mpas_ocn_vel_hmix_del4.F        2011-10-12 23:39:32 UTC (rev 1070)
+++ branches/ocean_projects/performance/src/core_ocean/mpas_ocn_vel_hmix_del4.F        2011-10-13 17:29:57 UTC (rev 1071)
@@ -123,7 +123,7 @@
integer, dimension(:,:), pointer :: cellsOnEdge, verticesOnEdge
- real (kind=RKIND) :: u_diffusion, r
+ real (kind=RKIND) :: u_diffusion, invArea1, invArea2, realTmp1, realTmp2
real (kind=RKIND), dimension(:), pointer :: dcEdge, dvEdge, areaTriangle, &
meshScalingDel4, areaCell
@@ -178,38 +178,39 @@
do iEdge=1,nEdges
vertex1 = verticesOnEdge(1,iEdge)
vertex2 = verticesOnEdge(2,iEdge)
+
+ invArea1 = 1.0 / areaTriangle(vertex1)
+ invArea2 = 1.0 / areaTriangle(vertex2)
do k=1,maxLevelEdgeTop(iEdge)
- delsq_circulation(k,vertex1) = delsq_circulation(k,vertex1) &
- - dcEdge(iEdge) * delsq_u(k,iEdge)
- delsq_circulation(k,vertex2) = delsq_circulation(k,vertex2) &
- + dcEdge(iEdge) * delsq_u(k,iEdge)
+
+ realTmp1 = - dcEdge(iEdge) * delsq_u(k,iEdge)
+ realTmp2 = dcEdge(iEdge) * delsq_u(k,iEdge)
+
+ delsq_circulation(k,vertex1) = delsq_circulation(k,vertex1) + realTmp1
+ delsq_circulation(k,vertex2) = delsq_circulation(k,vertex2) + realTmp2
+
+ delsq_vorticity(k, vertex1) = delsq_vorticity(k,vertex1) + realTmp1 * invArea1
+ delsq_vorticity(k, vertex2) = delsq_vorticity(k,vertex2) + realTmp2 * invArea2
end do
end do
- do iVertex=1,nVertices
- r = 1.0 / areaTriangle(iVertex)
- do k=1,maxLevelVertexBot(iVertex)
- delsq_vorticity(k,iVertex) = delsq_circulation(k,iVertex) * r
- end do
- end do
! Divergence using </font>
<font color="red">abla^2 u
delsq_divergence(:,:) = 0.0
do iEdge=1,nEdges
cell1 = cellsOnEdge(1,iEdge)
cell2 = cellsOnEdge(2,iEdge)
+
+ invArea1 = 1.0 / areaCell(cell1)
+ invArea2 = 1.0 / areaCell(cell2)
+
do k=1,maxLevelEdgeTop(iEdge)
- delsq_divergence(k,cell1) = delsq_divergence(k,cell1) &
- + delsq_u(k,iEdge)*dvEdge(iEdge)
- delsq_divergence(k,cell2) = delsq_divergence(k,cell2) &
- - delsq_u(k,iEdge)*dvEdge(iEdge)
+ realTmp1 = delsq_u(k, iEdge) * dvEdge(iEdge)
+ realTmp2 = - delsq_u(k, iEdge) * dvEdge(iEdge)
+
+ delsq_divergence(k,cell1) = delsq_divergence(k,cell1) + realTmp1 * invArea1
+ delsq_divergence(k,cell2) = delsq_divergence(k,cell2) + realTmp2 * invArea2
end do
end do
- do iCell = 1,nCells
- r = 1.0 / areaCell(iCell)
- do k = 1,maxLevelCell(iCell)
- delsq_divergence(k,iCell) = delsq_divergence(k,iCell) * r
- end do
- end do
! Compute - \kappa </font>
<font color="black">abla^4 u
! as </font>
<font color="black">abla div(</font>
<font color="black">abla^2 u) + k \times </font>
<font color="black">abla ( k \cross curl(</font>
<font color="gray">abla^2 u) )
@@ -220,16 +221,15 @@
vertex2 = verticesOnEdge(2,iEdge)
do k=1,maxLevelEdgeTop(iEdge)
- delsq_u(k,iEdge) = &
- ( divergence(k,cell2) - divergence(k,cell1) ) / dcEdge(iEdge) &
- -( vorticity(k,vertex2) - vorticity(k,vertex1)) / dvEdge(iEdge)
- u_diffusion = ( delsq_divergence(k,cell2) &
- - delsq_divergence(k,cell1) ) / dcEdge(iEdge) &
- -viscVortCoef &
- *( delsq_vorticity(k,vertex2) &
- - delsq_vorticity(k,vertex1) ) / dvEdge(iEdge)
+ realTmp1 = (divergence(k, cell2) - divergence(k,cell1)) / dcEdge(iEdge)
+ realTmp2 = (vorticity(k, vertex2) - vorticity(k, vertex1)) / dvEdge(iEdge)
+ delsq_u(k,iEdge) = realTmp1 - realTmp2
+ realTmp1 = (delsq_divergence(k, cell2) - delsq_divergence(k,cell1)) / dcEdge(iEdge)
+ realTmp2 = (delsq_vorticity(k, vertex2) - delsq_vorticity(k, vertex1)) / dvEdge(iEdge)
+ u_diffusion = realTmp1 - viscVortCoef * realTmp2
+
u_diffusion = meshScalingDel4(iEdge) * eddyVisc4 * u_diffusion
tend(k,iEdge) = tend(k,iEdge) - u_diffusion
Modified: branches/ocean_projects/performance/src/driver/module_mpas_subdriver.F
===================================================================
--- branches/ocean_projects/performance/src/driver/module_mpas_subdriver.F        2011-10-12 23:39:32 UTC (rev 1070)
+++ branches/ocean_projects/performance/src/driver/module_mpas_subdriver.F        2011-10-13 17:29:57 UTC (rev 1071)
@@ -8,7 +8,9 @@
type (io_output_object) :: output_obj
integer :: output_frame
+ type (timer_node), pointer :: total_timer, init_timer, run_timer, final_timer
+
contains
@@ -19,8 +21,8 @@
real (kind=RKIND) :: dt
character(len=32) :: timeStamp
- call timer_start("total time")
- call timer_start("initialize")
+ call timer_start("total time", .false., total_timer)
+ call timer_start("initialize", .false., init_timer)
!
! Initialize infrastructure
@@ -46,17 +48,17 @@
call output_state_init(output_obj, domain, "OUTPUT")
end if
- call timer_stop("initialize")
+ call timer_stop("initialize", init_timer)
end subroutine mpas_init
subroutine mpas_run()
implicit none
- call timer_start("run")
+ call timer_start("run", .false., run_timer)
call mpas_core_run(domain, output_obj, output_frame)
- call timer_stop("run")
+ call timer_stop("run", run_timer)
end subroutine mpas_run
@@ -65,7 +67,7 @@
implicit none
- call timer_start("finalize")
+ call timer_start("finalize", .false., final_timer)
!
! Finalize output streams
@@ -78,8 +80,8 @@
!
call mpas_core_finalize(domain)
- call timer_stop("finalize")
- call timer_stop("total time")
+ call timer_stop("finalize", final_timer)
+ call timer_stop("total time", total_timer)
call timer_write()
Modified: branches/ocean_projects/performance/src/framework/module_timer.F
===================================================================
--- branches/ocean_projects/performance/src/framework/module_timer.F        2011-10-12 23:39:32 UTC (rev 1070)
+++ branches/ocean_projects/performance/src/framework/module_timer.F        2011-10-13 17:29:57 UTC (rev 1071)
@@ -64,6 +64,30 @@
end do timer_search
endif
+ if(present(timer_ptr)) then
+ timer_found = .true.
+ if(.not.associated(timer_ptr)) then
+ current => all_timers
+ find_end_ptr: do while((.not.timer_added) .and. (associated(current%next)))
+ current => current%next
+ end do find_end_ptr
+
+ allocate(timer_ptr)
+
+ current%next => timer_ptr
+ current => timer_ptr
+ nullify(timer_ptr%next)
+ current%levels = levels
+ current%timer_name = timer_name
+ current%running = .false.
+ current%total_time = 0.0
+ current%max_time = 0.0
+ current%min_time = 100000000.0
+ current%avg_time = 0.0
+ current%calls = 0
+ endif
+ endif
+
if(.not.timer_found) then
current => all_timers
find_end: do while((.not.timer_added) .and. (associated(current%next)))
</font>
</pre>