<p><b>dwj07@fsu.edu</b> 2012-08-22 15:29:51 -0600 (Wed, 22 Aug 2012)</p><p><br>
        -- BRANCH COMMIT --<br>
        Second cut at parallel element loops.<br>
        Currently has a namelist flag that controls if you use the first cut, or the second cut.<br>
<br>
        Second cut involves rewriting a lot of loops.<br>
</p><hr noshade><pre><font color="gray">Modified: branches/omp_blocks/openmp_test/src/core_sw_elements/Registry
===================================================================
--- branches/omp_blocks/openmp_test/src/core_sw_elements/Registry        2012-08-21 17:39:57 UTC (rev 2112)
+++ branches/omp_blocks/openmp_test/src/core_sw_elements/Registry        2012-08-22 21:29:51 UTC (rev 2113)
@@ -22,6 +22,7 @@
 namelist logical     sw_model  config_bottom_drag           false
 namelist real        sw_model  config_apvm_upwinding        0.5
 namelist integer     sw_model  config_num_halos             2
+namelist logical     sw_model  config_old_loops             false
 namelist character   io        config_input_name            grid.nc
 namelist character   io        config_output_name           output.nc
 namelist character   io        config_restart_name          restart.nc

Modified: branches/omp_blocks/openmp_test/src/core_sw_elements/mpas_sw_time_integration.F
===================================================================
--- branches/omp_blocks/openmp_test/src/core_sw_elements/mpas_sw_time_integration.F        2012-08-21 17:39:57 UTC (rev 2112)
+++ branches/omp_blocks/openmp_test/src/core_sw_elements/mpas_sw_time_integration.F        2012-08-22 21:29:51 UTC (rev 2113)
@@ -9,7 +9,6 @@
 
    contains
 
-
    subroutine sw_timestep(domain, dt, timeStamp)
    !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 
    ! Advance model state forward in time by the specified time step
@@ -96,7 +95,7 @@
         block % state % time_levs(2) % state % h % array(:,:) = block % state % time_levs(1) % state % h % array(:,:)
         !$omp end workshare
 
-        !$omp do private(iCell, k)
+        !$omp do private(iCell, k) 
         do iCell=1,block % mesh % nCells  ! couple tracers to h
           do k=1,block % mesh % nVertLevels
             block % state % time_levs(2) % state % tracers % array(:,k,iCell) = block % state % time_levs(1) % state % tracers % array(:,k,iCell) &amp;
@@ -152,9 +151,15 @@
 
        block =&gt; domain % blocklist
        do while (associated(block))
+          call mpas_timer_start('sw_compute_tend')
           call sw_compute_tend(block % tend, block % provis, block % mesh)
+          call mpas_timer_stop('sw_compute_tend')
+          call mpas_timer_start('sw_compute_scalar_tend')
           call sw_compute_scalar_tend(block % tend, block % provis, block % mesh)
+          call mpas_timer_stop('sw_compute_scalar_tend')
+          call mpas_timer_start('sw_enforce_boundary')
           call sw_enforce_boundary_edge(block % tend, block % mesh)
+          call mpas_timer_stop('sw_enforce_boundary')
           block =&gt; block % next
        end do
 
@@ -182,7 +187,7 @@
                                              + rk_substep_weights(rk_step) * block % tend % h % array(:,:)
              !$omp end workshare
 
-             !$omp do private(iCell, k)
+             !$omp do private(iCell, k) 
              do iCell=1,block % mesh % nCells
                 do k=1,block % mesh % nVertLevels
                    block % provis % tracers % array(:,k,iCell) = ( block % state % time_levs(1) % state % h % array(k,iCell) * &amp;
@@ -197,7 +202,9 @@
                 block % provis % u % array(:,:) = block % state % time_levs(1) % state % u % array(:,:)
                 !$omp end workshare
              end if
+             call mpas_timer_start('sw_compute_solve_diagnostics')
              call sw_compute_solve_diagnostics(dt, block % provis, block % mesh)
+             call mpas_timer_stop('sw_compute_solve_diagnostics')
              block =&gt; block % next
           end do
        end if
@@ -213,7 +220,7 @@
                                   + rk_weights(rk_step) * block % tend % h % array(:,:) 
           !$omp end workshare
 
-          !$omp do private(iCell, k)
+          !$omp do private(iCell, k) 
           do iCell=1,block % mesh % nCells
              do k=1,block % mesh % nVertLevels
                 block % state % time_levs(2) % state % tracers % array(:,k,iCell) =  &amp;
@@ -242,7 +249,7 @@
       !$omp end single
       block =&gt; domain % blocklist
       do while (associated(block))
-         !$omp do private(iCell, k)
+         !$omp do private(iCell, k) 
          do iCell=1,block % mesh % nCells
             do k=1,block % mesh % nVertLevels
                block % state % time_levs(2) % state % tracers % array(:,k,iCell) = &amp;
@@ -300,7 +307,7 @@
       integer :: iEdge, iCell, iVertex, k, cell1, cell2, vertex1, vertex2, eoe, i, j
       real (kind=RKIND) :: flux, vorticity_abs, workpv, q, upstream_bias
 
-      integer :: nCells, nEdges, nVertices, nVertLevels
+      integer :: nCells, nEdges, nVertices, nVertLevels, vertexDegree
       real (kind=RKIND), dimension(:), pointer :: h_s, fVertex, fEdge, dvEdge, dcEdge, areaCell, areaTriangle, &amp;
                                                   meshScalingDel2, meshScalingDel4
       real (kind=RKIND), dimension(:,:), pointer :: vh, weightsOnEdge, kiteAreasOnVertex, h_edge, h, u, v, tend_h, tend_u, &amp;
@@ -353,6 +360,7 @@
       nEdges      = grid % nEdges
       nVertices   = grid % nVertices
       nVertLevels = grid % nVertLevels
+      vertexDegree = grid % vertexDegree
 
       u_src =&gt; grid % u_src % array
 
@@ -366,7 +374,9 @@
       tend_h(:,:) = 0.0
       !$omp end workshare
 
-      !$omp do private(iEdge, cell1, cell2, flux)
+      !DWJ 08/22/12 Old tend_h loop
+      if(config_old_loops) then
+      !$omp do private(iEdge, cell1, cell2, flux) 
       do iEdge=1,nEdges
          cell1 = cellsOnEdge(1,iEdge)
          cell2 = cellsOnEdge(2,iEdge)
@@ -382,8 +392,26 @@
          end do
       end do 
       !$omp end do
+      else
+      !DWJ 08/22/12 New tend_h loop
+      !$omp do private(iCell, i, iEdge, k, flux)
+      do iCell=1,grid % nCellsSolve
+        do i = 1, grid % nEdgesOnCell % array(iCell)
+          iEdge = grid % edgesOnCell % array(i, iCell)
+          do k = 1, nVertLevels
+            if(iCell == cellsOnEdge(1, iEdge)) then
+              flux = - u(k, iEdge) * dvEdge(iEdge) *  h_edge(k,iEdge)
+            else
+              flux =   u(k, iEdge) * dvEdge(iEdge) *  h_edge(k,iEdge)
+            end if
+            tend_h(k, iCell) = tend_h(k, iCell) + flux
+          end do
+        end do
+      end do
+      !$omp end do
+      end if
 
-      !$omp do private(iCell, k)
+      !$omp do private(iCell, k) 
       do iCell=1,grid % nCellsSolve
          do k=1,nVertLevels
             tend_h(k,iCell) = tend_h(k,iCell) / areaCell(iCell)
@@ -399,7 +427,7 @@
       tend_u(:,:) = 0.0
       !$omp end workshare
 
-      !$omp do private(iCell, cell1, cell2, vertex1, vertex2, k, q, j, eoe, workpv)
+      !$omp do private(iCell, cell1, cell2, vertex1, vertex2, k, q, j, eoe, workpv) 
       do iEdge=1,grid % nEdgesSolve
          cell1 = cellsOnEdge(1,iEdge)
          cell2 = cellsOnEdge(2,iEdge)
@@ -434,7 +462,7 @@
       tend_u(:,:) = 0.0
       !$omp end workshare
 
-      !$omp do private(iEdge, vertex1, vertex2, cell1, cell2, k, vorticity_abs, workpv)
+      !$omp do private(iEdge, vertex1, vertex2, cell1, cell2, k, vorticity_abs, workpv) 
       do iEdge=1,grid % nEdgesSolve
          vertex1 = verticesOnEdge(1,iEdge)
          vertex2 = verticesOnEdge(2,iEdge)
@@ -460,7 +488,7 @@
      ! Compute diffusion, computed as </font>
<font color="black">abla divergence - k \times </font>
<font color="gray">abla vorticity
      !                    only valid for visc == constant
      if (config_h_mom_eddy_visc2 &gt; 0.0) then
-        !$omp do private(iEdge, cell1, cell2, vertex1, vertex2, k, u_diffusion)
+        !$omp do private(iEdge, cell1, cell2, vertex1, vertex2, k, u_diffusion) 
         do iEdge=1,grid % nEdgesSolve
            cell1 = cellsOnEdge(1,iEdge)
            cell2 = cellsOnEdge(2,iEdge)
@@ -500,7 +528,7 @@
         !$omp end workshare
 
         ! Compute </font>
<font color="black">abla^2 u = </font>
<font color="black">abla divergence + k \times </font>
<font color="gray">abla vorticity
-        !$omp do private(iEdge, cell1, cell2, vertex1, vertex2, k)
+        !$omp do private(iEdge, cell1, cell2, vertex1, vertex2, k) 
         do iEdge=1,grid % nEdges
            cell1 = cellsOnEdge(1,iEdge)
            cell2 = cellsOnEdge(2,iEdge)
@@ -521,7 +549,9 @@
         delsq_circulation(:,:) = 0.0
         !$omp end workshare
 
-        !$omp do private(iEdge, vertex1, vertex2)
+        if(config_old_loops) then
+        !DWJ 08/22/12 Old delsq_circ loop
+        !$omp do private(iEdge, vertex1, vertex2) 
         do iEdge=1,nEdges
            vertex1 = verticesOnEdge(1,iEdge)
            vertex2 = verticesOnEdge(2,iEdge)
@@ -538,8 +568,7 @@
            end do
         end do
         !$omp end do
-
-        !$omp do private(iVertex, r, k)
+        !$omp do private(iVertex, r, k) 
         do iVertex=1,nVertices
            r = 1.0 / areaTriangle(iVertex)
            do k=1,nVertLevels
@@ -547,13 +576,28 @@
            end do
         end do
         !$omp end do
+        else
+        !DWJ 08/22/12 New delsq_circ loop
+        !$omp do private(iVertex, i, iEdge, k)
+        do iVertex=1,nVertices
+          do i = 1, vertexDegree
+            iEdge = edgesOnVertex(i, iVertex)
+            do k = 1, nVertLevels
+              delsq_circulation(k, iVertex) = delsq_circulation(k, iVertex) - dcEdge(iEdge) * delsq_u(k, iEdge) / areaTriangle(iVertex)
+            end do
+          end do
+        end do
+        !$omp end do
+        end if
 
         ! Divergence using </font>
<font color="gray">abla^2 u
         !$omp workshare
         delsq_divergence(:,:) = 0.0
         !$omp end workshare
 
-        !$omp do private(iEdge, cell1, cell2, k)
+        if(config_old_loops) then
+        !DWJ 08/22/12 Old delsq_div loop
+        !$omp do private(iEdge, cell1, cell2, k) 
         do iEdge=1,nEdges
            cell1 = cellsOnEdge(1,iEdge)
            cell2 = cellsOnEdge(2,iEdge)
@@ -571,7 +615,7 @@
         end do
         !$omp end do
 
-        !$omp do private(iCell, r, k)
+        !$omp do private(iCell, r, k) 
         do iCell = 1,nCells
            r = 1.0 / areaCell(iCell)
            do k = 1,nVertLevels
@@ -579,10 +623,27 @@
            end do
         end do
         !$omp end do
+        else
+        !DWJ 08/22/12 New delsq_div loop
+        !$omp do private(iCell, i, iEdge, k)
+        do iCell = 1, nCells 
+          do i = 1, nEdgesOnCell(iCell)
+            iEdge = edgesOnEdge(i, iCell)
+            do k = 1, nVertLevels
+              if(iCell == cellsOnEdge(1, iEdge)) then
+                delsq_divergence(k, iCell) = delsq_divergence(k, iCell) + delsq_u(k, iEdge)*dvEdge(iEdge) / areaCell(iCell)
+              else
+                delsq_divergence(k, iCell) = delsq_divergence(k, iCell) - delsq_u(k, iEdge)*dvEdge(iEdge) / areaCell(iCell)
+              end if
+            end do
+          end do
+        end do
+        !$omp end do
+        end if
 
         ! Compute - \kappa </font>
<font color="black">abla^4 u 
         ! as  </font>
<font color="black">abla div(</font>
<font color="black">abla^2 u) + k \times </font>
<font color="black">abla ( k \cross curl(</font>
<font color="gray">abla^2 u) )
-        !$omp do private(iEdge, cell1, cell2, vertex1, vertex2, k, u_diffusion)
+        !$omp do private(iEdge, cell1, cell2, vertex1, vertex2, k, u_diffusion) 
         do iEdge=1,grid % nEdgesSolve
            cell1 = cellsOnEdge(1,iEdge)
            cell2 = cellsOnEdge(2,iEdge)
@@ -618,7 +679,7 @@
 
      ! Compute u (velocity) tendency from wind stress (u_src)
      if(config_wind_stress) then
-         !$omp do private(iEdge)
+         !$omp do private(iEdge) 
          do iEdge=1,grid % nEdges
             tend_u(1,iEdge) =  tend_u(1,iEdge) &amp;
                   + u_src(1,iEdge)/rho_ref/h_edge(1,iEdge)
@@ -627,7 +688,7 @@
      endif
 
      if (config_bottom_drag) then
-         !$omp do private(iEdge, ke_edge)
+         !$omp do private(iEdge, ke_edge) 
          do iEdge=1,grid % nEdges
              ! bottom drag is the same as POP:
              ! -c |u| u  where c is unitless and 1.0e-3.
@@ -660,7 +721,7 @@
       type (state_type), intent(in) :: s
       type (mesh_type), intent(in) :: grid
 
-      integer :: iCell, iEdge, k, iTracer, cell1, cell2, i
+      integer :: iCell, iEdge, k, iTracer, cell1, cell2, i, j
       real (kind=RKIND) :: flux, tracer_edge, r
       real (kind=RKIND) :: invAreaCell1, invAreaCell2, tracer_turb_flux
       integer, dimension(:,:), pointer :: boundaryEdge
@@ -697,7 +758,8 @@
 
       if (config_tracer_adv_order == 2) then
 
-      !$omp do private(iEdge, cell1, cell2, k, iTracer, tracer_edge, flux)
+      if(config_old_loops) then
+      !$omp do private(iEdge, cell1, cell2, k, iTracer, tracer_edge, flux) 
       do iEdge=1,grid % nEdges
             cell1 = cellsOnEdge(1,iEdge)
             cell2 = cellsOnEdge(2,iEdge)
@@ -718,10 +780,35 @@
             end if
       end do 
       !$omp end do
+      else
+        !$omp do private(iCell, i, iEdge, cell1, cell2, k, iTracer, tracer_edge, flux)
+        do iCell = 1, grid % nCellsSolve
+          do i = 1, grid % nEdgesOnCell % array(iCell)
+            iEdge = grid % edgesOnCell % array(i, iCell)
+            cell1 = cellsOnEdge(1, iEdge)
+            cell2 = cellsOnEdge(2, iEdge)
+            do k = 1, grid % nVertLevels
+              do iTracer = 1, grid % nTracers
+                tracer_edge= 0.5 * (tracers(iTracer,k,cell1) + tracers(iTracer,k,cell2))
+                if(iCell == cell1) then
+                  flux = - u(k, iEdge) * dvEdge(iEdge) * h_edge(k,iEdge) * tracer_edge
+                else
+                  flux =   u(k, iEdge) * dvEdge(iEdge) * h_edge(k,iEdge) * tracer_edge
+                end if
 
+                tracer_tend(iTracer, k, iCell) = tracer_tend(iTracer,k,iCell) + flux/areaCell(iCell)
+              
+              end do
+            end do
+          end do
+        end do
+        !$omp end do
+      end if
+
       else if (config_tracer_adv_order == 3) then
 
-         !$omp do private(iEdge, cell1, cell2, d2fdx2_cell1, d2fdx2_cell2, iTracer, i, flux)
+          if(config_old_loops) then
+         !$omp do private(iEdge, cell1, cell2, d2fdx2_cell1, d2fdx2_cell2, iTracer, i, flux) 
          do iEdge=1,grid%nEdges
             cell1 = cellsOnEdge(1,iEdge)
             cell2 = cellsOnEdge(2,iEdge)
@@ -783,10 +870,70 @@
             end if
          end do
          !$omp end do
+         else
+           !$omp do private(iCell, j, iEdge, cell1, cell2, k, d2fdx2_cell1, d2fdx2_cell2, iTracer, i, flux)
+           do iCell = 1, grid % nCellsSolve
+             do j = 1, grid % nEdgesOnCell % array(iCell)
+                iEdge = grid % edgesOnCell % array(j, iCell)
+                cell1 = cellsOnEdge(1, iEdge)
+                cell2 = cellsOnEdge(2, iEdge)
+                do k=1,grid % nVertLevels
 
+                  d2fdx2_cell1 = 0.0
+                  d2fdx2_cell2 = 0.0
+
+                  do iTracer=1,grid % nTracers

+                     !-- if not a boundary cell
+                     if(boundaryCell(k,cell1).eq.0.and.boundaryCell(k,cell2).eq.0) then
+
+                        d2fdx2_cell1 = deriv_two(1,1,iEdge) * tracers(iTracer,k,cell1)
+                        d2fdx2_cell2 = deriv_two(1,2,iEdge) * tracers(iTracer,k,cell2)
+
+                        !-- all edges of cell 1
+                        do i=1, grid % nEdgesOnCell % array(cell1)
+                                d2fdx2_cell1 = d2fdx2_cell1 + &amp;
+                                deriv_two(i+1,1,iEdge) * tracers(iTracer,k,grid % CellsOnCell % array (i,cell1))
+                        end do
+
+                        !-- all edges of cell 2
+                        do i=1, grid % nEdgesOnCell % array(cell2)
+                                d2fdx2_cell2 = d2fdx2_cell2 + &amp;
+                                deriv_two(i+1,2,iEdge) * tracers(iTracer,k,grid % CellsOnCell % array (i,cell2))
+                        end do
+
+                     endif
+
+                     !-- if u &gt; 0:
+                     if (u(k,iEdge) &gt; 0) then
+                        flux = dvEdge(iEdge) * u(k,iEdge) * h_edge(k,iEdge) * (          &amp;
+                             0.5*(tracers(iTracer,k,cell1) + tracers(iTracer,k,cell2))      &amp;
+                             -(dcEdge(iEdge) **2) * (d2fdx2_cell1 + d2fdx2_cell2) / 12.          &amp;
+                             -(dcEdge(iEdge) **2) * coef_3rd_order*(d2fdx2_cell1 - d2fdx2_cell2) / 12. )
+                     !-- else u &lt;= 0:
+                     else
+                        flux = dvEdge(iEdge) *  u(k,iEdge) * h_edge(k,iEdge) * (          &amp;
+                             0.5*(tracers(iTracer,k,cell1) + tracers(iTracer,k,cell2))      &amp;
+                             -(dcEdge(iEdge) **2) * (d2fdx2_cell1 + d2fdx2_cell2) / 12.          &amp;
+                             +(dcEdge(iEdge) **2) * coef_3rd_order*(d2fdx2_cell1 - d2fdx2_cell2) / 12. )
+                     end if
+
+                     if(cell1 == iCell) then
+                       tracer_tend(iTracer,k,iCell) = tracer_tend(iTracer,k,iCell) - flux / areaCell(iCell)
+                     else
+                       tracer_tend(iTracer,k,iCell) = tracer_tend(iTracer,k,iCell) + flux / areaCell(iCell)
+                     end if
+                  enddo
+               end do
+             end do
+           end do
+           !$omp end do
+         end if
+
       else  if (config_tracer_adv_order == 4) then
 
-         !$omp do private(iEdge, cell1, cell2, k, d2fdx2_cell1, d2fdx2_cell2, iTracer, i, flux)
+         if(config_old_loops) then
+         !$omp do private(iEdge, cell1, cell2, k, d2fdx2_cell1, d2fdx2_cell2, iTracer, i, flux) 
          do iEdge=1,grid%nEdges
             cell1 = cellsOnEdge(1,iEdge)
             cell2 = cellsOnEdge(2,iEdge)
@@ -838,7 +985,57 @@
             end if
          end do
          !$omp end do
+         else
+           !$omp do private(iCell, j, iEdge, cell1, cell2, k, d2fdx2_cell1, d2fdx2_cell2, iTracer, i)
+           do iCell = 1, grid % nCellsSolve
+             do j = 1, grid % nEdgesOnCell % array(iCell)
+               iEdge = grid % edgesOnCell % array(j, iCell)
+               cell1 = cellsOnEdge(1, iEdge)
+               cell2 = cellsOnEdge(2, iEdge)
+               do k=1,grid % nVertLevels
 
+                  d2fdx2_cell1 = 0.0
+                  d2fdx2_cell2 = 0.0
+
+                  do iTracer=1,grid % nTracers
+
+                     !-- if not a boundary cell
+                     if(boundaryCell(k,cell1).eq.0.and.boundaryCell(k,cell2).eq.0) then
+
+                        d2fdx2_cell1 = deriv_two(1,1,iEdge) * tracers(iTracer,k,cell1)
+                        d2fdx2_cell2 = deriv_two(1,2,iEdge) * tracers(iTracer,k,cell2)
+
+                        !-- all edges of cell 1
+                        do i=1, grid % nEdgesOnCell % array (cell1)
+                                d2fdx2_cell1 = d2fdx2_cell1 + &amp;
+                                deriv_two(i+1,1,iEdge) * tracers(iTracer,k,grid % CellsOnCell % array (i,cell1))
+                        end do
+
+                        !-- all edges of cell 2
+                        do i=1, grid % nEdgesOnCell % array (cell2)
+                                d2fdx2_cell2 = d2fdx2_cell2 + &amp;
+                                deriv_two(i+1,2,iEdge) * tracers(iTracer,k,grid % CellsOnCell % array (i,cell2))
+                        end do
+
+                     endif
+
+                     flux = dvEdge(iEdge) *  u(k,iEdge) * h_edge(k,iEdge) * (          &amp;
+                          0.5*(tracers(iTracer,k,cell1) + tracers(iTracer,k,cell2))      &amp;
+                             -(dcEdge(iEdge) **2) * (d2fdx2_cell1 + d2fdx2_cell2) / 12. )
+
+                     if(cell1 == iCell) then
+                       tracer_tend(iTracer, k, iCell) = tracer_tend(iTracer, k, iCell) - flux/areaCell(iCell)
+                     else
+                       tracer_tend(iTracer, k, iCell) = tracer_tend(iTracer, k, iCell) + flux/areaCell(iCell)
+                     end if
+                  enddo
+               end do
+
+             end do
+           end do
+           !$omp end do
+         end if
+
       endif   ! if (config_tracer_adv_order == 2 )
 
       !
@@ -858,7 +1055,8 @@
          where(boundaryEdge.eq.1) boundaryMask=0.0
          !$omp end workshare
 
-         !$omp do private(iEdge, cell1, cell2, invAreaCell1, invAreaCell2, k, iTracer, tracer_turb_flux, flux)
+         if(config_old_loops) then
+         !$omp do private(iEdge, cell1, cell2, invAreaCell1, invAreaCell2, k, iTracer, tracer_turb_flux, flux) 
          do iEdge=1,grid % nEdges
             cell1 = cellsOnEdge(1,iEdge)
             cell2 = cellsOnEdge(2,iEdge)
@@ -885,7 +1083,33 @@
 
          end do
          !$omp end do
+         else
+           !$omp do private(iCell, invAreaCell1, i, iEdge, cell1, cell2, k, iTracer, tracer_turb_flux, flux)
+           do iCell = 1, grid % nCellsSolve
+             invAreaCell1 = 1.0 / areaCell(iCell)
+             do i = 1, grid % nEdgesOnCell % array(iCell)
+               iEdge = grid % edgesOnCell % array(i, iCell)
+               cell1 = cellsOnEdge(1, iEdge)
+               cell2 = cellsOnEdge(2, iEdge)
 
+               do k = 1, grid %nVertLevels
+                 do iTracer = 1, grid % nTracers
+                   tracer_turb_flux = config_h_tracer_eddy_diff2 * (tracers(iTracer,k, cell2) - tracers(iTracer, k, cell1)) /dcEdge(iEdge)
+
+                   flux = dvEdge(iEdge) * h_edge(k, iEdge) * tracer_turb_flux * boundaryMask(k, iEdge)
+
+                   if(iCell == cell1) then
+                     tracer_tend(iTracer, k, iCell) = tracer_tend(iTracer, k, iCell) + flux * invAreaCell1
+                   else
+                     tracer_tend(iTracer, k, iCell) = tracer_tend(iTracer, k, iCell) - flux * invAreaCell1
+                   end if
+                 end do
+               end do
+             end do
+           end do
+           !$omp end do
+         end if
+
          !$omp single
          deallocate(boundaryMask)
          !$omp end single
@@ -914,8 +1138,9 @@
          delsq_tracer(:,:,:) = 0.
          !$omp end workshare
 
+         if(config_old_loops) then
          ! first del2: div(h </font>
<font color="gray">abla \phi) at cell center
-         !$omp do private(iEdge, cell1, cell2, k, iTracer)
+         !$omp do private(iEdge, cell1, cell2, k, iTracer) 
          do iEdge=1,grid % nEdges
             cell1 = cellsOnEdge(1,iEdge)
             cell2 = cellsOnEdge(2,iEdge)
@@ -932,7 +1157,31 @@
          end do
          !$omp end do
 
-         !$omp do private(iCell, r, k, iTracer)
+         else
+           !$omp do private(iCell, i, iEdge, cell1, cell2, k, iTracer)
+           do iCell = 1, grid % nCellsSolve
+             do i = 1, grid % nEdgesOnCell % array(iCell)
+               iEdge = grid % edgesOnCell % array(i, iCell)
+               cell1 = cellsOnEdge(1, iEdge)
+               cell2 = cellsOnEdge(2, iEdge)
+
+               do k = 1, grid % nVertLevels
+                 do iTracer = 1, grid % nTracers
+                   if(iCell == cell1) then
+                     delsq_tracer(iTracer,k,iCell) = delsq_tracer(iTracer, k, iCell) + dvEdge(iEdge) * h_edge(k,iEdge) &amp;
+                         * (tracers(iTracer, k, cell2) - tracers(iTracer, k, cell1))/dcEdge(iEdge) * boundaryMask(k,iEdge)
+                   else
+                     delsq_tracer(iTracer,k,iCell) = delsq_tracer(iTracer, k, iCell) - dvEdge(iEdge) * h_edge(k,iEdge) &amp;
+                         * (tracers(iTracer, k, cell2) - tracers(iTracer, k, cell1))/dcEdge(iEdge) * boundaryMask(k,iEdge)
+                   end if
+                 end do
+               end do
+             end do
+           end do
+           !$omp end do
+         end if
+
+         !$omp do private(iCell, r, k, iTracer) 
          do iCell = 1, grid % nCells
             r = 1.0 / grid % areaCell % array(iCell)
             do k=1,grid % nVertLevels
@@ -943,8 +1192,9 @@
          end do
          !$omp end do
 
+         if(config_old_loops) then
          ! second del2: div(h </font>
<font color="gray">abla [delsq_tracer]) at cell center
-         !$omp do private(iEdge, cell1, cell2, invAreaCell1, invAreaCell2, k, iTracer, tracer_turb_flux, flux)
+         !$omp do private(iEdge, cell1, cell2, invAreaCell1, invAreaCell2, k, iTracer, tracer_turb_flux, flux) 
          do iEdge=1,grid % nEdges
             cell1 = grid % cellsOnEdge % array(1,iEdge)
             cell2 = grid % cellsOnEdge % array(2,iEdge)
@@ -967,7 +1217,31 @@
 
          end do
          !$omp end do
+         else
+           !$omp do private(iCell, invAreaCell1, i, iEdge, cell1, cell2, k, iTracer, tracer_turb_flux, flux)
+           do iCell = 1, grid % nCellsSolve
+             invAreaCell1 = 1.0 / grid % areaCell % array(iCell)
+             do i = 1, grid % nEdgesOnCell % array(iCell)
+               iEdge = grid % edgesOnCell % array(i, iCell)
+               cell1 = grid % cellsOnEdge % array(1, iEdge)
+               cell2 = grid % cellsOnEdge % array(2, iEdge)
+               do k = 1, grid % nVertLevels
+                 do iTracer = 1, grid % nTracers
+                   tracer_turb_flux = config_h_tracer_eddy_diff4 * (delsq_tracer(iTracer,k,cell2) - delsq_tracer(iTracer,k,cell1)) /dcEdge(iEdge)
+                   flux = dvEdge(iEdge) * tracer_turb_flux
 
+                   if(iCell == cell1) then
+                     tracer_tend(iTracer,k,iCell) = tracer_tend(iTracer,k,iCell) - flux * invAreaCell1 * boundaryMask(k,iEdge)
+                   else
+                     tracer_tend(iTracer,k,iCell) = tracer_tend(iTracer,k,iCell) + flux * invAreaCell1 * boundaryMask(k,iEdge)
+                   end if
+                 end do
+               end do
+             end do
+           end do
+           !$omp end do
+         end if
+
          !$omp sections
          !$omp section
          deallocate(delsq_tracer)
@@ -1064,7 +1338,8 @@
       boundaryCell(:,:) = 0
       !$omp end workshare
 
-      !$omp do private(iEdge, k, cell1, cell2)
+      if(config_old_loops) then
+      !$omp do private(iEdge, k, cell1, cell2) 
       do iEdge=1,nEdges
        do k=1,nVertLevels
          if(boundaryEdge(k,iEdge).eq.1) then
@@ -1081,6 +1356,18 @@
        enddo
       enddo
       !$omp end do
+      else
+        !$omp do private(iCell, i, iEdge, k)
+        do iCell = 1, nCells
+          do i = 1, grid % nEdgesOnCell % array(iCell)
+            iEdge = grid % edgesOnCell % array(i, iCell)
+            do k = 1, nVertLevels
+              boundaryCell(k, iCell) = max(boundaryCell(k,iCell), boundaryEdge(k, iEdge))
+            end do
+          end do
+        end do
+        !$omp end do
+      end if
 
       !
       ! Compute height on cell edges at velocity locations
@@ -1093,7 +1380,7 @@
 
       if (config_thickness_adv_order == 2) then
 
-         !$omp do private(iEdge, cell1, cell2, k)
+         !$omp do private(iEdge, cell1, cell2, k) 
          do iEdge=1,grid % nEdges
             cell1 = cellsOnEdge(1,iEdge)
             cell2 = cellsOnEdge(2,iEdge)
@@ -1107,7 +1394,7 @@
 
       else if (config_thickness_adv_order == 3) then
 
-         !$omp do private(iEdge, cell1, cell2, k, d2fdx2_cell1, d2fdx2_cell2, i)
+         !$omp do private(iEdge, cell1, cell2, k, d2fdx2_cell1, d2fdx2_cell2, i) 
          do iEdge=1,grid%nEdges
             cell1 = cellsOnEdge(1,iEdge)
             cell2 = cellsOnEdge(2,iEdge)
@@ -1161,7 +1448,7 @@
 
       else  if (config_thickness_adv_order == 4) then
 
-         !$omp do private(iEdge, cell1, cell2, k, d2fdx2_cell1, d2fdx2_cell2, i)
+         !$omp do private(iEdge, cell1, cell2, k, d2fdx2_cell1, d2fdx2_cell2, i) 
          do iEdge=1,grid%nEdges
             cell1 = cellsOnEdge(1,iEdge)
             cell2 = cellsOnEdge(2,iEdge)
@@ -1218,7 +1505,8 @@
       circulation(:,:) = 0.0
       !$omp end workshare
 
-      !$omp do private(iEdge, k)
+      if(config_old_loops) then
+      !$omp do private(iEdge, k) 
       do iEdge=1,nEdges
          do k=1,nVertLevels
             !$omp critical (crit_vert1_circ)
@@ -1231,8 +1519,27 @@
          end do
       end do
       !$omp end do
+      else
+        !$omp do private(iVertex, i, iEdge, vertex1, vertex2, k)
+        do iVertex = 1, grid % nVertices
+          do i = 1, grid % vertexDegree
+            iEdge = grid % edgesOnVertex % array(i, iVertex)
+            vertex1 = grid % verticesOnEdge % array(1, iEdge)
+            vertex2 = grid % verticesOnEdge % array(2, iEdge)
 
-      !$omp do private(iVertex, k)
+            do k = 1, grid % nVertLevels
+              if(vertex1 == iVertex) then
+                circulation(k, iVertex) = circulation(k, iVertex) - dcEdge(iEdge) * u(k, iEdge)
+              else
+                circulation(k, iVertex) = circulation(k, iVertex) + dcEdge(iEdge) * u(k, iEdge)
+              end if
+            end do
+          end do
+        end do
+        !$omp end do
+      end if
+
+      !$omp do private(iVertex, k) 
       do iVertex=1,nVertices
          do k=1,nVertLevels
             vorticity(k,iVertex) = circulation(k,iVertex) / areaTriangle(iVertex)
@@ -1248,7 +1555,8 @@
       divergence(:,:) = 0.0
       !$omp end workshare
 
-      !$omp do private(iEdge, cell1, cell2, k)
+      if(config_old_loops) then
+      !$omp do private(iEdge, cell1, cell2, k) 
       do iEdge=1,nEdges
          cell1 = cellsOnEdge(1,iEdge)
          cell2 = cellsOnEdge(2,iEdge)
@@ -1268,8 +1576,27 @@
          end if
       end do
       !$omp end do
+      else
+        !$omp do private(iCell, i, iEdge, cell1, cell2)
+        do iCell = 1, nCells
+          do i = 1, grid % nEdgesOnCell % array(iCell)
+            iEdge = grid % edgesOnCell % array(i, iCell)
+            cell1 = cellsOnEdge(1, iEdge)
+            cell2 = cellsOnEdge(2, iEdge)
 
-      !$omp do private(iCell, r, k)
+            do k = 1, nVertLevels
+              if(cell1 == iCell) then
+                divergence(k, iCell) = divergence(k, iCell) + u(k, iEdge) * dvEdge(iEdge)
+              else
+                divergence(k, iCell) = divergence(k, iCell) - u(k, iEdge) * dvEdge(iEdge)
+              end if
+            end do
+          end do
+        end do
+        !$omp end do
+      end if
+
+      !$omp do private(iCell, r, k) 
       do iCell = 1,nCells
         r = 1.0 / areaCell(iCell)
         do k = 1,nVertLevels
@@ -1285,7 +1612,7 @@
       ke(:,:) = 0.0
       !$omp end workshare
 
-      !$omp do private(iCell, i, iEdge, k)
+      !$omp do private(iCell, i, iEdge, k) 
       do iCell=1,nCells
          do i=1,nEdgesOnCell(iCell)
             iEdge = edgesOnCell(i,iCell)
@@ -1306,7 +1633,7 @@
       v(:,:) = 0.0
       !$omp end workshare
 
-      !$omp do private(iEdge, i, eoe, k)
+      !$omp do private(iEdge, i, eoe, k) 
       do iEdge = 1,nEdges
          do i=1,nEdgesOnEdge(iEdge)
             eoe = edgesOnEdge(i,iEdge)
@@ -1325,7 +1652,7 @@
       vh(:,:) = 0.0
       !$omp end workshare
 
-      !$omp do private(iEdge, j, eoe, k)
+      !$omp do private(iEdge, j, eoe, k) 
       do iEdge=1,grid % nEdgesSolve
          do j=1,nEdgesOnEdge(iEdge)
             eoe = edgesOnEdge(j,iEdge)
@@ -1342,7 +1669,7 @@
       ! Compute height at vertices, pv at vertices, and average pv to edge locations
       !  ( this computes pv_vertex at all vertices bounding real cells and distance-1 ghost cells )
       !
-      !$omp do private(iVertex, k, i)
+      !$omp do private(iVertex, k, i) 
       do iVertex = 1,nVertices
          do k=1,nVertLevels
             h_vertex(k,iVertex) = 0.0
@@ -1360,7 +1687,7 @@
       ! Compute gradient of PV in the tangent direction
       !   ( this computes gradPVt at all edges bounding real cells and distance-1 ghost cells )
       !
-      !$omp do private(iEdge, k)
+      !$omp do private(iEdge, k) 
       do iEdge = 1,nEdges
          do k = 1,nVertLevels
            gradPVt(k,iEdge) = (pv_vertex(k,verticesOnEdge(2,iEdge)) - pv_vertex(k,verticesOnEdge(1,iEdge))) / &amp;
@@ -1377,7 +1704,7 @@
       pv_edge(:,:) = 0.0
       !$omp end workshare
 
-      !$omp do private(iVertex, i, iEdge, k)
+      !$omp do private(iVertex, i, iEdge, k) 
       do iVertex = 1,nVertices
         do i=1,grid % vertexDegree
            iEdge = edgesOnVertex(i,iVertex)
@@ -1392,7 +1719,7 @@
       !
       ! Modify PV edge with upstream bias. 
       !
-      !$omp do private(iEdge, k)
+      !$omp do private(iEdge, k) 
       do iEdge = 1,nEdges
          do k = 1,nVertLevels
            pv_edge(k,iEdge) = pv_edge(k,iEdge) - config_apvm_upwinding * v(k,iEdge) * dt * gradPVt(k,iEdge)
@@ -1410,7 +1737,7 @@
       vorticity_cell(:,:) = 0.0
       !$omp end workshare
 
-      !$omp do private(iVertex, i, iCell, k)
+      !$omp do private(iVertex, i, iCell, k) 
       do iVertex = 1, nVertices
        do i=1,grid % vertexDegree
          iCell = cellsOnVertex(i,iVertex)
@@ -1432,7 +1759,7 @@
       gradPVn(:,:) = 0.0
       !$omp end workshare
 
-      !$omp do private(iEdge, k)
+      !$omp do private(iEdge, k) 
       do iEdge = 1,nEdges
         if( cellsOnEdge(1,iEdge) &lt;= nCells .and. cellsOnEdge(2,iEdge) &lt;= nCells) then
           do k = 1,nVertLevels
@@ -1445,7 +1772,7 @@
 
       ! Modify PV edge with upstream bias.
       !
-      !$omp do private(iEdge, k)
+      !$omp do private(iEdge, k) 
       do iEdge = 1,nEdges
          do k = 1,nVertLevels
            pv_edge(k,iEdge) = pv_edge(k,iEdge) - config_apvm_upwinding * u(k,iEdge) * dt * gradPVn(k,iEdge)
@@ -1511,7 +1838,7 @@
  
       if(maxval(boundaryEdge).le.0) return
 
-      !$omp do private(iEdge, k)
+      !$omp do private(iEdge, k) 
       do iEdge = 1,nEdges
         do k = 1,nVertLevels
 

</font>
</pre>