<p><b>dwj07@fsu.edu</b> 2012-08-21 11:14:59 -0600 (Tue, 21 Aug 2012)</p><p><br>
        -- BRANCH COMMIT --<br>
<br>
        First cut at OpenMP on element loops.<br>
<br>
        Setting up Makefile to handle OpenMP.<br>
</p><hr noshade><pre><font color="gray">Modified: branches/omp_blocks/openmp_test/Makefile
===================================================================
--- branches/omp_blocks/openmp_test/Makefile        2012-08-21 14:29:49 UTC (rev 2110)
+++ branches/omp_blocks/openmp_test/Makefile        2012-08-21 17:14:59 UTC (rev 2111)
@@ -131,6 +131,8 @@
         &quot;DEBUG = $(DEBUG)&quot; \
         &quot;SERIAL = $(SERIAL)&quot; \
         &quot;USE_PAPI = $(USE_PAPI)&quot; \
+        &quot;OPENMP = $(OPENMP)&quot; \
+        &quot;OMP_FLAG = -fopenmp&quot; \
         &quot;CPPFLAGS = $(MODEL_FORMULATION) -DUNDERSCORE -m64 $(FILE_OFFSET) $(ZOLTAN_DEFINE)&quot; )
 
 g95:
@@ -267,6 +269,15 @@
         PAPI_MESSAGE=&quot;Papi libraries are off.&quot;
 endif # USE_PAPI IF
 
+ifeq &quot;$(OPENMP)&quot; &quot;true&quot;
+        override CFLAGS += $(OMP_FLAG)
+        override FFLAGS += $(OMP_FLAG)
+        override LDFLAGS += $(OMP_FLAG)
+        OMP_MESSAGE=&quot;OpenMP is on.&quot;
+else
+        OMP_MESSAGE=&quot;OpenMP is off.&quot;
+endif
+
 ifneq ($(wildcard $(NETCDF)/lib/libnetcdff.*), ) # CHECK FOR NETCDF4
         LIBS += -lnetcdff
 endif # CHECK FOR NETCDF4
@@ -306,6 +317,7 @@
         @echo $(DEBUG_MESSAGE)
         @echo $(SERIAL_MESSAGE)
         @echo $(PAPI_MESSAGE)
+        @echo $(OMP_MESSAGE)
 clean:
         cd src; $(MAKE) clean RM=&quot;$(RM)&quot; CORE=&quot;$(CORE)&quot;
         $(RM) $(CORE)_model.exe

Modified: branches/omp_blocks/openmp_test/src/core_sw_elements/mpas_sw_mpas_core.F
===================================================================
--- branches/omp_blocks/openmp_test/src/core_sw_elements/mpas_sw_mpas_core.F        2012-08-21 14:29:49 UTC (rev 2110)
+++ branches/omp_blocks/openmp_test/src/core_sw_elements/mpas_sw_mpas_core.F        2012-08-21 17:14:59 UTC (rev 2111)
@@ -156,12 +156,15 @@
       integer :: itimestep
       real (kind=RKIND) :: dt
       type (block_type), pointer :: block_ptr
+      type (block_type) :: block
 
       type (MPAS_Time_Type) :: currTime
       character(len=StrKIND) :: timeStamp
       integer :: ierr
    
       ! Eventually, dt should be domain specific
+      !$omp parallel default(shared)
+      !$omp single
       dt = config_dt
 
       currTime = mpas_get_clock_time(clock, MPAS_NOW, ierr)
@@ -173,8 +176,10 @@
       ! During integration, time level 1 stores the model state at the beginning of the
       !   time step, and time level 2 stores the state advanced dt in time by timestep(...)
       itimestep = 0
+      !$omp end single
       do while (.not. mpas_is_clock_stop_time(clock))
 
+         !$omp single
          itimestep = itimestep + 1
          call mpas_advance_clock(clock)
 
@@ -183,18 +188,28 @@
          write(0,*) 'Doing timestep ', trim(timeStamp)
 
          call mpas_timer_start(&quot;time integration&quot;)
+         !$omp end single
          call mpas_timestep(domain, itimestep, dt, timeStamp)
+         !$omp single
          call mpas_timer_stop(&quot;time integration&quot;)
+         !$omp end single
 
          ! Move time level 2 fields back into time level 1 for next time step
+         !$omp single private(block_ptr)
          block_ptr =&gt; domain % blocklist
          do while(associated(block_ptr))
-            call mpas_shift_time_levels_state(block_ptr % state)
+            block = block_ptr 
+            !$omp task firstprivate(block)
+            call mpas_shift_time_levels_state(block % state)
+            !$omp end task
             block_ptr =&gt; block_ptr % next
          end do
+         !$omp taskwait
+         !$omp end single
 
          !TODO: mpas_get_clock_ringing_alarms is probably faster than multiple mpas_is_alarm_ringing...
 
+         !$omp single
          if (mpas_is_alarm_ringing(clock, outputAlarmID, ierr=ierr)) then
             call mpas_reset_clock_alarm(clock, outputAlarmID, ierr=ierr)
             ! output_frame will always be &gt; 1 here unless it was reset after the maximum number of frames per outfile was reached
@@ -213,8 +228,10 @@
             call mpas_output_state_for_domain(restart_obj, domain, 1)
             call mpas_output_state_finalize(restart_obj, domain % dminfo)
          end if
+         !$omp end single
 
       end do
+      !$omp end parallel
 
    end subroutine mpas_core_run
    

Modified: branches/omp_blocks/openmp_test/src/core_sw_elements/mpas_sw_time_integration.F
===================================================================
--- branches/omp_blocks/openmp_test/src/core_sw_elements/mpas_sw_time_integration.F        2012-08-21 14:29:49 UTC (rev 2110)
+++ branches/omp_blocks/openmp_test/src/core_sw_elements/mpas_sw_time_integration.F        2012-08-21 17:14:59 UTC (rev 2111)
@@ -5,8 +5,8 @@
    use mpas_configure
    use mpas_constants
    use mpas_dmpar
+   use mpas_timer
 
-
    contains
 
 
@@ -27,6 +27,7 @@
       character(len=*), intent(in) :: timeStamp
 
       type (block_type), pointer :: block
+      type (block_type) :: block_d
 
       if (trim(config_time_integration) == 'RK4') then
          call sw_rk4(domain, dt)
@@ -36,11 +37,17 @@
          stop
       end if
 
+      !$omp single private(block)
       block =&gt; domain % blocklist
       do while (associated(block))
-         block % state % time_levs(2) % state % xtime % scalar = timeStamp 
+         block_d = block
+         !$omp task firstprivate(block_d)
+         block_d % state % time_levs(2) % state % xtime % scalar = timeStamp 
+         !$omp end task
          block =&gt; block % next
       end do
+      !$omp taskwait
+      !$omp end single
 
    end subroutine sw_timestep
 
@@ -70,7 +77,10 @@
 
       real (kind=RKIND), dimension(4) :: rk_weights, rk_substep_weights
 
+      !$omp single
+      call mpas_timer_start('computations')
       call mpas_setup_provis_states(domain % blocklist)
+      !$omp end single
    
      !
      ! Initialize time_levs(2) with state at current time
@@ -81,31 +91,44 @@
      block =&gt; domain % blocklist
      do while (associated(block))
 
+        !$omp workshare
         block % state % time_levs(2) % state % u % array(:,:) = block % state % time_levs(1) % state % u % array(:,:)
         block % state % time_levs(2) % state % h % array(:,:) = block % state % time_levs(1) % state % h % array(:,:)
+        !$omp end workshare
+
+        !$omp do private(iCell, k)
         do iCell=1,block % mesh % nCells  ! couple tracers to h
           do k=1,block % mesh % nVertLevels
             block % state % time_levs(2) % state % tracers % array(:,k,iCell) = block % state % time_levs(1) % state % tracers % array(:,k,iCell) &amp;
                                                                       * block % state % time_levs(1) % state % h % array(k,iCell)
            end do
         end do
+        !$omp end do
 
+        !$omp single
         call mpas_copy_state(block % provis, block % state % time_levs(1) % state)
+        !$omp end single
 
         block =&gt; block % next
      end do
 
+     !$omp sections
+     !$omp section
      rk_weights(1) = dt/6.
      rk_weights(2) = dt/3.
      rk_weights(3) = dt/3.
      rk_weights(4) = dt/6.
 
+     !$omp section
      rk_substep_weights(1) = dt/2.
      rk_substep_weights(2) = dt/2.
      rk_substep_weights(3) = dt
      rk_substep_weights(4) = 0.
+     !$omp end sections
+     !$omp single
+      call mpas_timer_stop('computations')
+      !$omp end single
 
-
      !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 
      ! BEGIN RK loop 
      !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 
@@ -113,12 +136,17 @@
 
 ! --- update halos for diagnostic variables
 
+        !$omp single
+        call mpas_timer_start('communications')
         call mpas_dmpar_exch_halo_field(domain % blocklist % provis % pv_edge)
 
         if (config_h_mom_eddy_visc4 &gt; 0.0) then
             call mpas_dmpar_exch_halo_field(domain % blocklist % provis % divergence)
             call mpas_dmpar_exch_halo_field(domain % blocklist % provis % vorticity)
        end if
+        call mpas_timer_stop('communications')
+        call mpas_timer_start('computations')
+       !$omp end single
 
 ! --- compute tendencies
 
@@ -132,19 +160,29 @@
 
 ! --- update halos for prognostic variables
 
+       !$omp single
+        call mpas_timer_stop('computations')
+        call mpas_timer_start('communications')
        call mpas_dmpar_exch_halo_field(domain % blocklist % tend % u)
        call mpas_dmpar_exch_halo_field(domain % blocklist % tend % h)
        call mpas_dmpar_exch_halo_field(domain % blocklist % tend % tracers)
+        call mpas_timer_stop('communications')
+        call mpas_timer_start('computations')
+       !$omp end single
 
 ! --- compute next substep state
 
        if (rk_step &lt; 4) then
           block =&gt; domain % blocklist
           do while (associated(block))
+             !$omp workshare
              block % provis % u % array(:,:) = block % state % time_levs(1) % state % u % array(:,:)  &amp;
                                              + rk_substep_weights(rk_step) * block % tend % u % array(:,:)
              block % provis % h % array(:,:) = block % state % time_levs(1) % state % h % array(:,:)  &amp;
                                              + rk_substep_weights(rk_step) * block % tend % h % array(:,:)
+             !$omp end workshare
+
+             !$omp do private(iCell, k)
              do iCell=1,block % mesh % nCells
                 do k=1,block % mesh % nVertLevels
                    block % provis % tracers % array(:,k,iCell) = ( block % state % time_levs(1) % state % h % array(k,iCell) * &amp;
@@ -153,8 +191,11 @@
                                                                  ) / block % provis % h % array(k,iCell)
                 end do
              end do
+             !$omp end do
              if (config_test_case == 1) then    ! For case 1, wind field should be fixed
+                !$omp workshare
                 block % provis % u % array(:,:) = block % state % time_levs(1) % state % u % array(:,:)
+                !$omp end workshare
              end if
              call sw_compute_solve_diagnostics(dt, block % provis, block % mesh)
              block =&gt; block % next
@@ -165,10 +206,14 @@
 
        block =&gt; domain % blocklist
        do while (associated(block))
+          !$omp workshare
           block % state % time_levs(2) % state % u % array(:,:) = block % state % time_levs(2) % state % u % array(:,:) &amp;
                                   + rk_weights(rk_step) * block % tend % u % array(:,:) 
           block % state % time_levs(2) % state % h % array(:,:) = block % state % time_levs(2) % state % h % array(:,:) &amp;
                                   + rk_weights(rk_step) * block % tend % h % array(:,:) 
+          !$omp end workshare
+
+          !$omp do private(iCell, k)
           do iCell=1,block % mesh % nCells
              do k=1,block % mesh % nVertLevels
                 block % state % time_levs(2) % state % tracers % array(:,k,iCell) =  &amp;
@@ -176,9 +221,13 @@
                                               + rk_weights(rk_step) * block % tend % tracers % array(:,k,iCell)
              end do
           end do
+          !$omp end do
           block =&gt; block % next
        end do
 
+       !$omp single
+        call mpas_timer_stop('computations')
+        !$omp end single
       end do
       !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 
       ! END RK loop 
@@ -188,8 +237,12 @@
       !
       !  A little clean up at the end: decouple new scalar fields and compute diagnostics for new state
       !
+      !$omp single
+      call mpas_timer_start('computations')
+      !$omp end single
       block =&gt; domain % blocklist
       do while (associated(block))
+         !$omp do private(iCell, k)
          do iCell=1,block % mesh % nCells
             do k=1,block % mesh % nVertLevels
                block % state % time_levs(2) % state % tracers % array(:,k,iCell) = &amp;
@@ -197,13 +250,17 @@
                                                                    / block % state % time_levs(2) % state % h % array(k,iCell)
             end do
          end do
+         !$omp end do
 
          if (config_test_case == 1) then    ! For case 1, wind field should be fixed
+            !$omp workshare
             block % state % time_levs(2) % state % u % array(:,:) = block % state % time_levs(1) % state % u % array(:,:)
+            !$omp end workshare
          end if
 
          call sw_compute_solve_diagnostics(dt, block % state % time_levs(2) % state, block % mesh)
 
+         !$omp single
          call mpas_reconstruct(block % mesh, block % state % time_levs(2) % state % u % array,          &amp;
                           block % state % time_levs(2) % state % uReconstructX % array,            &amp;
                           block % state % time_levs(2) % state % uReconstructY % array,            &amp;
@@ -211,11 +268,15 @@
                           block % state % time_levs(2) % state % uReconstructZonal % array,        &amp;
                           block % state % time_levs(2) % state % uReconstructMeridional % array    &amp;
                          )
+         !$omp end single
 
          block =&gt; block % next
       end do
 
+      !$omp single
       call mpas_deallocate_provis_states(domain % blocklist)
+      call mpas_timer_stop('computations')
+      !$omp end single
 
    end subroutine sw_rk4
 
@@ -256,7 +317,6 @@
       real (kind=RKIND), parameter :: rho_ref = 1000.0
       real (kind=RKIND) :: ke_edge
 
-
       h           =&gt; s % h % array
       u           =&gt; s % u % array
       v           =&gt; s % v % array
@@ -299,31 +359,47 @@
       meshScalingDel2 =&gt; grid % meshScalingDel2 % array
       meshScalingDel4 =&gt; grid % meshScalingDel4 % array
 
-
       !
       ! Compute height tendency for each cell
       !
+      !$omp workshare
       tend_h(:,:) = 0.0
+      !$omp end workshare
+
+      !$omp do private(iEdge, cell1, cell2, flux)
       do iEdge=1,nEdges
          cell1 = cellsOnEdge(1,iEdge)
          cell2 = cellsOnEdge(2,iEdge)
          do k=1,nVertLevels
             flux = u(k,iEdge) * dvEdge(iEdge) * h_edge(k,iEdge)
+            !$omp critical
             tend_h(k,cell1) = tend_h(k,cell1) - flux
+            !$omp end critical
+
+            !$omp critical
             tend_h(k,cell2) = tend_h(k,cell2) + flux
+            !$omp end critical
          end do
       end do 
+      !$omp end do
+
+      !$omp do private(iCell, k)
       do iCell=1,grid % nCellsSolve
          do k=1,nVertLevels
             tend_h(k,iCell) = tend_h(k,iCell) / areaCell(iCell)
          end do
       end do
+      !$omp end do
 
 #ifdef LANL_FORMULATION
       !
       ! Compute u (normal) velocity tendency for each edge (cell face)
       !
+      !$omp workshare
       tend_u(:,:) = 0.0
+      !$omp end workshare
+
+      !$omp do private(iCell, cell1, cell2, vertex1, vertex2, k, q, j, eoe, workpv)
       do iEdge=1,grid % nEdgesSolve
          cell1 = cellsOnEdge(1,iEdge)
          cell2 = cellsOnEdge(2,iEdge)
@@ -345,6 +421,7 @@
                                   ) / dcEdge(iEdge)
          end do
       end do
+      !$omp end do
 
 
 #endif
@@ -353,7 +430,11 @@
       !
       ! Compute u (normal) velocity tendency for each edge (cell face)
       !
+      !$omp workshare
       tend_u(:,:) = 0.0
+      !$omp end workshare
+
+      !$omp do private(iEdge, vertex1, vertex2, cell1, cell2, k, vorticity_abs, workpv)
       do iEdge=1,grid % nEdgesSolve
          vertex1 = verticesOnEdge(1,iEdge)
          vertex2 = verticesOnEdge(2,iEdge)
@@ -373,11 +454,13 @@
                               dcEdge(iEdge)
          end do
       end do
+      !$omp end do
 #endif
 
      ! Compute diffusion, computed as </font>
<font color="black">abla divergence - k \times </font>
<font color="gray">abla vorticity
      !                    only valid for visc == constant
      if (config_h_mom_eddy_visc2 &gt; 0.0) then
+        !$omp do private(iEdge, cell1, cell2, vertex1, vertex2, k, u_diffusion)
         do iEdge=1,grid % nEdgesSolve
            cell1 = cellsOnEdge(1,iEdge)
            cell2 = cellsOnEdge(2,iEdge)
@@ -391,6 +474,7 @@
               tend_u(k,iEdge) = tend_u(k,iEdge) + u_diffusion
            end do
         end do
+        !$omp end do
      end if
 
      !
@@ -400,14 +484,23 @@
      !   strictly only valid for h_mom_eddy_visc4 == constant
      !
      if (config_h_mom_eddy_visc4 &gt; 0.0) then
+        !$omp sections
+        !$omp section
         allocate(delsq_divergence(nVertLevels, nCells+1))
+        !$omp section
         allocate(delsq_u(nVertLevels, nEdges+1))
+        !$omp section
         allocate(delsq_circulation(nVertLevels, nVertices+1))
+        !$omp section
         allocate(delsq_vorticity(nVertLevels, nVertices+1))
+        !$omp end sections
 
+        !$omp workshare
         delsq_u(:,:) = 0.0
+        !$omp end workshare
 
         ! Compute </font>
<font color="black">abla^2 u = </font>
<font color="black">abla divergence + k \times </font>
<font color="gray">abla vorticity
+        !$omp do private(iEdge, cell1, cell2, vertex1, vertex2, k)
         do iEdge=1,grid % nEdges
            cell1 = cellsOnEdge(1,iEdge)
            cell2 = cellsOnEdge(2,iEdge)
@@ -421,47 +514,75 @@
 
            end do
         end do
+        !$omp end do
 
         ! vorticity using </font>
<font color="blue">abla^2 u
+        !$omp workshare
         delsq_circulation(:,:) = 0.0
+        !$omp end workshare
+
+        !$omp do private(iEdge, vertex1, vertex2)
         do iEdge=1,nEdges
            vertex1 = verticesOnEdge(1,iEdge)
            vertex2 = verticesOnEdge(2,iEdge)
            do k=1,nVertLevels
+              !$omp critical
               delsq_circulation(k,vertex1) = delsq_circulation(k,vertex1) &amp;
                    - dcEdge(iEdge) * delsq_u(k,iEdge)
+              !$omp end critical
+
+              !$omp critical
               delsq_circulation(k,vertex2) = delsq_circulation(k,vertex2) &amp;
                    + dcEdge(iEdge) * delsq_u(k,iEdge)
+              !$omp end critical
            end do
         end do
+        !$omp end do
+
+        !$omp do private(iVertex, r, k)
         do iVertex=1,nVertices
            r = 1.0 / areaTriangle(iVertex)
            do k=1,nVertLevels
               delsq_vorticity(k,iVertex) = delsq_circulation(k,iVertex) * r
            end do
         end do
+        !$omp end do
 
         ! Divergence using </font>
<font color="blue">abla^2 u
+        !$omp workshare
         delsq_divergence(:,:) = 0.0
+        !$omp end workshare
+
+        !$omp do private(iEdge, cell1, cell2, k)
         do iEdge=1,nEdges
            cell1 = cellsOnEdge(1,iEdge)
            cell2 = cellsOnEdge(2,iEdge)
            do k=1,nVertLevels
+              !$omp critical
               delsq_divergence(k,cell1) = delsq_divergence(k,cell1) &amp;
                    + delsq_u(k,iEdge)*dvEdge(iEdge)
+              !$omp end critical
+
+              !$omp critical
               delsq_divergence(k,cell2) = delsq_divergence(k,cell2) &amp;
                    - delsq_u(k,iEdge)*dvEdge(iEdge)
+              !$omp end critical
            end do
         end do
+        !$omp end do
+
+        !$omp do private(iCell, r, k)
         do iCell = 1,nCells
            r = 1.0 / areaCell(iCell)
            do k = 1,nVertLevels
               delsq_divergence(k,iCell) = delsq_divergence(k,iCell) * r
            end do
         end do
+        !$omp end do
 
         ! Compute - \kappa </font>
<font color="black">abla^4 u 
         ! as  </font>
<font color="black">abla div(</font>
<font color="black">abla^2 u) + k \times </font>
<font color="black">abla ( k \cross curl(</font>
<font color="gray">abla^2 u) )
+        !$omp do private(iEdge, cell1, cell2, vertex1, vertex2, k, u_diffusion)
         do iEdge=1,grid % nEdgesSolve
            cell1 = cellsOnEdge(1,iEdge)
            cell2 = cellsOnEdge(2,iEdge)
@@ -480,23 +601,33 @@
 
            end do
         end do
+        !$omp end do
 
+        !$omp sections
+        !$omp section
         deallocate(delsq_divergence)
+        !$omp section
         deallocate(delsq_u)
+        !$omp section
         deallocate(delsq_circulation)
+        !$omp section
         deallocate(delsq_vorticity)
+        !$omp end sections
 
      end if
 
      ! Compute u (velocity) tendency from wind stress (u_src)
      if(config_wind_stress) then
+         !$omp do private(iEdge)
          do iEdge=1,grid % nEdges
             tend_u(1,iEdge) =  tend_u(1,iEdge) &amp;
                   + u_src(1,iEdge)/rho_ref/h_edge(1,iEdge)
          end do
+         !$omp end do
      endif
 
      if (config_bottom_drag) then
+         !$omp do private(iEdge, ke_edge)
          do iEdge=1,grid % nEdges
              ! bottom drag is the same as POP:
              ! -c |u| u  where c is unitless and 1.0e-3.
@@ -508,6 +639,7 @@
                   - 1.0e-3*u(1,iEdge) &amp;
                   *sqrt(2.0*ke_edge)/h_edge(1,iEdge)
          end do
+         !$omp end do
      endif
  
    end subroutine sw_compute_tend
@@ -559,11 +691,13 @@
       if (config_tracer_adv_order == 3) coef_3rd_order = 1.0
       if (config_tracer_adv_order == 3 .and. config_monotonic) coef_3rd_order = 0.25
 
-
+      !$omp workshare
       tracer_tend(:,:,:) = 0.0
+      !$omp end workshare
 
       if (config_tracer_adv_order == 2) then
 
+      !$omp do private(iEdge, cell1, cell2, k, iTracer, tracer_edge, flux)
       do iEdge=1,grid % nEdges
             cell1 = cellsOnEdge(1,iEdge)
             cell2 = cellsOnEdge(2,iEdge)
@@ -572,15 +706,22 @@
                   do iTracer=1,grid % nTracers
                      tracer_edge = 0.5 * (tracers(iTracer,k,cell1) + tracers(iTracer,k,cell2))
                      flux = u(k,iEdge) * dvEdge(iEdge) * h_edge(k,iEdge) * tracer_edge
+                     !$omp critical
                      tracer_tend(iTracer,k,cell1) = tracer_tend(iTracer,k,cell1) - flux/areaCell(cell1)
+                     !$omp end critical
+
+                     !$omp critical
                      tracer_tend(iTracer,k,cell2) = tracer_tend(iTracer,k,cell2) + flux/areaCell(cell2)
+                     !$omp end critical
                   end do 
                end do 
             end if
       end do 
+      !$omp end do
 
       else if (config_tracer_adv_order == 3) then
 
+         !$omp do private(iEdge, cell1, cell2, d2fdx2_cell1, d2fdx2_cell2, iTracer, i, flux)
          do iEdge=1,grid%nEdges
             cell1 = cellsOnEdge(1,iEdge)
             cell2 = cellsOnEdge(2,iEdge)
@@ -630,15 +771,22 @@
                      end if
 
                      !-- update tendency
+                     !$omp critical
                      tracer_tend(iTracer,k,cell1) = tracer_tend(iTracer,k,cell1) - flux/areaCell(cell1)
+                     !$omp end critical
+
+                     !$omp critical
                      tracer_tend(iTracer,k,cell2) = tracer_tend(iTracer,k,cell2) + flux/areaCell(cell2)
+                     !$omp end critical
                   enddo
                end do
             end if
          end do
+         !$omp end do
 
       else  if (config_tracer_adv_order == 4) then
 
+         !$omp do private(iEdge, cell1, cell2, k, d2fdx2_cell1, d2fdx2_cell2, iTracer, i, flux)
          do iEdge=1,grid%nEdges
             cell1 = cellsOnEdge(1,iEdge)
             cell2 = cellsOnEdge(2,iEdge)
@@ -678,12 +826,18 @@
                              -(dcEdge(iEdge) **2) * (d2fdx2_cell1 + d2fdx2_cell2) / 12. )
 
                      !-- update tendency
+                     !$omp critical
                      tracer_tend(iTracer,k,cell1) = tracer_tend(iTracer,k,cell1) - flux/areaCell(cell1)
+                     !$omp end critical
+
+                     !$omp critical
                      tracer_tend(iTracer,k,cell2) = tracer_tend(iTracer,k,cell2) + flux/areaCell(cell2)
+                     !$omp end critical
                   enddo
                end do
             end if
          end do
+         !$omp end do
 
       endif   ! if (config_tracer_adv_order == 2 )
 
@@ -695,10 +849,16 @@
          !
          ! compute a boundary mask to enforce insulating boundary conditions in the horizontal
          !
+         !$omp single
          allocate(boundaryMask(grid % nVertLevels, grid % nEdges+1))
+         !$omp end single
+
+         !$omp workshare
          boundaryMask = 1.0
          where(boundaryEdge.eq.1) boundaryMask=0.0
+         !$omp end workshare
 
+         !$omp do private(iEdge, cell1, cell2, invAreaCell1, invAreaCell2, k, iTracer, tracer_turb_flux, flux)
          do iEdge=1,grid % nEdges
             cell1 = cellsOnEdge(1,iEdge)
             cell2 = cellsOnEdge(2,iEdge)
@@ -713,14 +873,22 @@
 
                  ! div(h \kappa_2 </font>
<font color="gray">abla \phi) at cell center
                  flux = dvEdge(iEdge) * h_edge(k,iEdge) * tracer_turb_flux * boundaryMask(k, iEdge)
+                 !$omp critical
                  tracer_tend(iTracer,k,cell1) = tracer_tend(iTracer,k,cell1) + flux * invAreaCell1
+                 !$omp end critical
+
+                 !$omp critical
                  tracer_tend(iTracer,k,cell2) = tracer_tend(iTracer,k,cell2) - flux * invAreaCell2
+                 !$omp end critical
               end do
             end do
 
          end do
+         !$omp end do
 
-        deallocate(boundaryMask)
+         !$omp single
+         deallocate(boundaryMask)
+         !$omp end single
 
       end if
 
@@ -733,15 +901,21 @@
          !
          ! compute a boundary mask to enforce insulating boundary conditions in the horizontal
          !
+         !$omp sections
+         !$omp section
          allocate(boundaryMask(grid % nVertLevels, grid % nEdges+1))
+         !$omp section
+         allocate(delsq_tracer(grid % nTracers, grid % nVertLevels, grid % nCells+1))
+         !$omp end sections
+
+         !$omp workshare
          boundaryMask = 1.0
          where(boundaryEdge.eq.1) boundaryMask=0.0
-
-         allocate(delsq_tracer(grid % nTracers, grid % nVertLevels, grid % nCells+1))
-
          delsq_tracer(:,:,:) = 0.
+         !$omp end workshare
 
          ! first del2: div(h </font>
<font color="gray">abla \phi) at cell center
+         !$omp do private(iEdge, cell1, cell2, k, iTracer)
          do iEdge=1,grid % nEdges
             cell1 = cellsOnEdge(1,iEdge)
             cell2 = cellsOnEdge(2,iEdge)
@@ -756,7 +930,9 @@
             end do
 
          end do
+         !$omp end do
 
+         !$omp do private(iCell, r, k, iTracer)
          do iCell = 1, grid % nCells
             r = 1.0 / grid % areaCell % array(iCell)
             do k=1,grid % nVertLevels
@@ -765,8 +941,10 @@
             end do
             end do
          end do
+         !$omp end do
 
          ! second del2: div(h </font>
<font color="gray">abla [delsq_tracer]) at cell center
+         !$omp do private(iEdge, cell1, cell2, invAreaCell1, invAreaCell2, k, iTracer, tracer_turb_flux, flux)
          do iEdge=1,grid % nEdges
             cell1 = grid % cellsOnEdge % array(1,iEdge)
             cell2 = grid % cellsOnEdge % array(2,iEdge)
@@ -777,15 +955,25 @@
             do iTracer=1,grid % nTracers
                tracer_turb_flux = config_h_tracer_eddy_diff4 * (delsq_tracer(iTracer,k,cell2) - delsq_tracer(iTracer,k,cell1)) / dcEdge(iEdge)
                flux = dvEdge(iEdge) * tracer_turb_flux
+               !$omp critical
                tracer_tend(iTracer,k,cell1) = tracer_tend(iTracer,k,cell1) - flux * invAreaCell1 * boundaryMask(k,iEdge)
+               !$omp end critical
+
+               !$omp critical
                tracer_tend(iTracer,k,cell2) = tracer_tend(iTracer,k,cell2) + flux * invAreaCell2 * boundaryMask(k,iEdge)
+               !$omp end critical
             end do
             enddo
 
          end do
+         !$omp end do
 
+         !$omp sections
+         !$omp section
          deallocate(delsq_tracer)
+         !$omp section
          deallocate(boundaryMask)
+         !$omp end sections
 
       end if
 
@@ -872,17 +1060,27 @@
       !
       ! Find those cells that have an edge on the boundary
       !
+      !$omp workshare
       boundaryCell(:,:) = 0
+      !$omp end workshare
+
+      !$omp do private(iEdge, k, cell1, cell2)
       do iEdge=1,nEdges
        do k=1,nVertLevels
          if(boundaryEdge(k,iEdge).eq.1) then
            cell1 = cellsOnEdge(1,iEdge)
            cell2 = cellsOnEdge(2,iEdge)
+           !$omp critical
            boundaryCell(k,cell1) = 1
+           !$omp end critical
+
+           !$omp critical
            boundaryCell(k,cell2) = 1
+           !$omp end critical
          endif
        enddo
       enddo
+      !$omp end do
 
       !
       ! Compute height on cell edges at velocity locations
@@ -895,6 +1093,7 @@
 
       if (config_thickness_adv_order == 2) then
 
+         !$omp do private(iEdge, cell1, cell2, k)
          do iEdge=1,grid % nEdges
             cell1 = cellsOnEdge(1,iEdge)
             cell2 = cellsOnEdge(2,iEdge)
@@ -904,9 +1103,11 @@
                end do 
             end if
          end do 
+         !$omp end do
 
       else if (config_thickness_adv_order == 3) then
 
+         !$omp do private(iEdge, cell1, cell2, k, d2fdx2_cell1, d2fdx2_cell2, i)
          do iEdge=1,grid%nEdges
             cell1 = cellsOnEdge(1,iEdge)
             cell2 = cellsOnEdge(2,iEdge)
@@ -956,9 +1157,11 @@
                end do   ! do k
             end if      ! if (cell1 &lt;=
          end do         ! do iEdge
+         !$omp end do
 
       else  if (config_thickness_adv_order == 4) then
 
+         !$omp do private(iEdge, cell1, cell2, k, d2fdx2_cell1, d2fdx2_cell2, i)
          do iEdge=1,grid%nEdges
             cell1 = cellsOnEdge(1,iEdge)
             cell2 = cellsOnEdge(2,iEdge)
@@ -998,6 +1201,7 @@
                end do   ! do k
             end if      ! if (cell1 &lt;=
          end do         ! do iEdge
+         !$omp end do
 
       endif   ! if(config_thickness_adv_order == 2)
 
@@ -1005,54 +1209,83 @@
       ! set the velocity in the nEdges+1 slot to zero, this is a dummy address
       !    used to when reading for edges that do not exist
       !
+      !$omp workshare
       u(:,nEdges+1) = 0.0
 
       !
       ! Compute circulation and relative vorticity at each vertex
       !
       circulation(:,:) = 0.0
+      !$omp end workshare
+
+      !$omp do private(iEdge, k)
       do iEdge=1,nEdges
          do k=1,nVertLevels
+            !$omp critical
             circulation(k,verticesOnEdge(1,iEdge)) = circulation(k,verticesOnEdge(1,iEdge)) - dcEdge(iEdge) * u(k,iEdge)
+            !$omp end critical
+
+            !$omp critical
             circulation(k,verticesOnEdge(2,iEdge)) = circulation(k,verticesOnEdge(2,iEdge)) + dcEdge(iEdge) * u(k,iEdge)
+            !$omp end critical
          end do
       end do
+      !$omp end do
+
+      !$omp do private(iVertex, k)
       do iVertex=1,nVertices
          do k=1,nVertLevels
             vorticity(k,iVertex) = circulation(k,iVertex) / areaTriangle(iVertex)
          end do
       end do
+      !$omp end do
 
 
       !
       ! Compute the divergence at each cell center
       !
+      !$omp workshare
       divergence(:,:) = 0.0
+      !$omp end workshare
+
+      !$omp do private(iEdge, cell1, cell2, k)
       do iEdge=1,nEdges
          cell1 = cellsOnEdge(1,iEdge)
          cell2 = cellsOnEdge(2,iEdge)
          if (cell1 &lt;= nCells) then
             do k=1,nVertLevels
+              !$omp critical
               divergence(k,cell1) = divergence(k,cell1) + u(k,iEdge)*dvEdge(iEdge)
+              !$omp end critical
             enddo
          endif
          if(cell2 &lt;= nCells) then
             do k=1,nVertLevels
+              !$omp critical
               divergence(k,cell2) = divergence(k,cell2) - u(k,iEdge)*dvEdge(iEdge)
+              !$omp end critical
             enddo
          end if
       end do
+      !$omp end do
+
+      !$omp do private(iCell, r, k)
       do iCell = 1,nCells
         r = 1.0 / areaCell(iCell)
         do k = 1,nVertLevels
            divergence(k,iCell) = divergence(k,iCell) * r
         enddo
       enddo
+      !$omp end do
 
       !
       ! Compute kinetic energy in each cell
       !
+      !$omp workshare
       ke(:,:) = 0.0
+      !$omp end workshare
+
+      !$omp do private(iCell, i, iEdge, k)
       do iCell=1,nCells
          do i=1,nEdgesOnCell(iCell)
             iEdge = edgesOnCell(i,iCell)
@@ -1064,11 +1297,16 @@
             ke(k,iCell) = ke(k,iCell) / areaCell(iCell)
          end do
       end do
+      !$omp end do
 
       !
       ! Compute v (tangential) velocities
       !
+      !$omp workshare
       v(:,:) = 0.0
+      !$omp end workshare
+
+      !$omp do private(iEdge, i, eoe, k)
       do iEdge = 1,nEdges
          do i=1,nEdgesOnEdge(iEdge)
             eoe = edgesOnEdge(i,iEdge)
@@ -1077,12 +1315,17 @@
             end do
          end do
       end do
+      !$omp end do
 
 #ifdef NCAR_FORMULATION
       !
       ! Compute mass fluxes tangential to each edge (i.e., through the faces of dual grid cells)
       !
+      !$omp workshare
       vh(:,:) = 0.0
+      !$omp end workshare
+
+      !$omp do private(iEdge, j, eoe, k)
       do iEdge=1,grid % nEdgesSolve
          do j=1,nEdgesOnEdge(iEdge)
             eoe = edgesOnEdge(j,iEdge)
@@ -1091,6 +1334,7 @@
             end do
          end do
       end do
+      !$omp end do
 #endif
 
 
@@ -1098,6 +1342,7 @@
       ! Compute height at vertices, pv at vertices, and average pv to edge locations
       !  ( this computes pv_vertex at all vertices bounding real cells and distance-1 ghost cells )
       !
+      !$omp do private(iVertex, k, i)
       do iVertex = 1,nVertices
          do k=1,nVertLevels
             h_vertex(k,iVertex) = 0.0
@@ -1109,24 +1354,30 @@
             pv_vertex(k,iVertex) = (fVertex(iVertex) + vorticity(k,iVertex)) / h_vertex(k,iVertex)
          end do
       end do
+      !$omp end do
 
-
       !
       ! Compute gradient of PV in the tangent direction
       !   ( this computes gradPVt at all edges bounding real cells and distance-1 ghost cells )
       !
+      !$omp do private(iEdge, k)
       do iEdge = 1,nEdges
          do k = 1,nVertLevels
            gradPVt(k,iEdge) = (pv_vertex(k,verticesOnEdge(2,iEdge)) - pv_vertex(k,verticesOnEdge(1,iEdge))) / &amp;
                               dvEdge(iEdge)
          enddo
       enddo
+      !$omp end do
 
       !
       ! Compute pv at the edges
       !   ( this computes pv_edge at all edges bounding real cells )
       !
+      !$omp workshare
       pv_edge(:,:) = 0.0
+      !$omp end workshare
+
+      !$omp do private(iVertex, i, iEdge, k)
       do iVertex = 1,nVertices
         do i=1,grid % vertexDegree
            iEdge = edgesOnVertex(i,iVertex)
@@ -1135,24 +1386,31 @@
            end do
         end do
       end do
+      !$omp end do
 
 
       !
       ! Modify PV edge with upstream bias. 
       !
+      !$omp do private(iEdge, k)
       do iEdge = 1,nEdges
          do k = 1,nVertLevels
            pv_edge(k,iEdge) = pv_edge(k,iEdge) - config_apvm_upwinding * v(k,iEdge) * dt * gradPVt(k,iEdge)
          enddo
       enddo
+      !$omp end do
 
 
       !
       ! Compute pv at cell centers
       !    ( this computes pv_cell for all real cells and distance-1 ghost cells )
       !
+      !$omp workshare
       pv_cell(:,:) = 0.0
       vorticity_cell(:,:) = 0.0
+      !$omp end workshare
+
+      !$omp do private(iVertex, i, iCell, k)
       do iVertex = 1, nVertices
        do i=1,grid % vertexDegree
          iCell = cellsOnVertex(i,iVertex)
@@ -1164,13 +1422,17 @@
          endif
        enddo
       enddo
+      !$omp end do
 
-
       !
       ! Compute gradient of PV in normal direction
       !   ( this computes gradPVn for all edges bounding real cells )
       !
+      !$omp workshare
       gradPVn(:,:) = 0.0
+      !$omp end workshare
+
+      !$omp do private(iEdge, k)
       do iEdge = 1,nEdges
         if( cellsOnEdge(1,iEdge) &lt;= nCells .and. cellsOnEdge(2,iEdge) &lt;= nCells) then
           do k = 1,nVertLevels
@@ -1179,14 +1441,17 @@
           enddo
         endif
       enddo
+      !$omp end do
 
       ! Modify PV edge with upstream bias.
       !
+      !$omp do private(iEdge, k)
       do iEdge = 1,nEdges
          do k = 1,nVertLevels
            pv_edge(k,iEdge) = pv_edge(k,iEdge) - config_apvm_upwinding * u(k,iEdge) * dt * gradPVn(k,iEdge)
          enddo
       enddo
+      !$omp end do
 
       !
       ! set pv_edge = fEdge / h_edge at boundary points
@@ -1243,9 +1508,10 @@
 
       boundaryEdge         =&gt; grid % boundaryEdge % array
       tend_u               =&gt; tend % u % array
-

       if(maxval(boundaryEdge).le.0) return
 
+      !$omp do private(iEdge, k)
       do iEdge = 1,nEdges
         do k = 1,nVertLevels
 
@@ -1254,7 +1520,8 @@
           endif
 
         enddo
-       enddo
+      enddo
+      !$omp end do
 
    end subroutine sw_enforce_boundary_edge
 

</font>
</pre>