<p><b>duda</b> 2013-04-16 15:39:41 -0600 (Tue, 16 Apr 2013)</p><p>BRANCH COMMIT<br>
<br>
Add consistent indentation and comments from Bill to the main MPAS-A dynamics routines.<br>
<br>
No change to results.<br>
<br>
<br>
M    src/core_nhyd_atmos/mpas_atm_time_integration.F<br>
</p><hr noshade><pre><font color="gray">Modified: branches/atmos_physics/src/core_nhyd_atmos/mpas_atm_time_integration.F
===================================================================
--- branches/atmos_physics/src/core_nhyd_atmos/mpas_atm_time_integration.F        2013-04-16 20:57:31 UTC (rev 2758)
+++ branches/atmos_physics/src/core_nhyd_atmos/mpas_atm_time_integration.F        2013-04-16 21:39:41 UTC (rev 2759)
@@ -68,12 +68,13 @@
    ! Advance model state forward in time by the specified time step using 
    !   time-split RK3 scheme
    !
-   ! Hydrostatic (primitive eqns.) solver
+   ! Nonhydrostatic atmospheric solver
    !
    ! Input: domain - current model state in time level 1 (e.g., time_levs(1)state%h(:,:)) 
-   !                 plus grid meta-data
+   !                 plus grid meta-data and some diagnostics of state.
    ! Output: domain - upon exit, time level 2 (e.g., time_levs(2)%state%h(:,:)) contains 
-   !                  model state advanced forward in time by dt seconds
+   !                  model state advanced forward in time by dt seconds,
+   !                  and some diagnostics in diag 
    !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 
 
       implicit none
@@ -118,14 +119,6 @@
 
       if(debug) write(0,*) ' copy step in rk solver '
 
-! WCS-parallel: it appears we have chosen to update all edges of nCellsSolve (to cut down on communications on acoustic steps).
-! Do our communications patterns and loops (specifically in compute_dyn_tend) reflect this?  Or do they assume we are only updating 
-! the so-called owned edges?
-
-
-
-! WCS-parallel: first three and rtheta_p arise from scalar transport and microphysics update (OK).  Others come from where?
-
 ! theta_m
       call mpas_dmpar_exch_halo_field(domain % blocklist % state % time_levs(1) % state % theta_m)
  
@@ -141,72 +134,71 @@
 
       block =&gt; domain % blocklist
       do while (associated(block))
-         ! We are setting values in the halo here, so no communications are needed.
-         ! Alternatively, we could just set owned cells and edge values and communicate after this block loop.
          call atm_rk_integration_setup( block % state % time_levs(2) % state, block % state % time_levs(1) % state, block % diag )
          block =&gt; block % next
       end do
 
       !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 
-      ! BEGIN RK loop 
+      ! BEGIN Runge-Kutta loop 
       !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 
 
       do rk_step = 1, 3  ! Runge-Kutta loop
 
          if(debug) write(0,*) ' rk substep ', rk_step
 
-        block =&gt; domain % blocklist
-        do while (associated(block))
-           ! The coefficients are set for owned cells (cqw) and for all edges of owned cells, 
-           ! thus no communications should be needed after this call.  
-           ! We could consider combining this and the next block loop.
-           call atm_compute_moist_coefficients( block % state % time_levs(2) % state, block % diag, block % mesh )
-           block =&gt; block % next
-        end do
+         block =&gt; domain % blocklist
+         do while (associated(block))
+            ! The coefficients are set for owned cells (cqw) and for all edges of owned cells, 
+            call atm_compute_moist_coefficients( block % state % time_levs(2) % state, block % diag, block % mesh )
+            block =&gt; block % next
+         end do
 
 
-        if (debug) write(0,*) ' compute_dyn_tend '
-        block =&gt; domain % blocklist
-        do while (associated(block))
-           call atm_compute_dyn_tend( block % tend, block % state % time_levs(2) % state, block % diag, block % mesh, rk_step, dt )
-           block =&gt; block % next
-        end do
-        if (debug) write(0,*) ' finished compute_dyn_tend '
+         if (debug) write(0,*) ' compute_dyn_tend '
+         block =&gt; domain % blocklist
+         do while (associated(block))
+            call atm_compute_dyn_tend( block % tend, block % state % time_levs(2) % state, block % diag, block % mesh, rk_step, dt )
+            block =&gt; block % next
+         end do
+         if (debug) write(0,*) ' finished compute_dyn_tend '
 
-
 #ifdef DO_PHYSICS
-        if (debug) write(0,*) ' add physics tendencies '
-        block =&gt; domain % blocklist
-        do while (associated(block))
-           call physics_addtend( block % mesh, &amp;
-                        block % state % time_levs(1) % state, &amp;
-                        block % diag, &amp;
-                        block % tend, &amp;
-                        block % tend_physics, &amp;
-                        block % state % time_levs(2) % state % rho_zz % array(:,:), &amp;
-                        block % diag % rho_edge % array(:,:), &amp; 
-                        rk_step )
-           block =&gt; block % next
-        end do
-        if (debug) write(0,*) ' finished add physics tendencies '
+         if (debug) write(0,*) ' add physics tendencies '
+         block =&gt; domain % blocklist
+         do while (associated(block))
+            call physics_addtend( block % mesh, &amp;
+                         block % state % time_levs(1) % state, &amp;
+                         block % diag, &amp;
+                         block % tend, &amp;
+                         block % tend_physics, &amp;
+                         block % state % time_levs(2) % state % rho_zz % array(:,:), &amp;
+                         block % diag % rho_edge % array(:,:), &amp; 
+                         rk_step )
+            block =&gt; block % next
+         end do
+         if (debug) write(0,*) ' finished add physics tendencies '
 #endif
 
-!***********************************
-!  we will need to communicate the momentum tendencies here - we want tendencies for all edges of owned cells
-!  because we are solving for all edges of owned cells
-!***********************************
+         !***********************************
+         !  need tendencies at all edges of owned cells -
+         !  we are solving for all edges of owned cells to minimize communications
+         !  during the acoustic substeps
+         !***********************************
 
 ! tend_u
          call mpas_dmpar_exch_halo_field(domain % blocklist % tend % u, (/ 1 /))
 
          block =&gt; domain % blocklist
             do while (associated(block))
-!               call atm_set_smlstep_pert_variables( block % state % time_levs(1) % state, block % state % time_levs(2) % state,  &amp;
                call atm_set_smlstep_pert_variables( block % tend, block % diag, block % mesh )
                call atm_compute_vert_imp_coefs( block % state % time_levs(2) % state, block % mesh, block % diag, rk_sub_timestep(rk_step) )
             block =&gt; block % next
          end do
 
+         !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+         ! begin acoustic steps loop
+         !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
          do small_step = 1, number_sub_steps(rk_step)
 
             if(debug) write(0,*) ' acoustic step ',small_step
@@ -220,42 +212,24 @@
 
             if(debug) write(0,*) ' acoustic step complete '
   
-            !  will need communications here for rtheta_pp
+! rtheta_pp
+! This is the only communications needed during the acoustic steps because we solve for u on all edges of owned cells
 
-!  WCS-parallel: is this a candidate for a smaller stencil?  we need only communicate cells that share edges with owned cells.
-
-! rtheta_pp
             call mpas_dmpar_exch_halo_field(domain % blocklist % diag % rtheta_pp, (/ 1 /))
  
-         end do  ! end of small stimestep loop
+         end do  ! end of acoustic steps loop
 
-         !  will need communications here for rho_pp
-
-! WCS-parallel: is communication of rw_p and rho_pp because of limiter (pd or mono scheme?),
-!  or is it needed for the large-step variable recovery (to get decoupled variables)?
-!  seems like only rho_pp needed...
-!
-!  or, do we need ru and u in the halo for diagnostics that are computed later in compute_solve_diagnostics?
-!
-!  rho_pp might be candidate for smaller stencil (same stencil as rtheta_pp above).
-
-! MGD seems necessary
-! rw_p
          !CR: SMALLER STENCIL?: call mpas_dmpar_exch_halo_field(block % diag % rw_p, (/ 1 /))
          call mpas_dmpar_exch_halo_field(domain % blocklist % diag % rw_p)
 
-! MGD seems necessary
-! ru_p
          !CR: SMALLER STENCIL?: call mpas_dmpar_exch_halo_field(block % diag % ru_p, (/ 2 /))
          call mpas_dmpar_exch_halo_field(domain % blocklist % diag % ru_p)
 
-! rho_pp
          call mpas_dmpar_exch_halo_field(domain % blocklist % diag % rho_pp)
 
          ! the second layer of halo cells must be exchanged before calling atm_recover_large_step_variables
          call mpas_dmpar_exch_halo_field(domain % blocklist % diag % rtheta_pp, (/ 2 /))
 
-
          block =&gt; domain % blocklist
          do while (associated(block))
             call atm_recover_large_step_variables( block % state % time_levs(2) % state,                 &amp;
@@ -264,12 +238,12 @@
             block =&gt; block % next
          end do
 
-!  ************  advection of moist variables here...
-
 ! u
          !CR: SMALLER STENCIL?: call mpas_dmpar_exch_halo_field(block % state % time_levs(2) % state % u, (/ 3 /))
          call mpas_dmpar_exch_halo_field(domain % blocklist % state % time_levs(2) % state % u)
 
+         ! scalar advection: RK3 scheme of Skamarock and Gassmann (2011). 
+         ! PD or monotonicity constraints applied only on the final Runge-Kutta substep.
 
          if (config_scalar_advection) then
 
@@ -278,7 +252,7 @@
             !
             ! Note: The advance_scalars_mono routine can be used without limiting, and thus, encompasses 
             !       the functionality of the advance_scalars routine; however, it is noticeably slower, 
-            !       so we keep the advance_scalars routine as well
+            !       so we use the advance_scalars routine for the first two RK substeps.
             !
             if (rk_step &lt; 3 .or. (.not. config_monotonic .and. .not. config_positive_definite)) then
                call atm_advance_scalars( block % tend, &amp;
@@ -295,30 +269,12 @@
             block =&gt; block % next
          end do
 
-! For now, we do scalar halo updates later on...
-!         block =&gt; domain % blocklist
-!         do while (associated(block))
-!            call mpas_dmpar_exch_halo_field3d_real(domain % dminfo, block % tend % scalars % array(:,:,:), &amp;
-!                                             block % tend % num_scalars, block % mesh % nVertLevels, block % mesh % nCells, &amp;
-!                                             block % parinfo % cellsToSend, block % parinfo % cellsToRecv)
-!            call mpas_dmpar_exch_halo_field3d_real(domain % dminfo, block % state % time_levs(2) % state % scalars % array(:,:,:), &amp;
-!                                             block % state % time_levs(2) % state % num_scalars, block % mesh % nVertLevels, block % mesh % nCells, &amp;
-!                                             block % parinfo % cellsToSend, block % parinfo % cellsToRecv)
-!            block =&gt; block % next
-!         end do
-
          else
  
             write(0,*) ' no scalar advection '
 
          end if
 
-! WCS-parallel: seems like we already use u and w (and other state variables) as if they were already correctly set in the halo,
-! but they are not (at least to the outer edges - see communications below? or are those communications redundent?).  
-! Perhaps we should communicate u, w, theta_m, rho_zz, etc after recover_large_step_variables),
-! cover it with the scalar communications, and then compute solve_diagnostics.  I do not think we need to communicate the stuff we compute
-! in compute_solve_diagnostics if we compute it out in the halo (and I think we do - the halos should be large enough).
-
          block =&gt; domain % blocklist
          do while (associated(block))
             call atm_compute_solve_diagnostics( dt, block % state % time_levs(2) % state, block % diag, block % mesh )
@@ -327,11 +283,6 @@
 
          if(debug) write(0,*) ' diagnostics complete '
 
-
-      ! need communications here to fill out u, w, theta_m, p, and pp, scalars, etc  
-      ! so that they are available for next RK step or the first rk substep of the next timestep
-
-!MGD seems necessary
 ! w
          call mpas_dmpar_exch_halo_field(domain % blocklist % state % time_levs(2) % state % w)
 
@@ -341,30 +292,29 @@
 ! rho_edge
          call mpas_dmpar_exch_halo_field(domain % blocklist % diag % rho_edge)
 
-!  ****  this will always be needed - perhaps we can cover this with compute_solve_diagnostics
-
 ! scalars
-         if(rk_step &lt; 3) then
+         if (rk_step &lt; 3) then
             call mpas_dmpar_exch_halo_field(domain % blocklist % state % time_levs(2) % state % scalars)
          end if
 
-       end do ! rk_step loop
+      end do ! rk_step loop
 
 !...  compute full velocity vectors at cell centers:
       block =&gt; domain % blocklist
-        do while (associated(block))
-           call mpas_reconstruct(block % mesh, block % state % time_levs(2) % state % u % array, &amp;
-                                 block % diag % uReconstructX % array,                           &amp;
-                                 block % diag % uReconstructY % array,                           &amp;
-                                 block % diag % uReconstructZ % array,                           &amp;
-                                 block % diag % uReconstructZonal % array,                       &amp;
-                                 block % diag % uReconstructMeridional % array                   &amp;
-                                )
-           block =&gt; block % next
-        end do
+      do while (associated(block))
+         call mpas_reconstruct(block % mesh, block % state % time_levs(2) % state % u % array, &amp;
+                               block % diag % uReconstructX % array,                           &amp;
+                               block % diag % uReconstructY % array,                           &amp;
+                               block % diag % uReconstructZ % array,                           &amp;
+                               block % diag % uReconstructZonal % array,                       &amp;
+                               block % diag % uReconstructMeridional % array                   &amp;
+                              )
+         block =&gt; block % next
+      end do
 
 !... call to parameterizations of cloud microphysics. calculation of the tendency of water vapor to horizontal and
 !... vertical advection needed for the Tiedtke parameterization of convection.
+
 #ifdef DO_PHYSICS
       block =&gt; domain % blocklist
       do while(associated(block))
@@ -372,21 +322,21 @@
          !NOTE: The calculation of the tendency due to horizontal and vertical advection for the water vapor mixing ratio
          !requires that the subroutine atm_advance_scalars_mono was called on the third Runge Kutta step, so that a halo
          !update for the scalars at time_levs(1) is applied. A halo update for the scalars at time_levs(2) is done above. 
-         if(config_monotonic) then
+         if (config_monotonic) then
             block % tend_physics % rqvdynten % array(:,:) = &amp;
                  ( block % state % time_levs(2) % state % scalars % array(block % state % time_levs(2) % state % index_qv,:,:)   &amp;
                  - block % state % time_levs(1) % state % scalars % array(block % state % time_levs(1) % state % index_qv,:,:) ) &amp;
                  / config_dt
          else
             block % tend_physics % rqvdynten % array(:,:) = 0._RKIND
-         endif
+         end if
 
          !simply set to zero negative mixing ratios of different water species (for now):
          where ( block % state % time_levs(2) % state % scalars % array(:,:,:) .lt. 0.) &amp;
             block % state % time_levs(2) % state % scalars % array(:,:,:) = 0.
 
          !call microphysics schemes:
-         if(config_microp_scheme .ne. 'off') &amp;
+         if (config_microp_scheme .ne. 'off') &amp;
             call microphysics_driver ( block % state % time_levs(2) % state, block % diag, block % diag_physics, &amp;
                                        block % tend, block % mesh, itimestep )
 
@@ -394,87 +344,84 @@
       end do
 #endif
 
+      102 format(' global min, max scalar',i4,2(1x,e17.10))
+      write(0,*)
+      block =&gt; domain % blocklist
+      do while (associated(block))
+         scalar_min = 0.
+         scalar_max = 0.
+         do iCell = 1, block % mesh % nCellsSolve
+         do k = 1, block % mesh % nVertLevels
+            scalar_min = min(scalar_min, block % state % time_levs(2) % state % w % array(k,iCell))
+            scalar_max = max(scalar_max, block % state % time_levs(2) % state % w % array(k,iCell))
+         enddo
+         enddo
+         call mpas_dmpar_min_real(domain%dminfo, scalar_min, global_scalar_min)
+         call mpas_dmpar_max_real(domain%dminfo, scalar_max, global_scalar_max)
+         write(0,*) 'global min, max w ',global_scalar_min, global_scalar_max
 
-!      if(debug) then
-!        101 format(' local  min, max scalar',i4,2(1x,e17.10))
-        102 format(' global min, max scalar',i4,2(1x,e17.10))
-        write(0,*)
-        block =&gt; domain % blocklist
-          do while (associated(block))
-             scalar_min = 0.
-             scalar_max = 0.
-             do iCell = 1, block % mesh % nCellsSolve
-             do k = 1, block % mesh % nVertLevels
-               scalar_min = min(scalar_min, block % state % time_levs(2) % state % w % array(k,iCell))
-               scalar_max = max(scalar_max, block % state % time_levs(2) % state % w % array(k,iCell))
-             enddo
-             enddo
-             call mpas_dmpar_min_real(domain%dminfo, scalar_min, global_scalar_min)
-             call mpas_dmpar_max_real(domain%dminfo, scalar_max, global_scalar_max)
-!             write(0,*) 'local  min, max w ',scalar_min, scalar_max
-             write(0,*) 'global min, max w ',global_scalar_min, global_scalar_max
+         scalar_min = 0.
+         scalar_max = 0.
+         do iEdge = 1, block % mesh % nEdgesSolve
+         do k = 1, block % mesh % nVertLevels
+            scalar_min = min(scalar_min, block % state % time_levs(2) % state % u % array(k,iEdge))
+            scalar_max = max(scalar_max, block % state % time_levs(2) % state % u % array(k,iEdge))
+         enddo
+         enddo
+         call mpas_dmpar_min_real(domain%dminfo, scalar_min, global_scalar_min)
+         call mpas_dmpar_max_real(domain%dminfo, scalar_max, global_scalar_max)
+         write(0,*) 'global min, max u ',global_scalar_min, global_scalar_max
 
-             scalar_min = 0.
-             scalar_max = 0.
-             do iEdge = 1, block % mesh % nEdgesSolve
-             do k = 1, block % mesh % nVertLevels
-               scalar_min = min(scalar_min, block % state % time_levs(2) % state % u % array(k,iEdge))
-               scalar_max = max(scalar_max, block % state % time_levs(2) % state % u % array(k,iEdge))
-             enddo
-             enddo
-             call mpas_dmpar_min_real(domain%dminfo, scalar_min, global_scalar_min)
-             call mpas_dmpar_max_real(domain%dminfo, scalar_max, global_scalar_max)
-!             write(0,*) 'local  min, max u ',scalar_min, scalar_max
-             write(0,*) 'global min, max u ',global_scalar_min, global_scalar_max
+         do iScalar = 1, block % state % time_levs(2) % state % num_scalars
+            scalar_min = 0.
+            scalar_max = 0.
+            do iCell = 1, block % mesh % nCellsSolve
+            do k = 1, block % mesh % nVertLevels
+               scalar_min = min(scalar_min, block % state % time_levs(2) % state % scalars % array(iScalar,k,iCell))
+               scalar_max = max(scalar_max, block % state % time_levs(2) % state % scalars % array(iScalar,k,iCell))
+            enddo
+            enddo
+            call mpas_dmpar_min_real(domain%dminfo, scalar_min, global_scalar_min)
+            call mpas_dmpar_max_real(domain%dminfo, scalar_max, global_scalar_max)
+            write(0,102) iScalar,global_scalar_min,global_scalar_max
+         end do
 
-             do iScalar = 1, block % state % time_levs(2) % state % num_scalars
-                scalar_min = 0.
-                scalar_max = 0.
-                do iCell = 1, block % mesh % nCellsSolve
-                do k = 1, block % mesh % nVertLevels
-                  scalar_min = min(scalar_min, block % state % time_levs(2) % state % scalars % array(iScalar,k,iCell))
-                  scalar_max = max(scalar_max, block % state % time_levs(2) % state % scalars % array(iScalar,k,iCell))
-                enddo
-                enddo
-                call mpas_dmpar_min_real(domain%dminfo, scalar_min, global_scalar_min)
-                call mpas_dmpar_max_real(domain%dminfo, scalar_max, global_scalar_max)
-!                write(0,101) iScalar,scalar_min,scalar_max
-                write(0,102) iScalar,global_scalar_min,global_scalar_max
-             end do
-             block =&gt; block % next
+         block =&gt; block % next
+      end do
 
-          end do
-!      end if
-
-
    end subroutine atm_srk3
 
 !---
 
    subroutine atm_rk_integration_setup( s_old, s_new, diag )
 
-     implicit none
-     type (state_type) :: s_new, s_old
-     type (diag_type) :: diag
+      implicit none
 
-     diag % ru_save % array = diag % ru % array
-     diag % rw_save % array = diag % rw % array
-     diag % rtheta_p_save % array = diag % rtheta_p % array
-     diag % rho_p_save % array = diag % rho_p % array
+      type (state_type) :: s_new, s_old
+      type (diag_type) :: diag
 
-     s_old % u % array = s_new % u % array
-     s_old % w % array = s_new % w % array
-     s_old % theta_m % array = s_new % theta_m % array
-     s_old % rho_zz % array = s_new % rho_zz % array
-     s_old % scalars % array = s_new % scalars % array
+      diag % ru_save % array       = diag % ru % array
+      diag % rw_save % array       = diag % rw % array
+      diag % rtheta_p_save % array = diag % rtheta_p % array
+      diag % rho_p_save % array    = diag % rho_p % array
 
+      s_old % u % array       = s_new % u % array
+      s_old % w % array       = s_new % w % array
+      s_old % theta_m % array = s_new % theta_m % array
+      s_old % rho_zz % array  = s_new % rho_zz % array
+      s_old % scalars % array = s_new % scalars % array
+
    end subroutine atm_rk_integration_setup
 
 !-----
 
    subroutine atm_compute_moist_coefficients( state, diag, grid )
 
+      ! the moist coefficients cqu and cqw serve to transform the inverse dry density (1/rho_d) 
+      ! into the inverse full (moist) density (1/rho_m).
+
       implicit none
+
       type (state_type) :: state
       type (diag_type) :: diag
       type (mesh_type) :: grid
@@ -488,29 +435,29 @@
       nVertLevels = grid % nVertLevels
       nCellsSolve = grid % nCellsSolve
 
-        do iCell = 1, nCellsSolve
-          do k = 2, nVertLevels
+      do iCell = 1, nCellsSolve
+         do k = 2, nVertLevels
             qtot = 0.
             do iq = state % moist_start, state % moist_end
-              qtot = qtot + 0.5 * (state % scalars % array (iq, k, iCell) + state % scalars % array (iq, k-1, iCell))
+               qtot = qtot + 0.5 * (state % scalars % array (iq, k, iCell) + state % scalars % array (iq, k-1, iCell))
             end do
             diag % cqw % array(k,iCell) = 1./(1.+qtot)
-          end do
-        end do
+         end do
+      end do
 
-        do iEdge = 1, nEdges
-          cell1 = grid % cellsOnEdge % array(1,iEdge)
-          cell2 = grid % cellsOnEdge % array(2,iEdge)
-          if (cell1 &lt;= nCellsSolve .or. cell2 &lt;= nCellsSolve) then
+      do iEdge = 1, nEdges
+         cell1 = grid % cellsOnEdge % array(1,iEdge)
+         cell2 = grid % cellsOnEdge % array(2,iEdge)
+         if (cell1 &lt;= nCellsSolve .or. cell2 &lt;= nCellsSolve) then
             do k = 1, nVertLevels
-              qtot = 0.
-              do iq = state % moist_start, state % moist_end
-                 qtot = qtot + 0.5 * ( state % scalars % array (iq, k, cell1) + state % scalars % array (iq, k, cell2) )
-              end do
-              diag % cqu % array(k,iEdge) = 1./( 1. + qtot)
+               qtot = 0.
+               do iq = state % moist_start, state % moist_end
+                  qtot = qtot + 0.5 * ( state % scalars % array (iq, k, cell1) + state % scalars % array (iq, k, cell2) )
+               end do
+               diag % cqu % array(k,iEdge) = 1./( 1. + qtot)
             end do
-          end if
-        end do
+         end if
+      end do
 
    end subroutine atm_compute_moist_coefficients
 
@@ -528,11 +475,12 @@
 
       implicit none
 
-      type (state_type), intent(in) :: s
-      type (mesh_type), intent(in) :: grid
+      type (state_type), intent(in)   :: s
+      type (mesh_type), intent(in)    :: grid
       type (diag_type), intent(inout) :: diag
-      real (kind=RKIND), intent(in) :: dts
+      real (kind=RKIND), intent(in)   :: dts
 
+
       integer :: i, k, iq
 
       integer :: nCells, nVertLevels, nCellsSolve
@@ -548,8 +496,6 @@
       nCells      = grid % nCells
       nCellsSolve = grid % nCellsSolve
       nVertLevels = grid % nVertLevels
-!      epssm = grid % epssm  !  this should come in through the namelist  ******************
-!      epssm = 0.1
       epssm = config_epssm
 
       rdzu =&gt; grid % rdzu % array
@@ -586,51 +532,51 @@
 
       do i = 1, nCellsSolve  !  we only need to do cells we are solving for, not halo cells
 
-        do k=2,nVertLevels
-          cofwr(k,i) =.5*dtseps*gravity*(fzm(k)*zz(k,i)+fzp(k)*zz(k-1,i))
-        end do
-        coftz(1,i) = 0.0
-        do k=2,nVertLevels
-           cofwz(k,i) = dtseps*c2*(fzm(k)*zz(k,i)+fzp(k)*zz(k-1,i))  &amp;
-                *rdzu(k)*cqw(k,i)*(fzm(k)*p (k,i)+fzp(k)*p (k-1,i))
-           coftz(k,i) = dtseps*   (fzm(k)*t (k,i)+fzp(k)*t (k-1,i))
-        end do
-        coftz(nVertLevels+1,i) = 0.0
-        do k=1,nVertLevels
+         do k=2,nVertLevels
+            cofwr(k,i) =.5*dtseps*gravity*(fzm(k)*zz(k,i)+fzp(k)*zz(k-1,i))
+         end do
+         coftz(1,i) = 0.0
+         do k=2,nVertLevels
+            cofwz(k,i) = dtseps*c2*(fzm(k)*zz(k,i)+fzp(k)*zz(k-1,i))  &amp;
+                 *rdzu(k)*cqw(k,i)*(fzm(k)*p (k,i)+fzp(k)*p (k-1,i))
+            coftz(k,i) = dtseps*   (fzm(k)*t (k,i)+fzp(k)*t (k-1,i))
+         end do
+         coftz(nVertLevels+1,i) = 0.0
+         do k=1,nVertLevels
 
-          qtot = 0.
-          do iq = s % moist_start, s % moist_end
-            qtot = qtot + s % scalars % array (iq, k, i)
-          end do
+            qtot = 0.
+            do iq = s % moist_start, s % moist_end
+               qtot = qtot + s % scalars % array (iq, k, i)
+            end do
 
-          cofwt(k,i) = .5*dtseps*rcv*zz(k,i)*gravity*rb(k,i)/(1.+qtot)  &amp;
-                              *p(k,i)/((rtb(k,i)+rt(k,i))*pb(k,i))
-        end do
+            cofwt(k,i) = .5*dtseps*rcv*zz(k,i)*gravity*rb(k,i)/(1.+qtot)  &amp;
+                                *p(k,i)/((rtb(k,i)+rt(k,i))*pb(k,i))
+         end do
 
-        a_tri(1,i) = 0.  ! note, this value is never used
-        b_tri(1) = 1.    ! note, this value is never used
-        c_tri(1) = 0.    ! note, this value is never used
-        gamma_tri(1,i) = 0.
-        alpha_tri(1,i) = 0.  ! note, this value is never used
+         a_tri(1,i) = 0.  ! note, this value is never used
+         b_tri(1) = 1.    ! note, this value is never used
+         c_tri(1) = 0.    ! note, this value is never used
+         gamma_tri(1,i) = 0.
+         alpha_tri(1,i) = 0.  ! note, this value is never used
 
-        do k=2,nVertLevels
-          a_tri(k,i) = -cofwz(k  ,i)* coftz(k-1,i)*rdzw(k-1)*zz(k-1,i)   &amp;
-                       +cofwr(k  ,i)* cofrz(k-1  )                       &amp;
-                       -cofwt(k-1,i)* coftz(k-1,i)*rdzw(k-1)
-          b_tri(k) = 1.                                                  &amp;
-                       +cofwz(k  ,i)*(coftz(k  ,i)*rdzw(k  )*zz(k  ,i)   &amp;
-                                    +coftz(k  ,i)*rdzw(k-1)*zz(k-1,i))   &amp;
-                       -coftz(k  ,i)*(cofwt(k  ,i)*rdzw(k  )             &amp;
-                                     -cofwt(k-1,i)*rdzw(k-1))            &amp;
-                       +cofwr(k,  i)*(cofrz(k    )-cofrz(k-1))
-          c_tri(k) =   -cofwz(k  ,i)* coftz(k+1,i)*rdzw(k  )*zz(k  ,i)   &amp;
-                       -cofwr(k  ,i)* cofrz(k    )                       &amp;
-                       +cofwt(k  ,i)* coftz(k+1,i)*rdzw(k  )
-        end do
-        do k=2,nVertLevels
-          alpha_tri(k,i) = 1./(b_tri(k)-a_tri(k,i)*gamma_tri(k-1,i))
-          gamma_tri(k,i) = c_tri(k)*alpha_tri(k,i)
-        end do
+         do k=2,nVertLevels
+            a_tri(k,i) = -cofwz(k  ,i)* coftz(k-1,i)*rdzw(k-1)*zz(k-1,i)   &amp;
+                         +cofwr(k  ,i)* cofrz(k-1  )                       &amp;
+                         -cofwt(k-1,i)* coftz(k-1,i)*rdzw(k-1)
+            b_tri(k) = 1.                                                  &amp;
+                         +cofwz(k  ,i)*(coftz(k  ,i)*rdzw(k  )*zz(k  ,i)   &amp;
+                                      +coftz(k  ,i)*rdzw(k-1)*zz(k-1,i))   &amp;
+                         -coftz(k  ,i)*(cofwt(k  ,i)*rdzw(k  )             &amp;
+                                       -cofwt(k-1,i)*rdzw(k-1))            &amp;
+                         +cofwr(k,  i)*(cofrz(k    )-cofrz(k-1))
+            c_tri(k) =   -cofwz(k  ,i)* coftz(k+1,i)*rdzw(k  )*zz(k  ,i)   &amp;
+                         -cofwr(k  ,i)* cofrz(k    )                       &amp;
+                         +cofwt(k  ,i)* coftz(k+1,i)*rdzw(k  )
+         end do
+         do k=2,nVertLevels
+            alpha_tri(k,i) = 1./(b_tri(k)-a_tri(k,i)*gamma_tri(k-1,i))
+            gamma_tri(k,i) = c_tri(k)*alpha_tri(k,i)
+         end do
 
       end do ! loop over cells
 
@@ -640,47 +586,60 @@
 
    subroutine atm_set_smlstep_pert_variables( tend, diag, grid )
 
+      ! following Klemp et al MWR 2007, we use preturbation variables
+      ! in the acoustic-step integration.  This routine computes those 
+      ! perturbation variables.  state variables are reconstituted after 
+      ! the acousstic steps in subroutine atm_recover_large_step_variables
+
+
       implicit none
+
       type (tend_type) :: tend
       type (diag_type) :: diag
       type (mesh_type) :: grid
-      !SHP-w
+
       integer :: iCell, iEdge, k, cell1, cell2, coef_3rd_order
       integer, dimension(:,:), pointer :: cellsOnEdge
       real (kind=RKIND), dimension(:), pointer :: fzm, fzp, dvEdge, areaCell
       real (kind=RKIND) :: flux
-      !SHP-w
+
       coef_3rd_order = config_coef_3rd_order
-      if(config_theta_adv_order /=3) coef_3rd_order = 0
+      if (config_theta_adv_order /=3) coef_3rd_order = 0
 
-      !SHP-w
       fzm =&gt; grid % fzm % array
       fzp =&gt; grid % fzp % array
       dvEdge =&gt; grid % dvEdge % array
       areaCell =&gt; grid % areaCell % array
       cellsOnEdge =&gt; grid % cellsOnEdge % array
 
+      ! set the acoustic step perturbation variables by subtracting the RK timestep variables
+      ! from their at the previous RK substep.
+
       diag % rho_pp % array = diag % rho_p_save % array - diag % rho_p % array
-
       diag % ru_p % array = diag % ru_save % array - diag % ru % array
       diag % rtheta_pp % array = diag % rtheta_p_save % array - diag % rtheta_p % array
       diag % rtheta_pp_old % array = diag % rtheta_pp % array
       diag % rw_p % array = diag % rw_save % array - diag % rw % array
 
+      ! we solve for omega instead of w (see Klemp et al MWR 2007),
+      ! so here we change the w_p tendency to an omega_p tendency
+
       do iCell = 1, grid % nCellsSolve
       do k = 2, grid % nVertLevels
-        tend % w % array(k,iCell) = ( fzm(k) * grid % zz % array(k  ,iCell) +   &amp;
-                                      fzp(k) * grid % zz % array(k-1,iCell)   ) &amp;
-                                     * tend % w % array(k,iCell)
+         tend % w % array(k,iCell) = ( fzm(k) * grid % zz % array(k  ,iCell) +   &amp;
+                                       fzp(k) * grid % zz % array(k-1,iCell)   ) &amp;
+                                      * tend % w % array(k,iCell)
       end do
       end do
 
+      ! here we need to compute the omega tendency in a manner consistent with our diagnosis of omega.
+      ! this requires us to use the same flux divergence as is used in the theta eqn - see Klemp et al MWR 2003.
+
       do iEdge = 1,grid % nEdges
 
          cell1 = cellsOnEdge(1,iEdge)
          cell2 = cellsOnEdge(2,iEdge)
 
-         !SHP-w
          do k = 2, grid%nVertLevels
             flux = fzm(k) * tend % u % array(k,iEdge) + fzp(k) * tend % u % array(k-1,iEdge)
             tend % w % array(k,cell2) = tend % w % array(k,cell2)   &amp;
@@ -693,6 +652,8 @@
 
       end do
 
+      !  ruAvg and wwAvg will store the mass fluxes averaged over the acoustic steps for the subsequent scalar transport.
+
       diag % ruAvg % array = 0.
       diag % wwAvg % array = 0.
 
@@ -702,6 +663,12 @@
 
    subroutine atm_advance_acoustic_step( s, diag, tend, grid, dts )
 
+      !  This subroutine performs the entire acoustic step update, following Klemp et al MWR 2007,
+      !  using forward-backward vertically implicit integration.  
+      !  The gravity-waves are included in the acoustic-step integration.
+      !  The input state variables that are updated are ru_p, rw_p (note that this is (rho*omega)_p here),
+      !  rtheta_p, and rho_pp.  The time averaged mass flux is accumulated in ruAvg and wwAvg
+
       implicit none
 
       type (state_type) :: s
@@ -710,11 +677,12 @@
       type (mesh_type) :: grid
       real (kind=RKIND), intent(in) :: dts
 
-      real (kind=RKIND), dimension(:,:), pointer :: rho_zz, theta_m, ru_p, rw_p, rtheta_pp,    &amp;
-                                                    rtheta_pp_old, zz, exner, cqu, ruAvg, &amp;
-                                                    wwAvg, rho_pp, cofwt, coftz, zx,      &amp;
-                                                    a_tri, alpha_tri, gamma_tri, dss,     &amp;
-                                                    tend_ru, tend_rho, tend_rt, tend_rw,  &amp;
+
+      real (kind=RKIND), dimension(:,:), pointer :: rho_zz, theta_m, ru_p, rw_p, rtheta_pp,  &amp;
+                                                    rtheta_pp_old, zz, exner, cqu, ruAvg,    &amp;
+                                                    wwAvg, rho_pp, cofwt, coftz, zx,         &amp;
+                                                    a_tri, alpha_tri, gamma_tri, dss,        &amp;
+                                                    tend_ru, tend_rho, tend_rt, tend_rw,     &amp;
                                                     zgrid, cofwr, cofwz, w, h_divergence
       real (kind=RKIND), dimension(:), pointer :: fzm, fzp, rdzw, dcEdge, AreaCell, cofrz, dvEdge
 
@@ -734,7 +702,6 @@
       integer :: nEdges, nCells, nCellsSolve, nVertLevels
 
       logical, parameter :: debug = .false.
-!      logical, parameter :: debug = .true.
       logical, parameter :: debug1 = .false.
       logical :: newpx
 
@@ -782,8 +749,6 @@
       dvEdge =&gt; grid % dvEdge % array
       AreaCell =&gt; grid % AreaCell % array
 
-!  might these be pointers instead? **************************
-
       nEdges = grid % nEdges
       nCells = grid % nCells
       nCellsSolve = grid % nCellsSolve
@@ -797,6 +762,8 @@
       cpl         =&gt; grid % cpl % array
       newpx = config_newpx
 
+      ! epssm is the offcentering coefficient for the vertically implicit integration.
+      ! smdiv is the 3D divergence-damping coefficient.
       epssm = config_epssm
       smdiv = config_smdiv
 
@@ -807,16 +774,25 @@
       ts = 0.
       rs = 0.
 
-      ! acoustic step divergence damping - forward weight rtheta_pp
+      ! acoustic step divergence damping - forward weight rtheta_pp - see Klemp et al MWR 2007
       rtheta_pp_old = rtheta_pp + smdiv*(rtheta_pp - rtheta_pp_old)
 
-      if(debug) write(0,*) ' updating ru_p '
+      if (debug) write(0,*) ' updating ru_p '
 
+      ! forward-backward acoustic step integration.
+      ! begin by updating the horizontal velocity u, 
+      ! and accumulating the contribution from the updated u to the other tendencies.
+
+      ! we are looping over all edges, but only computing on edges of owned cells. This will include updates of
+      ! all owned edges plus some edges that are owned by other blocks.  We perform these redundant computations
+      ! so that we do not have to communicate updates of u to update the cell variables (rho, w, and theta). 
+
       do iEdge = 1, nEdges
  
          cell1 = grid % cellsOnEdge % array (1,iEdge)
          cell2 = grid % cellsOnEdge % array (2,iEdge)
-         ! update edge for block-owned cells
+
+         ! update edges for block-owned cells
          if (cell1 &lt;= grid % nCellsSolve .or. cell2 &lt;= grid % nCellsSolve ) then
 
             if (newpx) then
@@ -853,13 +829,6 @@
             else
 
                k = 1
-!               dpzx(k) = .5*zx(k,iEdge)*(cf1*(zz(k  ,cell2)*rtheta_pp_old(k  ,cell2)    &amp;
-!                                             +zz(k  ,cell1)*rtheta_pp_old(k  ,cell1))   &amp;
-!                                        +cf2*(zz(k+1,cell2)*rtheta_pp_old(k+1,cell2)    &amp;
-!                                             +zz(k+1,cell1)*rtheta_pp_old(k+1,cell1))   &amp;
-!                                        +cf3*(zz(k+2,cell2)*rtheta_pp_old(k+2,cell2)    &amp;
-!                                             +zz(k+2,cell1)*rtheta_pp_old(k+2,cell1)))
-
                dpzx(k) = .25*(zx(k,iEdge)+zx(k+1,iEdge))                               &amp;
                          *(pzm(k,cell2)*(zz(k+1,cell2)*rtheta_pp_old(k+1,cell2)        &amp;
                                         -zz(k  ,cell2)*rtheta_pp_old(k  ,cell2))       &amp;
@@ -871,10 +840,6 @@
                                         -zz(k  ,cell1)*rtheta_pp_old(k  ,cell1)))
 
                do k=2,grid % nVertLevels-1
-!                  dpzx(k)=.5*zx(k,iEdge)*(fzm(k)*(zz(k  ,cell2)*rtheta_pp_old(k  ,cell2)   &amp;
-!                                                 +zz(k  ,cell1)*rtheta_pp_old(k  ,cell1))  &amp;
-!                                         +fzp(k)*(zz(k-1,cell2)*rtheta_pp_old(k-1,cell2)   &amp;
-!                                                 +zz(k-1,cell1)*rtheta_pp_old(k-1,cell1)))
                   dpzx(k) = .25*(zx(k,iEdge)+zx(k+1,iEdge))                                   &amp;
                                    *(pzp(k,cell2)*(zz(k+1,cell2)*rtheta_pp_old(k+1,cell2)     &amp;
                                                   -zz(k  ,cell2)*rtheta_pp_old(k  ,cell2))    &amp;
@@ -886,38 +851,30 @@
                                                   -zz(k-1,cell1)*rtheta_pp_old(k-1,cell1)))
                end do
 
-               k=grid % nVertLevels
+               k = grid % nVertLevels
                dpzx(k) = .25*(zx(k,iEdge)+zx(k+1,iEdge))                                   &amp;
                                 *(pzm(k,cell2)*(zz(k  ,cell2)*rtheta_pp_old(k  ,cell2)     &amp;
                                                -zz(k-1,cell2)*rtheta_pp_old(k-1,cell2))    &amp;
                                  +pzm(k,cell1)*(zz(k  ,cell1)*rtheta_pp_old(k  ,cell1)     &amp;
                                                -zz(k-1,cell1)*rtheta_pp_old(k-1,cell1)))
 
-!               dpzx(nVertLevels + 1) = 0.
-
                do k=1,nVertLevels
-!                  pgrad =  (rtheta_pp_old(k,cell2)-rtheta_pp_old(k,cell1))/dcEdge(iEdge)  &amp;
-!                               - rdzw(k)*(dpzx(k+1)-dpzx(k))
                   pgrad =     ((rtheta_pp_old(k,cell2)*zz(k,cell2)                    &amp;
                                -rtheta_pp_old(k,cell1)*zz(k,cell1))/dcEdge(iEdge)     &amp;
                             -dpzx(k))/(.5*(zz(k,cell2)+zz(k,cell1)))
                   pgrad = 0.5*c2*(exner(k,cell1)+exner(k,cell2))*pgrad
                   du(k) = dts*(tend_ru(k,iEdge) - cqu(k,iEdge) * pgrad) 
-!                          + (0.05/6.)*dcEdge(iEdge)*(h_divergence(k,cell2)-h_divergence(k,cell1))
                end do
             end if
 
             do k=1,nVertLevels
+
+               ! full update of ru_p
+
                ru_p(k,iEdge) = ru_p(k,iEdge) + du(k)
 
-               if(debug) then
-                 if(iEdge == 3750) then
-                   write(0,*) ' k, pgrad, tend_ru ',k,pgrad,tend_ru(k,3750)
-                 end if
-               end if
+               ! add horizontal fluxes using updated ru_p into density update, rtheta update and w update
 
-!  need to add horizontal fluxes into density update, rtheta update and w update
-
                flux = dts*dvEdge(iEdge)*ru_p(k,iEdge)
                rs(k,cell1) = rs(k,cell1)-flux/AreaCell(cell1)
                rs(k,cell2) = rs(k,cell2)+flux/AreaCell(cell2)
@@ -925,67 +882,82 @@
                flux = flux*0.5*(theta_m(k,cell2)+theta_m(k,cell1))
                ts(k,cell1) = ts(k,cell1)-flux/AreaCell(cell1)
                ts(k,cell2) = ts(k,cell2)+flux/AreaCell(cell2)
-   
+
+               ! accumulate ru_p for use later in scalar transport
+
                ruAvg(k,iEdge) = ruAvg(k,iEdge) + ru_p(k,iEdge)
 
             end do
 
-        end if ! end test for block-owned cells
+         end if ! end test for block-owned cells
 
       end do ! end loop over edges
 
       ! saving rtheta_pp before update for use in divergence damping in next acoustic step
+
       rtheta_pp_old(:,:) = rtheta_pp(:,:)
 
+      ! vertically implicit acoustic and gravity wave integration.
+      ! this follows Klemp et al MWR 2007, with the addition of an implicit Rayleigh damping of w
+      ! serves as a gravity-wave absorbing layer, from Klemp et al 2008.
+
       do iCell = 1, nCellsSolve
 
-        do k=1, nVertLevels
-          rs(k,iCell) = rho_pp(k,iCell) + dts*tend_rho(k,iCell) + rs(k,iCell)      &amp;
-                          - cofrz(k)*resm*(rw_p(k+1,iCell)-rw_p(k,iCell))
-          ts(k,iCell) = rtheta_pp(k,iCell) + dts*tend_rt(k,iCell) + ts(k,iCell)    &amp;
-                             - resm*rdzw(k)*(coftz(k+1,iCell)*rw_p(k+1,iCell)      &amp;
-                             -coftz(k,iCell)*rw_p(k,iCell))
-        enddo
+         do k=1, nVertLevels
+            rs(k,iCell) = rho_pp(k,iCell) + dts*tend_rho(k,iCell) + rs(k,iCell)      &amp;
+                            - cofrz(k)*resm*(rw_p(k+1,iCell)-rw_p(k,iCell))
+            ts(k,iCell) = rtheta_pp(k,iCell) + dts*tend_rt(k,iCell) + ts(k,iCell)    &amp;
+                               - resm*rdzw(k)*(coftz(k+1,iCell)*rw_p(k+1,iCell)      &amp;
+                               -coftz(k,iCell)*rw_p(k,iCell))
+         end do
 
-        do k=2, nVertLevels
+         do k=2, nVertLevels
 
-          wwavg(k,iCell) = wwavg(k,iCell) + 0.5*(1.-epssm)*rw_p(k,iCell)
+            wwavg(k,iCell) = wwavg(k,iCell) + 0.5*(1.-epssm)*rw_p(k,iCell)
 
-          rw_p(k,iCell) = rw_p(k,iCell) +  dts*tend_rw(k,iCell)                       &amp;
-                     - cofwz(k,iCell)*((zz(k  ,iCell)*ts (k  ,iCell)                  &amp;
-                                   -zz(k-1,iCell)*ts (k-1,iCell))                     &amp;
-                             +resm*(zz(k  ,iCell)*rtheta_pp(k  ,iCell)                &amp;
-                                   -zz(k-1,iCell)*rtheta_pp(k-1,iCell)))              &amp;
-                     - cofwr(k,iCell)*((rs (k,iCell)+rs (k-1,iCell))                  &amp;
-                             +resm*(rho_pp(k,iCell)+rho_pp(k-1,iCell)))               &amp;
-                     + cofwt(k  ,iCell)*(ts (k  ,iCell)+resm*rtheta_pp(k  ,iCell))    &amp;
-                     + cofwt(k-1,iCell)*(ts (k-1,iCell)+resm*rtheta_pp(k-1,iCell))
-        enddo
+            rw_p(k,iCell) = rw_p(k,iCell) +  dts*tend_rw(k,iCell)                       &amp;
+                       - cofwz(k,iCell)*((zz(k  ,iCell)*ts (k  ,iCell)                  &amp;
+                                     -zz(k-1,iCell)*ts (k-1,iCell))                     &amp;
+                               +resm*(zz(k  ,iCell)*rtheta_pp(k  ,iCell)                &amp;
+                                     -zz(k-1,iCell)*rtheta_pp(k-1,iCell)))              &amp;
+                       - cofwr(k,iCell)*((rs (k,iCell)+rs (k-1,iCell))                  &amp;
+                               +resm*(rho_pp(k,iCell)+rho_pp(k-1,iCell)))               &amp;
+                       + cofwt(k  ,iCell)*(ts (k  ,iCell)+resm*rtheta_pp(k  ,iCell))    &amp;
+                       + cofwt(k-1,iCell)*(ts (k-1,iCell)+resm*rtheta_pp(k-1,iCell))
+         end do
 
-        do k=2,nVertLevels
-          rw_p(k,iCell) = (rw_p(k,iCell)-a_tri(k,iCell)*rw_p(k-1,iCell))*alpha_tri(k,iCell)
-        end do
+         ! tridiagonal solve sweeping up and then down the column
 
-        do k=nVertLevels,1,-1
-          rw_p(k,iCell) = rw_p(k,iCell) - gamma_tri(k,iCell)*rw_p(k+1,iCell)     
-        end do
+         do k=2,nVertLevels
+            rw_p(k,iCell) = (rw_p(k,iCell)-a_tri(k,iCell)*rw_p(k-1,iCell))*alpha_tri(k,iCell)
+         end do
 
-        do k=2,nVertLevels
-           rw_p(k,iCell) = (rw_p(k,iCell)-dts*dss(k,iCell)*               &amp;
-                       (fzm(k)*zz (k,iCell)+fzp(k)*zz (k-1,iCell))        &amp;
-                       *(fzm(k)*rho_zz(k,iCell)+fzp(k)*rho_zz(k-1,iCell))       &amp;
-                                *w(k,iCell)    )/(1.+dts*dss(k,iCell))
+         do k=nVertLevels,1,-1
+            rw_p(k,iCell) = rw_p(k,iCell) - gamma_tri(k,iCell)*rw_p(k+1,iCell)     
+         end do
 
-           wwAvg(k,iCell) = wwAvg(k,iCell) + 0.5*(1.+epssm)*rw_p(k,iCell)
+         ! the implicit Rayleigh damping on w (gravity-wave absorbing) 
 
-        end do
+         do k=2,nVertLevels
+            rw_p(k,iCell) = (rw_p(k,iCell)-dts*dss(k,iCell)*               &amp;
+                        (fzm(k)*zz (k,iCell)+fzp(k)*zz (k-1,iCell))        &amp;
+                        *(fzm(k)*rho_zz(k,iCell)+fzp(k)*rho_zz(k-1,iCell))       &amp;
+                                 *w(k,iCell)    )/(1.+dts*dss(k,iCell))

+            ! accumulate (rho*omega)' for use later in scalar transport
 
-        do k=1,nVertLevels
-          rho_pp(k,iCell) = rs(k,iCell) - cofrz(k) *(rw_p(k+1,iCell)-rw_p(k  ,iCell))
-          rtheta_pp(k,iCell) = ts(k,iCell) - rdzw(k)*(coftz(k+1,iCell)*rw_p(k+1,iCell)  &amp;
-                             -coftz(k  ,iCell)*rw_p(k  ,iCell))
-        end do
+            wwAvg(k,iCell) = wwAvg(k,iCell) + 0.5*(1.+epssm)*rw_p(k,iCell)

+         end do
 
+         ! update rho_pp and theta_pp given updated rw_p
+
+         do k=1,nVertLevels
+            rho_pp(k,iCell) = rs(k,iCell) - cofrz(k) *(rw_p(k+1,iCell)-rw_p(k  ,iCell))
+            rtheta_pp(k,iCell) = ts(k,iCell) - rdzw(k)*(coftz(k+1,iCell)*rw_p(k+1,iCell)  &amp;
+                               -coftz(k  ,iCell)*rw_p(k  ,iCell))
+         end do
+
       end do !  end of loop over cells
 
    end subroutine atm_advance_acoustic_step
@@ -994,7 +966,13 @@
 
    subroutine atm_recover_large_step_variables( s, diag, tend, grid, dt, ns, rk_step )
 
+      ! reconstitute state variables from acoustic-step perturbation variables 
+      ! after the acoustic steps.  The perturbation variables were originally set in
+      ! subroutine atm_set_smlstep_pert_variables prior to their acoustic-steps update.
+      ! we are also computing a few other state-derived variables here.
+
       implicit none
+
       type (state_type) :: s
       type (diag_type) :: diag
       type (tend_type) :: tend
@@ -1002,6 +980,7 @@
       integer, intent(in) :: ns, rk_step
       real (kind=RKIND), intent(in) :: dt
 
+
       real (kind=RKIND), dimension(:,:), pointer :: wwAvg, rw_save, w, rw, rw_p, rtheta_p, rtheta_pp,   &amp;
                                                     rtheta_p_save, rt_diabatic_tend, rho_p, rho_p_save, &amp;
                                                     rho_pp, rho_zz, rho_base, ruAvg, ru_save, ru_p, u, ru, &amp;
@@ -1015,86 +994,79 @@
       integer :: nVertLevels, nCells, nCellsSolve, nEdges, nEdgesSolve
       real (kind=RKIND) :: rcv, p0, cf1, cf2, cf3, flux, coef_3rd_order
 
-!      logical, parameter :: debug=.true.
       logical, parameter :: debug=.false.
 
-!---
-       wwAvg =&gt; diag % wwAvg % array
-       rw_save =&gt; diag % rw_save % array
-       rw =&gt; diag % rw % array
-       rw_p =&gt; diag % rw_p % array
-       w =&gt; s % w % array
 
-       rtheta_p =&gt; diag % rtheta_p % array
-       rtheta_p_save =&gt; diag % rtheta_p_save % array
-       rtheta_pp =&gt; diag % rtheta_pp % array
-       rtheta_base =&gt; diag % rtheta_base % array
-       rt_diabatic_tend =&gt; tend % rt_diabatic_tend % array
-       theta_m =&gt; s % theta_m % array
-       qvapor =&gt; s % scalars % array(s%index_qv,:,:)
+      wwAvg =&gt; diag % wwAvg % array
+      rw_save =&gt; diag % rw_save % array
+      rw =&gt; diag % rw % array
+      rw_p =&gt; diag % rw_p % array
+      w =&gt; s % w % array
 
-       rho_zz =&gt; s % rho_zz % array
-       rho_p =&gt; diag % rho_p % array
-       rho_p_save =&gt; diag % rho_p_save % array
-       rho_pp =&gt; diag % rho_pp % array
-       rho_base =&gt; diag % rho_base % array
+      rtheta_p =&gt; diag % rtheta_p % array
+      rtheta_p_save =&gt; diag % rtheta_p_save % array
+      rtheta_pp =&gt; diag % rtheta_pp % array
+      rtheta_base =&gt; diag % rtheta_base % array
+      rt_diabatic_tend =&gt; tend % rt_diabatic_tend % array
+      theta_m =&gt; s % theta_m % array
+      qvapor =&gt; s % scalars % array(s%index_qv,:,:)
 
-       ruAvg =&gt; diag % ruAvg % array
-       ru_save =&gt; diag % ru_save % array
-       ru_p =&gt; diag % ru_p % array
-       ru =&gt; diag % ru % array
-       u =&gt; s % u % array
+      rho_zz =&gt; s % rho_zz % array
+      rho_p =&gt; diag % rho_p % array
+      rho_p_save =&gt; diag % rho_p_save % array
+      rho_pp =&gt; diag % rho_pp % array
+      rho_base =&gt; diag % rho_base % array
 
-       exner =&gt; diag % exner % array
-       exner_base =&gt; diag % exner_base % array
+      ruAvg =&gt; diag % ruAvg % array
+      ru_save =&gt; diag % ru_save % array
+      ru_p =&gt; diag % ru_p % array
+      ru =&gt; diag % ru % array
+      u =&gt; s % u % array
 
-       pressure_p =&gt; diag % pressure_p % array
-       pressure_b =&gt; diag % pressure_base % array
+      exner =&gt; diag % exner % array
+      exner_base =&gt; diag % exner_base % array
 
-       zz =&gt; grid % zz % array
-       zb =&gt; grid % zb % array
-       zb3 =&gt; grid % zb3 % array
-       fzm =&gt; grid % fzm % array
-       fzp =&gt; grid % fzp % array
-       dvEdge =&gt; grid % dvEdge % array
-       AreaCell =&gt; grid % AreaCell % array
-       CellsOnEdge =&gt; grid % CellsOnEdge % array
+      pressure_p =&gt; diag % pressure_p % array
+      pressure_b =&gt; diag % pressure_base % array
 
-       nVertLevels = grid % nVertLevels
-       nCells = grid % nCells
-       nCellsSolve = grid % nCellsSolve
-       nEdges = grid % nEdges
-       nEdgesSolve = grid % nEdgesSolve
+      zz =&gt; grid % zz % array
+      zb =&gt; grid % zb % array
+      zb3 =&gt; grid % zb3 % array
+      fzm =&gt; grid % fzm % array
+      fzp =&gt; grid % fzp % array
+      dvEdge =&gt; grid % dvEdge % array
+      AreaCell =&gt; grid % AreaCell % array
+      CellsOnEdge =&gt; grid % CellsOnEdge % array
 
-       rcv = rgas/(cp-rgas)
-       p0 = 1.e+05  ! this should come from somewhere else...
+      nVertLevels = grid % nVertLevels
+      nCells = grid % nCells
+      nCellsSolve = grid % nCellsSolve
+      nEdges = grid % nEdges
+      nEdgesSolve = grid % nEdgesSolve
 
-       cf1 = grid % cf1 % scalar
-       cf2 = grid % cf2 % scalar
-       cf3 = grid % cf3 % scalar
-       coef_3rd_order = config_coef_3rd_order
-       if(config_theta_adv_order /=3) coef_3rd_order = 0
+      rcv = rgas/(cp-rgas)
+      p0 = 1.e+05  ! this should come from somewhere else...
 
+      cf1 = grid % cf1 % scalar
+      cf2 = grid % cf2 % scalar
+      cf3 = grid % cf3 % scalar
+      coef_3rd_order = config_coef_3rd_order
+      if (config_theta_adv_order /=3) coef_3rd_order = 0
+
       ! compute new density everywhere so we can compute u from ru.
       ! we will also need it to compute theta_m below
 
       do iCell = 1, nCells
 
-        do k = 1, nVertLevels
+         do k = 1, nVertLevels
 
-          rho_p(k,iCell) = rho_p(k,iCell) + rho_pp(k,iCell)
+            rho_p(k,iCell) = rho_p(k,iCell) + rho_pp(k,iCell)
 
-          rho_zz(k,iCell) = rho_p(k,iCell) + rho_base(k,iCell)
-        end do
+            rho_zz(k,iCell) = rho_p(k,iCell) + rho_base(k,iCell)
+         end do
 
-      !  recover owned-cell values in block
-
-!        if( iCell &lt;= nCellsSolve ) then    ! If using this test, more halo exchanges will be needed
-!  WCS-parallel: OK
-
-
-          w(1,iCell) = 0.
-          do k = 2, nVertLevels
+         w(1,iCell) = 0.
+         do k = 2, nVertLevels
             wwAvg(k,iCell) = rw(k,iCell) + (wwAvg(k,iCell) / float(ns))
 
             rw(k,iCell) = rw(k,iCell) + rw_p(k,iCell)
@@ -1103,27 +1075,27 @@
           ! pick up part of diagnosed w from omega
             w(k,iCell) = rw(k,iCell)/( (fzm(k)*zz (k,iCell)+fzp(k)*zz (k-1,iCell))   &amp;
                                       *(fzm(k)*rho_zz(k,iCell)+fzp(k)*rho_zz(k-1,iCell)) )
-          end do
-          w(nVertLevels+1,iCell) = 0.
+         end do
+         w(nVertLevels+1,iCell) = 0.
 
-          if(rk_step == 3) then
+         if (rk_step == 3) then
             do k = 1, nVertLevels
                rtheta_p(k,iCell) = rtheta_p(k,iCell) + rtheta_pp(k,iCell) &amp;
                                  - dt * rho_zz(k,iCell) * rt_diabatic_tend(k,iCell)
             end do
-          else
+         else
             do k = 1, nVertLevels
                rtheta_p(k,iCell) = rtheta_p(k,iCell) + rtheta_pp(k,iCell)
             end do
-          end if
+         end if
 
-          do k = 1, nVertLevels
+         do k = 1, nVertLevels
             theta_m(k,iCell) = (rtheta_p(k,iCell) + rtheta_base(k,iCell))/rho_zz(k,iCell)
             exner(k,iCell) = (zz(k,iCell)*(rgas/p0)*(rtheta_p(k,iCell)+rtheta_base(k,iCell)))**rcv
-             ! pressure below is perturbation pressure
+            ! pressure_p is perturbation pressure
             pressure_p(k,iCell) = zz(k,iCell) * rgas * (exner(k,iCell)*rtheta_p(k,iCell)+rtheta_base(k,iCell)  &amp;
                                                           * (exner(k,iCell)-exner_base(k,iCell)))
-          end do
+         end do
 
       end do
 
@@ -1133,45 +1105,35 @@
 
       do iEdge = 1, nEdges
 
-        cell1 = CellsOnEdge(1,iEdge)
-        cell2 = CellsOnEdge(2,iEdge)
+         cell1 = CellsOnEdge(1,iEdge)
+         cell2 = CellsOnEdge(2,iEdge)
 
-!        if( cell1 &lt;= nCellsSolve .or. cell2 &lt;= nCellsSolve ) then   ! If using this test, more halo exchanges will be needed,
-                                                                     ! though we can avoid division by zero, e.g., by rho_zz(:,cell2)
-          do k = 1, nVertLevels
-
-
-!  WCS-parallel: we could pick this up in the last acoustic step (ruAvg) because we solve for all edges of owned cells
-
+         do k = 1, nVertLevels
             ruAvg(k,iEdge) = ru(k,iEdge) + (ruAvg(k,iEdge) / float(ns))
-
             ru(k,iEdge) = ru(k,iEdge) + ru_p(k,iEdge)
-
             u(k,iEdge) = 2.*ru(k,iEdge)/(rho_zz(k,cell1)+rho_zz(k,cell2))
-          enddo
+         end do
 
+         !  finish recovering w from (rho*omega)_p.  as when we formed (rho*omega)_p from u and w, we need
+         !  to use the same flux-divergence operator as is used for the horizontal theta transport
+         !  (See Klemp et al 2003).
 
-! WCS-parallel: we likely only need this for owned cells
+         flux = cf1*ru(1,iEdge) + cf2*ru(2,iEdge) + cf3*ru(3,iEdge)
+         w(1,cell2) = w(1,cell2) - (zb(1,2,iEdge) + sign(1.0_RKIND,flux)*coef_3rd_order*zb3(1,2,iEdge))  &amp;
+                                *flux/(cf1*rho_zz(1,cell2)+cf2*rho_zz(2,cell2)+cf3*rho_zz(3,cell2))
+         w(1,cell1) = w(1,cell1) + (zb(1,1,iEdge) + sign(1.0_RKIND,flux)*coef_3rd_order*zb3(1,1,iEdge))  &amp;
+                                *flux/(cf1*rho_zz(1,cell1)+cf2*rho_zz(2,cell1)+cf3*rho_zz(3,cell1))
 
-          !SHP-mtn
-          flux = cf1*ru(1,iEdge) + cf2*ru(2,iEdge) + cf3*ru(3,iEdge)
-          w(1,cell2) = w(1,cell2) - (zb(1,2,iEdge) + sign(1.0_RKIND,flux)*coef_3rd_order*zb3(1,2,iEdge))  &amp;
-                                 *flux/(cf1*rho_zz(1,cell2)+cf2*rho_zz(2,cell2)+cf3*rho_zz(3,cell2))
-          w(1,cell1) = w(1,cell1) + (zb(1,1,iEdge) + sign(1.0_RKIND,flux)*coef_3rd_order*zb3(1,1,iEdge))  &amp;
-                                 *flux/(cf1*rho_zz(1,cell1)+cf2*rho_zz(2,cell1)+cf3*rho_zz(3,cell1))
-
-          do k = 2, nVertLevels
+         do k = 2, nVertLevels
             flux = (fzm(k)*ru(k,iEdge)+fzp(k)*ru(k-1,iEdge))
             w(k,cell2) = w(k,cell2) - (zb(k,2,iEdge)+sign(1.0_RKIND,flux)*coef_3rd_order*zb3(k,2,iEdge)) &amp;
                                  *flux/(fzm(k)*rho_zz(k,cell2)+fzp(k)*rho_zz(k-1,cell2))
             w(k,cell1) = w(k,cell1) + (zb(k,1,iEdge)+sign(1.0_RKIND,flux)*coef_3rd_order*zb3(k,1,iEdge)) &amp;
                                  *flux/(fzm(k)*rho_zz(k,cell1)+fzp(k)*rho_zz(k-1,cell1))
-          enddo
+         end do
 
-!        end if
+      end do
 
-      enddo
-
    end subroutine atm_recover_large_step_variables
 
 !---------------------------------------------------------------------------------------
@@ -1179,10 +1141,21 @@
    subroutine atm_advance_scalars( tend, s_old, s_new, diag, grid, dt)
    !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 
    !
-   ! Input: s - current model state
+   ! Integrate scalar equations - explicit transport plus other tendencies
+   !
+   ! Input: s - current model state, 
+   !            including tendencies from sources other than resolved transport.
    !        grid - grid metadata
    !
-   ! Output: tend - computed scalar tendencies
+   ! input scalars in state are uncoupled (i.e. not mulitplied by density)
+   ! 
+   ! Output: updated uncoupled scalars (scalars in s_new).
+   ! Note: scalar tendencies are also modified by this routine.
+   !
+   ! This routine DOES NOT apply any positive definite or monotonic renormalizations.
+   !
+   ! The transport scheme is from Skamarock and Gassmann MWR 2011.
+   !
    !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 
 
       implicit none
@@ -1269,127 +1242,132 @@
 #endif
 
       !
-      ! Runge Kutta integration, so we compute fluxes from scalar_new values, update starts form scalar_old
+      ! Runge Kutta integration, so we compute fluxes from scalar_new values, update starts from scalar_old
       !
-      !
       !  horizontal flux divergence, accumulate in scalar_tend
 
-         do iEdge=1,grid%nEdges
-            cell1 = cellsOnEdge(1,iEdge)
-            cell2 = cellsOnEdge(2,iEdge)
-            if (cell1 &lt;= grid%nCellsSolve .or. cell2 &lt;= grid%nCellsSolve) then  ! only for owned cells
+      do iEdge=1,grid%nEdges
+         cell1 = cellsOnEdge(1,iEdge)
+         cell2 = cellsOnEdge(2,iEdge)
+         if (cell1 &lt;= grid%nCellsSolve .or. cell2 &lt;= grid%nCellsSolve) then  ! only for owned cells
   
-               flux_arr(:,:) = 0.
-               do i=1,nAdvCellsForEdge(iEdge)
-                 iCell = advCellsForEdge(i,iEdge)
-                 do k=1,grid % nVertLevels
-                   scalar_weight = adv_coefs(i,iEdge) + coef_3rd_order*sign(1.0_RKIND,uhAvg(k,iEdge))*adv_coefs_3rd(i,iEdge)
-                   do iScalar=1,s_old % num_scalars
-                     flux_arr(iScalar,k) = flux_arr(iScalar,k) + scalar_weight* scalar_new(iScalar,k,iCell)
-                   end do
-                 end do
-               end do
+            !  flux_arr stores the value of the scalar at the edge.
+            !  a better name perhaps would be scalarEdge
 
+            flux_arr(:,:) = 0.
+            do i=1,nAdvCellsForEdge(iEdge)
+               iCell = advCellsForEdge(i,iEdge)
                do k=1,grid % nVertLevels
-               do iScalar=1,s_old % num_scalars
-                     scalar_tend(iScalar,k,cell1) = scalar_tend(iScalar,k,cell1) &amp;
-                            - uhAvg(k,iEdge)*flux_arr(iScalar,k)/areaCell(cell1)
-                     scalar_tend(iScalar,k,cell2) = scalar_tend(iScalar,k,cell2) &amp;
-                            + uhAvg(k,iEdge)*flux_arr(iScalar,k)/areaCell(cell2)
+                  scalar_weight = adv_coefs(i,iEdge) + coef_3rd_order*sign(1.0_RKIND,uhAvg(k,iEdge))*adv_coefs_3rd(i,iEdge)
+                  do iScalar=1,s_old % num_scalars
+                     flux_arr(iScalar,k) = flux_arr(iScalar,k) + scalar_weight* scalar_new(iScalar,k,iCell)
+                  end do
                end do
-               end do
+            end do
 
-            end if
-          end do
+            ! here we add the horizontal flux divergence into the scalar tendency.
+            ! note that the scalar tendency is modified.
 
+            do k=1,grid % nVertLevels
+            do iScalar=1,s_old % num_scalars
+                  scalar_tend(iScalar,k,cell1) = scalar_tend(iScalar,k,cell1) &amp;
+                         - uhAvg(k,iEdge)*flux_arr(iScalar,k)/areaCell(cell1)
+                  scalar_tend(iScalar,k,cell2) = scalar_tend(iScalar,k,cell2) &amp;
+                         + uhAvg(k,iEdge)*flux_arr(iScalar,k)/areaCell(cell2)
+            end do
+            end do
+
+         end if
+      end do
+
       !
-      !  vertical flux divergence
+      !  vertical flux divergence and update of the scalars
       !
 
       ! zero fluxes at top and bottom
 
-         wdtn(:,1) = 0.
-         wdtn(:,grid % nVertLevels+1) = 0.
+      wdtn(:,1) = 0.
+      wdtn(:,grid % nVertLevels+1) = 0.
 
-         if (config_scalar_vadv_order == 2) then
+      if (config_scalar_vadv_order == 2) then
 
-           do iCell=1,grid % nCellsSolve
-             do k = 2, nVertLevels
-                do iScalar=1,s_old % num_scalars
+         do iCell=1,grid % nCellsSolve
+            do k = 2, nVertLevels
+               do iScalar=1,s_old % num_scalars
                   wdtn(iScalar,k) = wwAvg(k,iCell)*(fnm(k)*scalar_new(iScalar,k,iCell)+fnp(k)*scalar_new(iScalar,k-1,iCell))
-                end do
-             end do
-             do k=1,grid % nVertLevels   ! Could be nVertLevelsSolve?
-                do iScalar=1,s_old % num_scalars
+               end do
+            end do
+            do k=1,grid % nVertLevels
+               do iScalar=1,s_old % num_scalars
                   scalar_new(iScalar,k,iCell) = (   scalar_old(iScalar,k,iCell)*h_old(k,iCell) &amp;
                         + dt*( scalar_tend(iScalar,k,iCell) -rdnw(k)*(wdtn(iScalar,k+1)-wdtn(iScalar,k)) ) )/h_new(k,iCell)
-                end do
-             end do
-           end do
+               end do
+            end do
+         end do
 
-         else if ( config_scalar_vadv_order == 3 ) then
+      else if ( config_scalar_vadv_order == 3 ) then
 
-           do iCell=1,grid % nCellsSolve
+         do iCell=1,grid % nCellsSolve
 
-             k = 2
-             do iScalar=1,s_old % num_scalars
+            k = 2
+            do iScalar=1,s_old % num_scalars
                wdtn(iScalar,k) = wwAvg(k,iCell)*(fnm(k)*scalar_new(iScalar,k,iCell)+fnp(k)*scalar_new(iScalar,k-1,iCell))
-             enddo
+            enddo
              
-             do k=3,nVertLevels-1
+            do k=3,nVertLevels-1
                do iScalar=1,s_old % num_scalars
-                 wdtn(iScalar,k) = flux3( scalar_new(iScalar,k-2,iCell),scalar_new(iScalar,k-1,iCell),  &amp;
-                                          scalar_new(iScalar,k  ,iCell),scalar_new(iScalar,k+1,iCell),  &amp;
-                                          wwAvg(k,iCell), coef_3rd_order )
+                  wdtn(iScalar,k) = flux3( scalar_new(iScalar,k-2,iCell),scalar_new(iScalar,k-1,iCell),  &amp;
+                                           scalar_new(iScalar,k  ,iCell),scalar_new(iScalar,k+1,iCell),  &amp;
+                                           wwAvg(k,iCell), coef_3rd_order )
                end do
-             end do
-             k = nVertLevels
-             do iScalar=1,s_old % num_scalars
+            end do
+            k = nVertLevels
+            do iScalar=1,s_old % num_scalars
                wdtn(iScalar,k) = wwAvg(k,iCell)*(fnm(k)*scalar_new(iScalar,k,iCell)+fnp(k)*scalar_new(iScalar,k-1,iCell))
-             enddo
+            enddo
 
-             do k=1,grid % nVertLevels   ! Could be nVertLevelsSolve?
-                do iScalar=1,s_old % num_scalars
+            do k=1,grid % nVertLevels
+               do iScalar=1,s_old % num_scalars
                   scalar_new(iScalar,k,iCell) = (   scalar_old(iScalar,k,iCell)*h_old(k,iCell) &amp;
                         + dt*( scalar_tend(iScalar,k,iCell) -rdnw(k)*(wdtn(iScalar,k+1)-wdtn(iScalar,k)) ) )/h_new(k,iCell)
-                end do
-             end do
+               end do
+            end do
 
-           end do
+         end do
 
-         else if ( config_scalar_vadv_order == 4 ) then
+      else if ( config_scalar_vadv_order == 4 ) then
 
-           do iCell=1,grid % nCellsSolve
+         do iCell=1,grid % nCellsSolve
 
-             k = 2
-             do iScalar=1,s_old % num_scalars
+            k = 2
+            do iScalar=1,s_old % num_scalars
                wdtn(iScalar,k) = wwAvg(k,iCell)*(fnm(k)*scalar_new(iScalar,k,iCell)+fnp(k)*scalar_new(iScalar,k-1,iCell))
-             enddo
-             do k=3,nVertLevels-1
+            enddo
+            do k=3,nVertLevels-1
                do iScalar=1,s_old % num_scalars
-                 wdtn(iScalar,k) = flux4( scalar_new(iScalar,k-2,iCell),scalar_new(iScalar,k-1,iCell),  &amp;
-                                          scalar_new(iScalar,k  ,iCell),scalar_new(iScalar,k+1,iCell), wwAvg(k,iCell) )
+                  wdtn(iScalar,k) = flux4( scalar_new(iScalar,k-2,iCell),scalar_new(iScalar,k-1,iCell),  &amp;
+                                           scalar_new(iScalar,k  ,iCell),scalar_new(iScalar,k+1,iCell), wwAvg(k,iCell) )
                end do
-             end do
-             k = nVertLevels
-             do iScalar=1,s_old % num_scalars
+            end do
+            k = nVertLevels
+            do iScalar=1,s_old % num_scalars
                wdtn(iScalar,k) = wwAvg(k,iCell)*(fnm(k)*scalar_new(iScalar,k,iCell)+fnp(k)*scalar_new(iScalar,k-1,iCell))
-             enddo
+            enddo
 
-             do k=1,grid % nVertLevels   ! Could be nVertLevelsSolve?
-                do iScalar=1,s_old % num_scalars
+            do k=1,grid % nVertLevels
+               do iScalar=1,s_old % num_scalars
                   scalar_new(iScalar,k,iCell) = (   scalar_old(iScalar,k,iCell)*h_old(k,iCell) &amp;
                         + dt*( scalar_tend(iScalar,k,iCell) -rdnw(k)*(wdtn(iScalar,k+1)-wdtn(iScalar,k)) ) )/h_new(k,iCell)
-                end do
-             end do
+               end do
+            end do
 
-           end do
+         end do
                                                                                         
-         else 
+      else 
 
-           write(0,*) ' bad value for config_scalar_vadv_order - ', config_scalar_vadv_order
+         write(0,*) ' bad value for config_scalar_vadv_order - ', config_scalar_vadv_order
 
-         end if
+      end if
 
    end subroutine atm_advance_scalars
 
@@ -1398,9 +1376,25 @@
    subroutine atm_advance_scalars_mono(tend, s_old, s_new, diag, grid, dt)
    !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 
    !
-   ! Input: s - current model state
+   ! Integrate scalar equations - transport plus other tendencies
+   !
+   ! Input: s - current model state, 
+   !            including tendencies from sources other than resolved transport.
    !        grid - grid metadata
    !
+   ! input scalars in state are uncoupled (i.e. not mulitplied by density)
+   ! 
+   ! Output: updated uncoupled scalars (scalars in s_new).
+   ! Note: scalar tendencies are also modified by this routine.
+   !
+   ! This routine DOES apply positive definite or monotonic renormalizations.
+   !
+   ! The transport scheme is from Skamarock and Gassmann MWR 2011.
+   !
+   ! The positive-definite or monotonic renormalization is from Zalesak JCP 1979
+   !   as used in the RK3 scheme as described in Wang et al MWR 2009
+   !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 
+
       implicit none
 
       type (tend_type),intent(in)     :: tend
@@ -1498,10 +1492,10 @@
       scalar_tend = 0.  !  testing purposes - we have no sources or sinks
 #endif
 
-      !
-      ! Update scalars for physics (i.e., what is in scalar_tend)
-      !   we should probably move this to another routine called before mono advection ****
-      !
+      !  for positive-definite or monotonic option, we first update scalars using the tendency from sources other than
+      !  the resolved transport (these should constitute a positive definite update).  
+      !  Note, however, that we enforce positive-definiteness in this update.
+      !  The transport will maintain this positive definite solution and optionally, shape preservation (monotonicity).
 
       do iCell = 1,grid%nCellsSolve
       do k = 1, grid%nVertLevels
@@ -1525,16 +1519,16 @@
       num_scalars = 1
 
       do iScalar = 1, s_old % num_scalars
-        write(0,*) ' mono transport for scalar ',iScalar
+         write(0,*) ' mono transport for scalar ',iScalar
 
-        do iCell = 1, grid%nCells
-        do k=1, grid%nVertLevels
-          scalar_old(k,iCell) = s_old % scalars % array(iScalar,k,iCell)
-          scalar_new(k,iCell) = s_new % scalars % array(iScalar,k,iCell)
-        end do
-        end do
+         do iCell = 1, grid%nCells
+         do k=1, grid%nVertLevels
+            scalar_old(k,iCell) = s_old % scalars % array(iScalar,k,iCell)
+            scalar_new(k,iCell) = s_new % scalars % array(iScalar,k,iCell)
+         end do
+         end do
 
-         if(debug_print) then
+         if (debug_print) then
             scmin = scalar_old(1,1)
             scmax = scalar_old(1,1)
             do iCell = 1, grid%nCells
@@ -1564,40 +1558,40 @@
  
          do iCell=1,grid % nCellsSolve
 
-          ! zero flux at top and bottom
-           wdtn(1,iCell) = 0.
-           wdtn(grid % nVertLevels+1,iCell) = 0.
+            ! zero flux at top and bottom
+            wdtn(1,iCell) = 0.
+            wdtn(grid % nVertLevels+1,iCell) = 0.
 
-           k = 1
-           s_max(k,iCell) = max(scalar_old(1,iCell),scalar_old(2,iCell))
-           s_min(k,iCell) = min(scalar_old(1,iCell),scalar_old(2,iCell))
+            k = 1
+            s_max(k,iCell) = max(scalar_old(1,iCell),scalar_old(2,iCell))
+            s_min(k,iCell) = min(scalar_old(1,iCell),scalar_old(2,iCell))
 
-           k = 2
-           wdtn(k,iCell) = wwAvg(k,iCell)*(fnm(k)*scalar_new(k,iCell)+fnp(k)*scalar_new(k-1,iCell))
-           s_max(k,iCell) = max(scalar_old(k-1,iCell),scalar_old(k,iCell),scalar_old(k+1,iCell))
-           s_min(k,iCell) = min(scalar_old(k-1,iCell),scalar_old(k,iCell),scalar_old(k+1,iCell))
+            k = 2
+            wdtn(k,iCell) = wwAvg(k,iCell)*(fnm(k)*scalar_new(k,iCell)+fnp(k)*scalar_new(k-1,iCell))
+            s_max(k,iCell) = max(scalar_old(k-1,iCell),scalar_old(k,iCell),scalar_old(k+1,iCell))
+            s_min(k,iCell) = min(scalar_old(k-1,iCell),scalar_old(k,iCell),scalar_old(k+1,iCell))
              
-           do k=3,nVertLevels-1
-              wdtn(k,iCell) = flux3( scalar_new(k-2,iCell),scalar_new(k-1,iCell),  &amp;
-                                     scalar_new(k  ,iCell),scalar_new(k+1,iCell),  &amp;
-                                     wwAvg(k,iCell), coef_3rd_order )
-              s_max(k,iCell) = max(scalar_old(k-1,iCell),scalar_old(k,iCell),scalar_old(k+1,iCell))
-              s_min(k,iCell) = min(scalar_old(k-1,iCell),scalar_old(k,iCell),scalar_old(k+1,iCell))
-           end do
+            do k=3,nVertLevels-1
+               wdtn(k,iCell) = flux3( scalar_new(k-2,iCell),scalar_new(k-1,iCell),  &amp;
+                                      scalar_new(k  ,iCell),scalar_new(k+1,iCell),  &amp;
+                                      wwAvg(k,iCell), coef_3rd_order )
+               s_max(k,iCell) = max(scalar_old(k-1,iCell),scalar_old(k,iCell),scalar_old(k+1,iCell))
+               s_min(k,iCell) = min(scalar_old(k-1,iCell),scalar_old(k,iCell),scalar_old(k+1,iCell))
+            end do
  
-           k = nVertLevels
-           wdtn(k,iCell) = wwAvg(k,iCell)*(fnm(k)*scalar_new(k,iCell)+fnp(k)*scalar_new(k-1,iCell))
-           s_max(k,iCell) = max(scalar_old(k,iCell),scalar_old(k-1,iCell))
-           s_min(k,iCell) = min(scalar_old(k,iCell),scalar_old(k-1,iCell))
+            k = nVertLevels
+            wdtn(k,iCell) = wwAvg(k,iCell)*(fnm(k)*scalar_new(k,iCell)+fnp(k)*scalar_new(k-1,iCell))
+            s_max(k,iCell) = max(scalar_old(k,iCell),scalar_old(k-1,iCell))
+            s_min(k,iCell) = min(scalar_old(k,iCell),scalar_old(k-1,iCell))
 
       ! pull s_min and s_max from the (horizontal) surrounding cells
 
-           do i=1, grid % nEdgesOnCell % array(iCell)
-             do k=1, grid % nVertLevels
-               s_max(k,iCell) = max(s_max(k,iCell),scalar_old(k, grid % CellsOnCell % array(i,iCell)))
-               s_min(k,iCell) = min(s_min(k,iCell),scalar_old(k, grid % CellsOnCell % array(i,iCell)))
-             end do
-           end do
+            do i=1, grid % nEdgesOnCell % array(iCell)
+               do k=1, grid % nVertLevels
+                  s_max(k,iCell) = max(s_max(k,iCell),scalar_old(k, grid % CellsOnCell % array(i,iCell)))
+                  s_min(k,iCell) = min(s_min(k,iCell),scalar_old(k, grid % CellsOnCell % array(i,iCell)))
+               end do
+            end do
 
          end do
 
@@ -1613,69 +1607,69 @@
             if (cell1 &lt;= grid%nCellsSolve .or. cell2 &lt;= grid%nCellsSolve) then  ! only for owned cells
   
                do i=1,nAdvCellsForEdge(iEdge)
-                 iCell = advCellsForEdge(i,iEdge)
-                 do k=1,grid % nVertLevels
-                   scalar_weight = uhAvg(k,iEdge)*(adv_coefs(i,iEdge) + coef_3rd_order*sign(1.0_RKIND,uhAvg(k,iEdge))*adv_coefs_3rd(i,iEdge))
-                   flux_arr(k,iEdge) = flux_arr(k,iEdge) + scalar_weight* scalar_new(k,iCell)
-                 end do
+                  iCell = advCellsForEdge(i,iEdge)
+                  do k=1,grid % nVertLevels
+                     scalar_weight = uhAvg(k,iEdge)*(adv_coefs(i,iEdge) + coef_3rd_order*sign(1.0_RKIND,uhAvg(k,iEdge))*adv_coefs_3rd(i,iEdge))
+                     flux_arr(k,iEdge) = flux_arr(k,iEdge) + scalar_weight* scalar_new(k,iCell)
+                  end do
                end do
 
             end if
 
-          end do
+         end do
 
 !  vertical flux divergence for upwind update, we will put upwind update into scalar_new, and put factor of dt in fluxes
 
-          do iCell = 1, grid % nCellsSolve
+         do iCell = 1, grid % nCellsSolve
 
             k = 1
             scalar_new(k,iCell) = scalar_old(k,iCell)*h_old(k,iCell)
 
             do k = 2, nVertLevels
-              scalar_new(k,iCell) = scalar_old(k,iCell)*h_old(k,iCell)
-              flux_upwind = dt*(max(0.0_RKIND,wwAvg(k,iCell))*scalar_old(k-1,iCell) + min(0.0_RKIND,wwAvg(k,iCell))*scalar_old(k,iCell))
-              scalar_new(k-1,iCell) = scalar_new(k-1,iCell) - flux_upwind*rdnw(k-1)
-              scalar_new(k  ,iCell) = scalar_new(k  ,iCell) + flux_upwind*rdnw(k)
-              wdtn(k,iCell) = dt*wdtn(k,iCell) - flux_upwind
+               scalar_new(k,iCell) = scalar_old(k,iCell)*h_old(k,iCell)
+               flux_upwind = dt*(max(0.0_RKIND,wwAvg(k,iCell))*scalar_old(k-1,iCell) + min(0.0_RKIND,wwAvg(k,iCell))*scalar_old(k,iCell))
+               scalar_new(k-1,iCell) = scalar_new(k-1,iCell) - flux_upwind*rdnw(k-1)
+               scalar_new(k  ,iCell) = scalar_new(k  ,iCell) + flux_upwind*rdnw(k)
+               wdtn(k,iCell) = dt*wdtn(k,iCell) - flux_upwind
             end do
 
 ! scale_arr(SCALE_IN,:,:) and scale_arr(SCALE_OUT:,:) are used here to store the incoming and outgoing perturbation flux 
 ! contributions to the update:  first the vertical flux component, then the horizontal
 
             do k=1,nVertLevels
-              scale_arr(SCALE_IN, k,iCell) = - rdnw(k)*(min(0.0_RKIND,wdtn(k+1,iCell))-max(0.0_RKIND,wdtn(k,iCell)))
-              scale_arr(SCALE_OUT,k,iCell) = - rdnw(k)*(max(0.0_RKIND,wdtn(k+1,iCell))-min(0.0_RKIND,wdtn(k,iCell)))
+               scale_arr(SCALE_IN, k,iCell) = - rdnw(k)*(min(0.0_RKIND,wdtn(k+1,iCell))-max(0.0_RKIND,wdtn(k,iCell)))
+               scale_arr(SCALE_OUT,k,iCell) = - rdnw(k)*(max(0.0_RKIND,wdtn(k+1,iCell))-min(0.0_RKIND,wdtn(k,iCell)))
             end do
 
-          end do
+         end do
 
 !  horizontal flux divergence for upwind update
 
          !  upwind flux computation
 
          do iEdge=1,grid%nEdges
-           cell1 = cellsOnEdge(1,iEdge)
-           cell2 = cellsOnEdge(2,iEdge)
-             if (cell1 &lt;= grid%nCellsSolve .or. cell2 &lt;= grid%nCellsSolve) then  ! only for owned cells
+            cell1 = cellsOnEdge(1,iEdge)
+            cell2 = cellsOnEdge(2,iEdge)
+            if (cell1 &lt;= grid%nCellsSolve .or. cell2 &lt;= grid%nCellsSolve) then  ! only for owned cells
                do k=1,grid % nVertLevels
-                 flux_upwind = grid % dvEdge % array(iEdge) * dt *   &amp;
-                        (max(0.0_RKIND,uhAvg(k,iEdge))*scalar_old(k,cell1) + min(0.0_RKIND,uhAvg(k,iEdge))*scalar_old(k,cell2))
-                 flux_arr(k,iEdge) = dt*flux_arr(k,iEdge) - flux_upwind
-                 scalar_new(k,cell1) = scalar_new(k,cell1) - flux_upwind / areaCell(cell1)
-                 scalar_new(k,cell2) = scalar_new(k,cell2) + flux_upwind / areaCell(cell2)
-
-                 scale_arr(SCALE_OUT,k,cell1) = scale_arr(SCALE_OUT,k,cell1) - max(0.0_RKIND,flux_arr(k,iEdge)) / areaCell(cell1)
-                 scale_arr(SCALE_IN, k,cell1) = scale_arr(SCALE_IN, k,cell1) - min(0.0_RKIND,flux_arr(k,iEdge)) / areaCell(cell1)
-                 scale_arr(SCALE_OUT,k,cell2) = scale_arr(SCALE_OUT,k,cell2) + min(0.0_RKIND,flux_arr(k,iEdge)) / areaCell(cell2)
-                 scale_arr(SCALE_IN, k,cell2) = scale_arr(SCALE_IN, k,cell2) + max(0.0_RKIND,flux_arr(k,iEdge)) / areaCell(cell2)
-
+                  flux_upwind = grid % dvEdge % array(iEdge) * dt *   &amp;
+                         (max(0.0_RKIND,uhAvg(k,iEdge))*scalar_old(k,cell1) + min(0.0_RKIND,uhAvg(k,iEdge))*scalar_old(k,cell2))
+                  flux_arr(k,iEdge) = dt*flux_arr(k,iEdge) - flux_upwind
+                  scalar_new(k,cell1) = scalar_new(k,cell1) - flux_upwind / areaCell(cell1)
+                  scalar_new(k,cell2) = scalar_new(k,cell2) + flux_upwind / areaCell(cell2)

+                  scale_arr(SCALE_OUT,k,cell1) = scale_arr(SCALE_OUT,k,cell1) - max(0.0_RKIND,flux_arr(k,iEdge)) / areaCell(cell1)
+                  scale_arr(SCALE_IN, k,cell1) = scale_arr(SCALE_IN, k,cell1) - min(0.0_RKIND,flux_arr(k,iEdge)) / areaCell(cell1)
+                  scale_arr(SCALE_OUT,k,cell2) = scale_arr(SCALE_OUT,k,cell2) + min(0.0_RKIND,flux_arr(k,iEdge)) / areaCell(cell2)
+                  scale_arr(SCALE_IN, k,cell2) = scale_arr(SCALE_IN, k,cell2) + max(0.0_RKIND,flux_arr(k,iEdge)) / areaCell(cell2)

                end do
-             end if
-          end do
+            end if
+         end do
 
 !  next, the limiter
 
-          do iCell = 1, grid % nCellsSolve
+         do iCell = 1, grid % nCellsSolve
             do k = 1, nVertLevels
                s_min_update = (scalar_new(k,iCell)+scale_arr(SCALE_OUT,k,iCell))/h_new(k,iCell)
                s_max_update = (scalar_new(k,iCell)+scale_arr(SCALE_IN,k,iCell))/h_new(k,iCell)
@@ -1688,54 +1682,52 @@
                scale_arr(SCALE_OUT,k,iCell) = min( 1.0_RKIND, max( 0.0_RKIND, scale_factor) )
 
             end do
-          end do
+         end do
 !
-!  communicate scale factors here
+!  communicate scale factors here.
+!  communicate only first halo row in these next two exchanges
 !
-!
-!  WCS_halo_opt_2 - communicate only first halo row in these next two exchanges
-!
-      tempField =&gt; tempFieldTarget
+         tempField =&gt; tempFieldTarget
 
-      tempField % block =&gt; block
-      tempField % dimSizes(1) = 2
-      tempField % dimSizes(2) = grid % nVertLevels
-      tempField % dimSizes(3) = grid % nCells
-      tempField % sendList =&gt; block % parinfo % cellsToSend
-      tempField % recvList =&gt; block % parinfo % cellsToRecv
-      tempField % copyList =&gt; block % parinfo % cellsToCopy
-      tempField % prev =&gt; null()
-      tempField % next =&gt; null()
+         tempField % block =&gt; block
+         tempField % dimSizes(1) = 2
+         tempField % dimSizes(2) = grid % nVertLevels
+         tempField % dimSizes(3) = grid % nCells
+         tempField % sendList =&gt; block % parinfo % cellsToSend
+         tempField % recvList =&gt; block % parinfo % cellsToRecv
+         tempField % copyList =&gt; block % parinfo % cellsToCopy
+         tempField % prev =&gt; null()
+         tempField % next =&gt; null()
 
-      tempField % array =&gt; scale_arr
-      call mpas_dmpar_exch_halo_field(tempField, (/ 1 /))
+         tempField % array =&gt; scale_arr
+         call mpas_dmpar_exch_halo_field(tempField, (/ 1 /))
 
 !
 !  rescale the fluxes
 !
-            do iEdge = 1, grid % nEdges
-               cell1 = grid % cellsOnEdge % array(1,iEdge)
-               cell2 = grid % cellsOnEdge % array(2,iEdge)
-               if (cell1 &lt;= grid%nCellsSolve .or. cell2 &lt;= grid%nCellsSolve) then
-                  do k = 1, nVertLevels
-                     flux = flux_arr(k,iEdge)
-                     flux = max(0.0_RKIND,flux) * min(scale_arr(SCALE_OUT,k,cell1), scale_arr(SCALE_IN, k,cell2)) &amp;
-                          + min(0.0_RKIND,flux) * min(scale_arr(SCALE_IN, k,cell1), scale_arr(SCALE_OUT,k,cell2))
-                     flux_arr(k,iEdge) = flux
-                  end do
-               end if
-            end do
+         do iEdge = 1, grid % nEdges
+            cell1 = grid % cellsOnEdge % array(1,iEdge)
+            cell2 = grid % cellsOnEdge % array(2,iEdge)
+            if (cell1 &lt;= grid%nCellsSolve .or. cell2 &lt;= grid%nCellsSolve) then
+               do k = 1, nVertLevels
+                  flux = flux_arr(k,iEdge)
+                  flux = max(0.0_RKIND,flux) * min(scale_arr(SCALE_OUT,k,cell1), scale_arr(SCALE_IN, k,cell2)) &amp;
+                       + min(0.0_RKIND,flux) * min(scale_arr(SCALE_IN, k,cell1), scale_arr(SCALE_OUT,k,cell2))
+                  flux_arr(k,iEdge) = flux
+               end do
+            end if
+         end do
  
        ! rescale the vertical flux
  
-            do iCell=1,grid % nCells
-               do k = 2, nVertLevels
-                  flux =  wdtn(k,iCell)
-                  flux = max(0.0_RKIND,flux) * min(scale_arr(SCALE_OUT,k-1,iCell), scale_arr(SCALE_IN,k  ,iCell)) &amp;
-                       + min(0.0_RKIND,flux) * min(scale_arr(SCALE_OUT,k  ,iCell), scale_arr(SCALE_IN,k-1,iCell))
-                  wdtn(k,iCell) = flux
-               end do
+         do iCell=1,grid % nCells
+            do k = 2, nVertLevels
+               flux =  wdtn(k,iCell)
+               flux = max(0.0_RKIND,flux) * min(scale_arr(SCALE_OUT,k-1,iCell), scale_arr(SCALE_IN,k  ,iCell)) &amp;
+                    + min(0.0_RKIND,flux) * min(scale_arr(SCALE_OUT,k  ,iCell), scale_arr(SCALE_IN,k-1,iCell))
+               wdtn(k,iCell) = flux
             end do
+         end do
 !
 !  do the scalar update now that we have the fluxes
 !
@@ -1752,8 +1744,8 @@
 
          do iCell=1,grid % nCellsSolve
             do k=1,grid % nVertLevels
-              scalar_new(k,iCell) = (   scalar_new(k,iCell)  &amp;
-                  + (-rdnw(k)*(wdtn(k+1,iCell)-wdtn(k,iCell)) ) )/h_new(k,iCell)
+               scalar_new(k,iCell) = (   scalar_new(k,iCell)  &amp;
+                   + (-rdnw(k)*(wdtn(k+1,iCell)-wdtn(k,iCell)) ) )/h_new(k,iCell)
             end do
          end do
 
@@ -1765,10 +1757,10 @@
             do k=1, grid%nVertLevels
                scmax = max(scmax,scalar_new(k,iCell))
                scmin = min(scmin,scalar_new(k,iCell))
-               if(s_max(k,iCell) &lt; scalar_new(k,iCell)) then
+               if (s_max(k,iCell) &lt; scalar_new(k,iCell)) then
                   write(32,*) ' over - k,iCell,s_min,s_max,scalar_new ',k,iCell,s_min(k,iCell),s_max(k,iCell),scalar_new(k,iCell)
                end if
-               if(s_min(k,iCell) &gt; scalar_new(k,iCell)) then
+               if (s_min(k,iCell) &gt; scalar_new(k,iCell)) then
                   write(32,*) ' under - k,iCell,s_min,s_max,scalar_new ',k,iCell,s_min(k,iCell),s_max(k,iCell),scalar_new(k,iCell)
                end if
             enddo
@@ -1778,6 +1770,9 @@
 
          end if
 
+         ! the update should be positive definite. but roundoff can sometimes leave small negative values
+         ! hence the enforcement of PD in the copy back to the model state.
+
          do iCell = 1, grid%nCells
          do k=1, grid%nVertLevels
             s_new % scalars % array(iScalar,k,iCell) = max(0.0_RKIND,scalar_new(k,iCell))
@@ -1796,10 +1791,12 @@
    !
    ! Input: s - current model state
    !        grid - grid metadata
+   !        diag - some grid diagnostics
    !
-   ! Output: tend - computed diagnostics (parallel velocities, v; mass fluxes, rv; 
-   !                circulation; vorticity; and kinetic energy, ke) and the 
-   !                tendencies for height (h) and u (u)
+   ! Output: tend - tendencies: tend_u, tend_w, tend_theta and tend_rho
+   !                these are all coupled-variable tendencies.
+   !         various other quantities in diag: Smagorinsky eddy viscosity
+   !                
    !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 
 
       implicit none
@@ -1811,6 +1808,7 @@
       integer, intent(in) :: rk_step
       real (kind=RKIND), intent(in) :: dt
 
+
       logical, parameter :: rk_diffusion = .false.
 
       integer :: iEdge, iCell, iVertex, k, cell1, cell2, vertex1, vertex2, eoe, i, j, iq
@@ -1855,10 +1853,8 @@
       real (kind=RKIND) :: cf1, cf2, cf3, pr, pl
       real (kind=RKIND) :: prandtl_inv
 
-!      logical, parameter :: debug = .true.
       logical, parameter :: debug = .false.
 
-      !SHP-curvature
       logical, parameter :: curvature = .true.
       real (kind=RKIND) :: r_earth
       real (kind=RKIND), dimension(:,:), pointer :: ur_cell, vr_cell
@@ -1884,7 +1880,6 @@
 
 !-----------
 
-      !SHP-curvature
       r_earth = grid % sphere_radius
       ur_cell =&gt; diag % uReconstructZonal % array
       vr_cell =&gt; diag % uReconstructMeridional % array
@@ -1982,15 +1977,15 @@
 
       prandtl_inv = 1.0_RKIND/prandtl
 
-      !
-      ! Compute u (normal) velocity tendency for each edge (cell face)
-      !
-
       write(0,*) ' rk_step in compute_dyn_tend ',rk_step
 
 
       delsq_horiz_mixing = .false.
       if (config_horiz_mixing == &quot;2d_smagorinsky&quot; .and. (rk_step == 1 .or. rk_diffusion)) then
+
+         ! Smagorinsky eddy viscosity, based on horizontal deformation (in this case on model coordinate surfaces).
+         ! The integration coefficients were precomputed and stored in defc_a and defc_b
+
          do iCell = 1, nCells
             d_diag(:) = 0.
             d_off_diag(:) = 0.
@@ -2003,6 +1998,8 @@
                end do
             end do
             do k=1, nVertLevels
+               ! here is the Smagorinsky formulation, 
+               ! followed by imposition of an upper bound on the eddy viscosity
                kdiff(k,iCell) = (c_s * config_len_disp)**2 * sqrt(d_diag(k)**2 + d_off_diag(k)**2)
                kdiff(k,iCell) = min(kdiff(k,iCell),(0.01*config_len_disp**2)/dt)
             end do
@@ -2027,157 +2024,162 @@
       cf2 = grid % cf2 % scalar
       cf3 = grid % cf3 % scalar
 
-      !  tendency for density
-      !  divergence_ru may calculated in the diagnostic subroutine - it is temporary
+      ! tendency for density.
+      ! accumulate total water here for later use in w tendency calculation.
+
       allocate(divergence_ru(nVertLevels, nCells+1))
       allocate(qtot(nVertLevels, nCells+1))
 
       divergence_ru(:,:) = 0.0
       h_divergence(:,:) = 0.
+
+      ! accumulate horizontal mass-flux
+
       do iEdge=1,grid % nEdges
          cell1 = cellsOnEdge(1,iEdge)
          cell2 = cellsOnEdge(2,iEdge)
          do k=1,nVertLevels
-           flux = ru(k,iEdge)*dvEdge(iEdge)
-           divergence_ru(k,cell1) = divergence_ru(k,cell1) + flux
-           divergence_ru(k,cell2) = divergence_ru(k,cell2) - flux
+            flux = ru(k,iEdge)*dvEdge(iEdge)
+            divergence_ru(k,cell1) = divergence_ru(k,cell1) + flux
+            divergence_ru(k,cell2) = divergence_ru(k,cell2) - flux
          end do
       end do
 
       qtot(:,:)=0.
+
+      ! compute horiontal mass-flux divergence, add vertical mass flux divergence to complete tend_rho
+
       do iCell = 1,nCells
-        r = 1.0 / areaCell(iCell)
-        do k = 1,nVertLevels
-           divergence_ru(k,iCell) = divergence_ru(k,iCell) * r
-           h_divergence(k,iCell) = divergence_ru(k,iCell)
-           tend_rho(k,iCell) = -divergence_ru(k,iCell)-rdzw(k)*(rw(k+1,iCell)-rw(k,iCell))
+         r = 1.0 / areaCell(iCell)
+         do k = 1,nVertLevels
+            divergence_ru(k,iCell) = divergence_ru(k,iCell) * r
+            h_divergence(k,iCell) = divergence_ru(k,iCell)
+            tend_rho(k,iCell) = -divergence_ru(k,iCell)-rdzw(k)*(rw(k+1,iCell)-rw(k,iCell))
 
-           do iq = s % moist_start, s % moist_end
-              qtot(k,iCell) = qtot(k,iCell) + s % scalars % array (iq, k, iCell)
-           end do
+            do iq = s % moist_start, s % moist_end
+               qtot(k,iCell) = qtot(k,iCell) + s % scalars % array (iq, k, iCell)
+            end do
 
-        end do
+         end do
       end do    
 
-!****       u dyn tend
+      !
+      ! Compute u (normal) velocity tendency for each edge (cell face)
+      !
 
-         do iEdge=1,grid % nEdgesSolve
-            cell1 = cellsOnEdge(1,iEdge)
-            cell2 = cellsOnEdge(2,iEdge)
+      do iEdge=1,grid % nEdgesSolve
 
-           if(newpx)  then
+         cell1 = cellsOnEdge(1,iEdge)
+         cell2 = cellsOnEdge(2,iEdge)
 
-               k = 1
-               pr  = cpr(k,iEdge)*pp(k,cell2)+cpr(k+1,iEdge)*pp(k+1,cell2)+cpr(k+2,iEdge)*pp(k+2,cell2)
-               pl  = cpl(k,iEdge)*pp(k,cell1)+cpl(k+1,iEdge)*pp(k+1,cell1)+cpl(k+2,iEdge)*pp(k+2,cell1)
-               tend_u(k,iEdge) =  - cqu(k,iEdge)*2./(zz(k,cell1)+zz(k,cell2))*(pr-pl)/dcEdge(iEdge)
+         ! horizontal pressure gradient 
 
-               do k=2,nVertLevels
+         if (newpx)  then
 
-                  kr = min(nVertLevels,k+ nint(.5-sign(0.5_RKIND,zx(k,iEdge)+zx(k+1,iEdge))))
-                  kl = min(nVertLevels,2*k+1-kr)
+            k = 1
+            pr  = cpr(k,iEdge)*pp(k,cell2)+cpr(k+1,iEdge)*pp(k+1,cell2)+cpr(k+2,iEdge)*pp(k+2,cell2)
+            pl  = cpl(k,iEdge)*pp(k,cell1)+cpl(k+1,iEdge)*pp(k+1,cell1)+cpl(k+2,iEdge)*pp(k+2,cell1)
+            tend_u(k,iEdge) =  - cqu(k,iEdge)*2./(zz(k,cell1)+zz(k,cell2))*(pr-pl)/dcEdge(iEdge)
 
-                  pr = pp(k,cell2)+.5*(zgrid(k   ,cell1)+zgrid(k +1,cell1)-zgrid(k ,cell2)-zgrid(k +1,cell2))   &amp;
-                                     /(zgrid(kr+1,cell2)-zgrid(kr-1,cell2))*( pp(kr,cell2)-pp   (kr-1,cell2))
-                  pl = pp(k,cell1)+.5*(zgrid(k   ,cell2)+zgrid(k +1,cell2)-zgrid(k ,cell1)-zgrid(k +1,cell1))   &amp;
-                                     /(zgrid(kl+1,cell1)-zgrid(kl-1,cell1))*( pp(kl,cell1)-pp   (kl-1,cell1))
-                  tend_u(k,iEdge) =  - cqu(k,iEdge)*2./(zz(k,cell1)+zz(k,cell2))*(pr-pl)/dcEdge(iEdge)
+            do k=2,nVertLevels
 
-               end do
+               kr = min(nVertLevels,k+ nint(.5-sign(0.5_RKIND,zx(k,iEdge)+zx(k+1,iEdge))))
+               kl = min(nVertLevels,2*k+1-kr)
 
-            else
-               k = 1
-!!              dpzx(k) = .5*zx(k,iEdge)*(cf1*(pp(k  ,cell2)+pp(k  ,cell1))   &amp;
-!!                                       +cf2*(pp(k+1,cell2)+pp(k+1,cell1))   &amp;
-!!                                       +cf3*(pp(k+2,cell2)+pp(k+2,cell1)))
+               pr = pp(k,cell2)+.5*(zgrid(k   ,cell1)+zgrid(k +1,cell1)-zgrid(k ,cell2)-zgrid(k +1,cell2))   &amp;
+                                  /(zgrid(kr+1,cell2)-zgrid(kr-1,cell2))*( pp(kr,cell2)-pp   (kr-1,cell2))
+               pl = pp(k,cell1)+.5*(zgrid(k   ,cell2)+zgrid(k +1,cell2)-zgrid(k ,cell1)-zgrid(k +1,cell1))   &amp;
+                                  /(zgrid(kl+1,cell1)-zgrid(kl-1,cell1))*( pp(kl,cell1)-pp   (kl-1,cell1))
+               tend_u(k,iEdge) =  - cqu(k,iEdge)*2./(zz(k,cell1)+zz(k,cell2))*(pr-pl)/dcEdge(iEdge)
 
-               dpzx(k) = .25*(zx(k,iEdge)+zx(k+1,iEdge))                  &amp;
-                            *(pzm(k,cell2)*(pp(k+1,cell2)-pp(k,cell2))    &amp;
-                             +pzm(k,cell1)*(pp(k+1,cell1)-pp(k,cell1))    &amp;
-                             +pzp(k,cell2)*(pp(k+2,cell2)-pp(k,cell2))    &amp;
-                             +pzp(k,cell1)*(pp(k+2,cell1)-pp(k,cell1))) 
-  
-               do k = 2, nVertLevels-1
+            end do
 
-!!              dpzx(k) = .5*zx(k,iEdge)*(fzm(k)*(pp(k  ,cell2)+pp(k  ,cell1))  &amp;
-!!                                   +fzp(k)*(pp(k-1,cell2)+pp(k-1,cell1)))
+         else
+            k = 1
 
-                  dpzx(k) = .25*(zx(k,iEdge)+zx(k+1,iEdge))                  &amp;
-                             *(pzp(k,cell2)*(pp(k+1,cell2)-pp(k  ,cell2))    &amp;
-                              +pzm(k,cell2)*(pp(k  ,cell2)-pp(k-1,cell2))    &amp;
-                              +pzp(k,cell1)*(pp(k+1,cell1)-pp(k  ,cell1))    &amp;
-                              +pzm(k,cell1)*(pp(k  ,cell1)-pp(k-1,cell1)))   
+            dpzx(k) = .25*(zx(k,iEdge)+zx(k+1,iEdge))                  &amp;
+                         *(pzm(k,cell2)*(pp(k+1,cell2)-pp(k,cell2))    &amp;
+                          +pzm(k,cell1)*(pp(k+1,cell1)-pp(k,cell1))    &amp;
+                          +pzp(k,cell2)*(pp(k+2,cell2)-pp(k,cell2))    &amp;
+                          +pzp(k,cell1)*(pp(k+2,cell1)-pp(k,cell1))) 
+  
+            do k = 2, nVertLevels-1
 
-               end do
-
-               k = nVertLevels
                dpzx(k) = .25*(zx(k,iEdge)+zx(k+1,iEdge))                  &amp;
-                          *(pzm(k,cell2)*(pp(k  ,cell2)-pp(k-1,cell2))    &amp;
+                          *(pzp(k,cell2)*(pp(k+1,cell2)-pp(k  ,cell2))    &amp;
+                           +pzm(k,cell2)*(pp(k  ,cell2)-pp(k-1,cell2))    &amp;
+                           +pzp(k,cell1)*(pp(k+1,cell1)-pp(k  ,cell1))    &amp;
                            +pzm(k,cell1)*(pp(k  ,cell1)-pp(k-1,cell1)))   
 
-!!               dpzx(nVertLevels+1) = 0.
+            end do
 
-               do k=1,nVertLevels
+            k = nVertLevels
+            dpzx(k) = .25*(zx(k,iEdge)+zx(k+1,iEdge))                  &amp;
+                       *(pzm(k,cell2)*(pp(k  ,cell2)-pp(k-1,cell2))    &amp;
+                        +pzm(k,cell1)*(pp(k  ,cell1)-pp(k-1,cell1)))   
 
-!!                  tend_u(k,iEdge) =  - cqu(k,iEdge)*( (pp(k,cell2)/zz(k,cell2) - pp(k,cell1)/zz(k,cell1))  &amp;
-!!                                                   /  dcEdge(iEdge) - rdzw(k)*(dpzx(k+1)-dpzx(k)) )
+            do k=1,nVertLevels
 
-                  tend_u(k,iEdge) =  - cqu(k,iEdge)*((pp(k,cell2)-pp(k,cell1))/dcEdge(iEdge)   &amp;
-                                          - dpzx(k) ) / (.5*(zz(k,cell2)+zz(k,cell1)))
-               end do
+               tend_u(k,iEdge) =  - cqu(k,iEdge)*((pp(k,cell2)-pp(k,cell1))/dcEdge(iEdge)   &amp;
+                                       - dpzx(k) ) / (.5*(zz(k,cell2)+zz(k,cell1)))
+            end do
 
-            end if
+         end if
 
-            wduz(1) = 0.
-            k = 2
-            wduz(k) =  0.5*( rw(k,cell1)+rw(k,cell2) )*(fzm(k)*u(k,iEdge)+fzp(k)*u(k-1,iEdge))  
-            do k=3,nVertLevels-1
-               wduz(k) = flux3( u(k-2,iEdge),u(k-1,iEdge),u(k,iEdge),u(k+1,iEdge),0.5*(rw(k,cell1)+rw(k,cell2)), 1.0_RKIND )
-            end do
-            k = nVertLevels
-            wduz(k) =  0.5*( rw(k,cell1)+rw(k,cell2) )*(fzm(k)*u(k,iEdge)+fzp(k)*u(k-1,iEdge))  
+         ! vertical transport of u
 
-            wduz(nVertLevels+1) = 0.
+         wduz(1) = 0.
+         k = 2
+         wduz(k) =  0.5*( rw(k,cell1)+rw(k,cell2) )*(fzm(k)*u(k,iEdge)+fzp(k)*u(k-1,iEdge))  
+         do k=3,nVertLevels-1
+            wduz(k) = flux3( u(k-2,iEdge),u(k-1,iEdge),u(k,iEdge),u(k+1,iEdge),0.5*(rw(k,cell1)+rw(k,cell2)), 1.0_RKIND )
+         end do
+         k = nVertLevels
+         wduz(k) =  0.5*( rw(k,cell1)+rw(k,cell2) )*(fzm(k)*u(k,iEdge)+fzp(k)*u(k-1,iEdge))  
 
+         wduz(nVertLevels+1) = 0.
+
+         do k=1,nVertLevels
+            tend_u(k,iEdge) = tend_u(k,iEdge) - rdzw(k)*(wduz(k+1)-wduz(k)) 
+         end do
+
+         ! Next, nonlinear Coriolis term (q) following Ringler et al JCP 2009
+
+         q(:) = 0.0
+         do j = 1,nEdgesOnEdge(iEdge)
+            eoe = edgesOnEdge(j,iEdge)
             do k=1,nVertLevels
-!               tend_u(k,iEdge) =  - cqu(k,iEdge)*( (pp(k,cell2)/zz(k,cell2) - pp(k,cell1)/zz(k,cell1))  &amp;
-!                                                /  dcEdge(iEdge) - rdzw(k)*(dpzx(k+1)-dpzx(k)) )
-               tend_u(k,iEdge) = tend_u(k,iEdge) - rdzw(k)*(wduz(k+1)-wduz(k)) 
+               workpv = 0.5 * (pv_edge(k,iEdge) + pv_edge(k,eoe))
+               q(k) = q(k) + weightsOnEdge(j,iEdge) * u(k,eoe) * workpv * rho_edge(k,eoe)
             end do
+         end do
 
-               q(:) = 0.0
-               do j = 1,nEdgesOnEdge(iEdge)
-                 eoe = edgesOnEdge(j,iEdge)
-                 do k=1,nVertLevels
-                   workpv = 0.5 * (pv_edge(k,iEdge) + pv_edge(k,eoe))
-                   q(k) = q(k) + weightsOnEdge(j,iEdge) * u(k,eoe) * workpv * rho_edge(k,eoe)
-                 end do
-               end do
+         do k=1,nVertLevels
 
-            do k=1,nVertLevels
-               tend_u(k,iEdge) = tend_u(k,iEdge) + rho_edge(k,iEdge)* (q(k) - (ke(k,cell2) - ke(k,cell1))       &amp;
-                                                                    / dcEdge(iEdge))                            &amp;
-                                                - u(k,iEdge)*0.5*(divergence_ru(k,cell1)+divergence_ru(k,cell2)) 
-               !SHP-curvature
-               if (curvature) then
+            ! horizontal ke gradient and vorticity terms in the vector invariant formulation
+            ! of the horizontal momentum equation
+            tend_u(k,iEdge) = tend_u(k,iEdge) + rho_edge(k,iEdge)* (q(k) - (ke(k,cell2) - ke(k,cell1))       &amp;
+                                                                 / dcEdge(iEdge))                            &amp;
+                                             - u(k,iEdge)*0.5*(divergence_ru(k,cell1)+divergence_ru(k,cell2)) 
+            if (curvature) then
+
+               ! curvature terms for the sphere
+
                tend_u(k,iEdge) = tend_u(k,iEdge) &amp;
                                 - 2.*omega*cos(grid % angleEdge % array(iEdge))*cos(grid % latEdge % array(iEdge))  &amp;
                                   *rho_edge(k,iEdge)*.25*(w(k,cell1)+w(k+1,cell1)+w(k,cell2)+w(k+1,cell2))          &amp; 
                                 - u(k,iEdge)*.25*(w(k+1,cell1)+w(k,cell1)+w(k,cell2)+w(k+1,cell2))                  &amp;
                                   *rho_edge(k,iEdge)/r_earth
-               !old-err.
-               !tend_u(k,iEdge) = tend_u(k,iEdge) &amp;
-               !                 - 2.*omega_e*cos(grid % angleEdge % array(iEdge))*cos(grid % latEdge % array(iEdge))  &amp;
-               !                   *.25*(rw(k,cell1)+rw(k+1,cell1)+rw(k,cell2)+rw(k+1,cell2))                          &amp;
-               !                 - u(k,iEdge)*.25*(rw(k+1,cell1)+rw(k,cell1)+rw(k,cell2)+rw(k+1,cell2))/r_earth
-               end if
-            end do
+            end if
          end do
+      end do
 
       deallocate(divergence_ru)
 
       !
       !  horizontal mixing for u
+      !  mixing terms are integrated using forward-Euler, so this tendency is only computed in the
+      !  first Runge-Kutta substep and saved for use in later RK substeps 2 and 3.
       !
 
       if (rk_step == 1 .or. rk_diffusion) then
@@ -2186,60 +2188,63 @@
 
       if (delsq_horiz_mixing) then
 
-        if ((h_mom_eddy_visc2 &gt; 0.0) .and. (config_horiz_mixing == &quot;2d_fixed&quot;)) then
-           do iEdge=1,grid % nEdgesSolve
-              cell1 = cellsOnEdge(1,iEdge)
-              cell2 = cellsOnEdge(2,iEdge)
-              vertex1 = verticesOnEdge(1,iEdge)
-              vertex2 = verticesOnEdge(2,iEdge)
+         if ((h_mom_eddy_visc2 &gt; 0.0) .and. (config_horiz_mixing == &quot;2d_fixed&quot;)) then
+            do iEdge=1,grid % nEdgesSolve
+               cell1 = cellsOnEdge(1,iEdge)
+               cell2 = cellsOnEdge(2,iEdge)
+               vertex1 = verticesOnEdge(1,iEdge)
+               vertex2 = verticesOnEdge(2,iEdge)
+   
+               do k=1,nVertLevels
   
-              do k=1,nVertLevels
+                  !
+                  ! Compute diffusion, computed as </font>
<font color="black">abla divergence - k \times </font>
<font color="red">abla vorticity
+                  !                    only valid for h_mom_eddy_visc2 == constant
+                  !
+                  ! Note that we impose a lower bound on the edge length used in the derivative of the vorticity;
+                  ! this is done to avoid an overly stringent stability constraint for small edge lengths that can
+                  ! occur on some variable-resolution meshes.
+                  !
+                  u_diffusion =   ( divergence(k,cell2)  - divergence(k,cell1) ) / dcEdge(iEdge)  &amp;
+                                 -( vorticity(k,vertex2) - vorticity(k,vertex1) ) / max(dvEdge(iEdge),0.25*dcEdge(iEdge))
+                  u_diffusion = rho_edge(k,iEdge)*h_mom_eddy_visc2 * u_diffusion
+                  u_diffusion = u_diffusion * meshScalingDel2(iEdge)
   
-                 !
-                 ! Compute diffusion, computed as </font>
<font color="black">abla divergence - k \times </font>
<font color="red">abla vorticity
-                 !                    only valid for h_mom_eddy_visc2 == constant
-                 !
-                 u_diffusion =   ( divergence(k,cell2)  - divergence(k,cell1) ) / dcEdge(iEdge)  &amp;
-                                -( vorticity(k,vertex2) - vorticity(k,vertex1) ) / max(dvEdge(iEdge),0.25*dcEdge(iEdge))
-                 u_diffusion = rho_edge(k,iEdge)*h_mom_eddy_visc2 * u_diffusion
-                 u_diffusion = u_diffusion * meshScalingDel2(iEdge)
-  
-!                 tend_u(k,iEdge) = tend_u(k,iEdge) + u_diffusion
-                 tend_u_euler(k,iEdge) = tend_u_euler(k,iEdge) + u_diffusion
-              end do
-           end do
+                  tend_u_euler(k,iEdge) = tend_u_euler(k,iEdge) + u_diffusion
+               end do
+            end do
 
-        else if ( config_horiz_mixing == &quot;2d_smagorinsky&quot;) then
+         else if ( config_horiz_mixing == &quot;2d_smagorinsky&quot;) then
 
-           do iEdge=1,grid % nEdgesSolve
-              cell1 = cellsOnEdge(1,iEdge)
-              cell2 = cellsOnEdge(2,iEdge)
-              vertex1 = verticesOnEdge(1,iEdge)
-              vertex2 = verticesOnEdge(2,iEdge)
+            do iEdge=1,grid % nEdgesSolve
+               cell1 = cellsOnEdge(1,iEdge)
+               cell2 = cellsOnEdge(2,iEdge)
+               vertex1 = verticesOnEdge(1,iEdge)
+               vertex2 = verticesOnEdge(2,iEdge)

+               do k=1,nVertLevels
+                  !
+                  ! Compute diffusion, computed as </font>
<font color="black">abla divergence - k \times </font>
<font color="red">abla vorticity
+                  !                    only valid for h_mom_eddy_visc2 == constant
+                  !
+                  u_diffusion =   ( divergence(k,cell2)  - divergence(k,cell1) ) / dcEdge(iEdge)  &amp;
+                                 -( vorticity(k,vertex2) - vorticity(k,vertex1) ) / max(dvEdge(iEdge),0.25*dcEdge(iEdge))
+                  u_diffusion = rho_edge(k,iEdge)* 0.5*(kdiff(k,cell1)+kdiff(k,cell2)) * u_diffusion
+                  u_diffusion = u_diffusion * meshScalingDel2(iEdge)

+                  tend_u_euler(k,iEdge) = tend_u_euler(k,iEdge) + u_diffusion
+               end do
+            end do
+         end if
 
-              do k=1,nVertLevels
-                 !
-                 ! Compute diffusion, computed as </font>
<font color="black">abla divergence - k \times </font>
<font color="gray">abla vorticity
-                 !                    only valid for h_mom_eddy_visc2 == constant
-                 !
-                 u_diffusion =   ( divergence(k,cell2)  - divergence(k,cell1) ) / dcEdge(iEdge)  &amp;
-                                -( vorticity(k,vertex2) - vorticity(k,vertex1) ) / max(dvEdge(iEdge),0.25*dcEdge(iEdge))
-                 u_diffusion = rho_edge(k,iEdge)* 0.5*(kdiff(k,cell1)+kdiff(k,cell2)) * u_diffusion
-                 u_diffusion = u_diffusion * meshScalingDel2(iEdge)
-
-!                 tend_u(k,iEdge) = tend_u(k,iEdge) + u_diffusion
-                 tend_u_euler(k,iEdge) = tend_u_euler(k,iEdge) + u_diffusion
-              end do
-           end do
-        end if
-
       end if ! delsq_horiz_mixing for u
 
-!ldf (2012-10-10): Modified loop below to allow hyper-diffusion when 2d_smagorinsky is set to true.
-!     if ((h_mom_eddy_visc4 &gt; 0.0) .and. (config_horiz_mixing == &quot;2d_fixed&quot;)) then
       if ((h_mom_eddy_visc4 &gt; 0.0 .and. config_horiz_mixing == &quot;2d_fixed&quot;) .or. &amp;
           (h_mom_eddy_visc4 &gt; 0.0 .and. config_horiz_mixing == &quot;2d_smagorinsky&quot;)) then
 
+         ! del^4 horizontal filter.  We compute this as del^2 ( del^2 (u) ).
+         ! First, storage to hold the result from the first del^2 computation.
+
          allocate(delsq_divergence(nVertLevels, nCells+1))
          allocate(delsq_u(nVertLevels, nEdges+1))
          allocate(delsq_circulation(nVertLevels, nVertices+1))
@@ -2268,10 +2273,10 @@
 
          delsq_circulation(:,:) = 0.0
          do iEdge=1,nEdges
-               do k=1,nVertLevels
-                  delsq_circulation(k,verticesOnEdge(1,iEdge)) = delsq_circulation(k,verticesOnEdge(1,iEdge)) - dcEdge(iEdge) * delsq_u(k,iEdge)
-                  delsq_circulation(k,verticesOnEdge(2,iEdge)) = delsq_circulation(k,verticesOnEdge(2,iEdge)) + dcEdge(iEdge) * delsq_u(k,iEdge)
-               end do
+            do k=1,nVertLevels
+               delsq_circulation(k,verticesOnEdge(1,iEdge)) = delsq_circulation(k,verticesOnEdge(1,iEdge)) - dcEdge(iEdge) * delsq_u(k,iEdge)
+               delsq_circulation(k,verticesOnEdge(2,iEdge)) = delsq_circulation(k,verticesOnEdge(2,iEdge)) + dcEdge(iEdge) * delsq_u(k,iEdge)
+            end do
          end do
          do iVertex=1,nVertices
             r = 1.0 / areaTriangle(iVertex)
@@ -2284,10 +2289,10 @@
          do iEdge=1,nEdges
             cell1 = cellsOnEdge(1,iEdge)
             cell2 = cellsOnEdge(2,iEdge)
-               do k=1,nVertLevels
-                 delsq_divergence(k,cell1) = delsq_divergence(k,cell1) + delsq_u(k,iEdge)*dvEdge(iEdge)
-                 delsq_divergence(k,cell2) = delsq_divergence(k,cell2) - delsq_u(k,iEdge)*dvEdge(iEdge)
-               end do
+            do k=1,nVertLevels
+               delsq_divergence(k,cell1) = delsq_divergence(k,cell1) + delsq_u(k,iEdge)*dvEdge(iEdge)
+               delsq_divergence(k,cell2) = delsq_divergence(k,cell2) - delsq_u(k,iEdge)*dvEdge(iEdge)
+            end do
          end do
          do iCell = 1,nCells
             r = 1.0 / areaCell(iCell)
@@ -2308,13 +2313,14 @@
                ! Compute diffusion, computed as </font>
<font color="black">abla divergence - k \times </font>
<font color="gray">abla vorticity
                !                    only valid for h_mom_eddy_visc4 == constant
                !
-               ! Here, we want to scale the diffusion on the divergence part a factor of config_del4u_div_factor 
-               !    more than the rotational part
+               ! Here, we scale the diffusion on the divergence part a factor of config_del4u_div_factor 
+               !    relative to the rotational part.  The stability constraint on the divergence component is much less
+               !    stringent than the rotational part, and this flexibility may be useful.
+               !
                u_diffusion =  rho_edge(k,iEdge) * ( config_del4u_div_factor * ( delsq_divergence(k,cell2)  - delsq_divergence(k,cell1) ) / dcEdge(iEdge)  &amp;
                            -( delsq_vorticity(k,vertex2) - delsq_vorticity(k,vertex1) ) / max(dvEdge(iEdge), 0.25*dcEdge(iEdge)) &amp;
                                                   )
 
-!               tend_u(k,iEdge) = tend_u(k,iEdge) - h_mom_eddy_visc4 * u_diffusion
                u_diffusion = u_diffusion * meshScalingDel4(iEdge)
                tend_u_euler(k,iEdge) = tend_u_euler(k,iEdge) - h_mom_eddy_visc4 * u_diffusion
             end do
@@ -2328,71 +2334,65 @@
       end if
 
       !
-      !  vertical mixing for u - 2nd order 
+      !  vertical mixing for u - 2nd order filter in physical (z) space
       !
       if ( v_mom_eddy_visc2 &gt; 0.0 ) then
 
          if (config_mix_full) then
 
-         do iEdge=1,grid % nEdgesSolve
+            do iEdge=1,grid % nEdgesSolve
 
-            cell1 = cellsOnEdge(1,iEdge)
-            cell2 = cellsOnEdge(2,iEdge)
+               cell1 = cellsOnEdge(1,iEdge)
+               cell2 = cellsOnEdge(2,iEdge)
 
-            do k=2,nVertLevels-1
+               do k=2,nVertLevels-1
 
-               z1 = 0.5*(zgrid(k-1,cell1)+zgrid(k-1,cell2))
-               z2 = 0.5*(zgrid(k  ,cell1)+zgrid(k  ,cell2))
-               z3 = 0.5*(zgrid(k+1,cell1)+zgrid(k+1,cell2))
-               z4 = 0.5*(zgrid(k+2,cell1)+zgrid(k+2,cell2))
+                  z1 = 0.5*(zgrid(k-1,cell1)+zgrid(k-1,cell2))
+                  z2 = 0.5*(zgrid(k  ,cell1)+zgrid(k  ,cell2))
+                  z3 = 0.5*(zgrid(k+1,cell1)+zgrid(k+1,cell2))
+                  z4 = 0.5*(zgrid(k+2,cell1)+zgrid(k+2,cell2))
 
-               zm = 0.5*(z1+z2)
-               z0 = 0.5*(z2+z3)
-               zp = 0.5*(z3+z4)
+                  zm = 0.5*(z1+z2)
+                  z0 = 0.5*(z2+z3)
+                  zp = 0.5*(z3+z4)
 
-!               tend_u(k,iEdge) = tend_u(k,iEdge) + rho_edge(k,iEdge) * v_mom_eddy_visc2*(  &amp;
-!                                  (u(k+1,iEdge)-u(k  ,iEdge))/(zp-z0)                      &amp;
-!                                 -(u(k  ,iEdge)-u(k-1,iEdge))/(z0-zm) )/(0.5*(zp-zm))
-               tend_u_euler(k,iEdge) = tend_u_euler(k,iEdge) + rho_edge(k,iEdge) * v_mom_eddy_visc2*(  &amp;
-                                  (u(k+1,iEdge)-u(k  ,iEdge))/(zp-z0)                      &amp;
-                                 -(u(k  ,iEdge)-u(k-1,iEdge))/(z0-zm) )/(0.5*(zp-zm))
+                  tend_u_euler(k,iEdge) = tend_u_euler(k,iEdge) + rho_edge(k,iEdge) * v_mom_eddy_visc2*(  &amp;
+                                     (u(k+1,iEdge)-u(k  ,iEdge))/(zp-z0)                      &amp;
+                                    -(u(k  ,iEdge)-u(k-1,iEdge))/(z0-zm) )/(0.5*(zp-zm))
+               end do
             end do
-         end do
 
          else  ! idealized cases where we mix on the perturbation from the initial 1-D state
 
-         do iEdge=1,grid % nEdgesSolve
+            do iEdge=1,grid % nEdgesSolve
 
-            cell1 = cellsOnEdge(1,iEdge)
-            cell2 = cellsOnEdge(2,iEdge)
+               cell1 = cellsOnEdge(1,iEdge)
+               cell2 = cellsOnEdge(2,iEdge)
 
-            do k=1,nVertLevels
+               do k=1,nVertLevels
 #ifdef ROTATED_GRID
-              u_mix(k) = u(k,iEdge) - grid % u_init % array(k) * sin( grid % angleEdge % array(iEdge) )
+                  u_mix(k) = u(k,iEdge) - grid % u_init % array(k) * sin( grid % angleEdge % array(iEdge) )
 #else
-              u_mix(k) = u(k,iEdge) - grid % u_init % array(k) * cos( grid % angleEdge % array(iEdge) )
+                  u_mix(k) = u(k,iEdge) - grid % u_init % array(k) * cos( grid % angleEdge % array(iEdge) )
 #endif
-            end do
+               end do
 
-            do k=2,nVertLevels-1
+               do k=2,nVertLevels-1
 
-               z1 = 0.5*(zgrid(k-1,cell1)+zgrid(k-1,cell2))
-               z2 = 0.5*(zgrid(k  ,cell1)+zgrid(k  ,cell2))
-               z3 = 0.5*(zgrid(k+1,cell1)+zgrid(k+1,cell2))
-               z4 = 0.5*(zgrid(k+2,cell1)+zgrid(k+2,cell2))
+                  z1 = 0.5*(zgrid(k-1,cell1)+zgrid(k-1,cell2))
+                  z2 = 0.5*(zgrid(k  ,cell1)+zgrid(k  ,cell2))
+                  z3 = 0.5*(zgrid(k+1,cell1)+zgrid(k+1,cell2))
+                  z4 = 0.5*(zgrid(k+2,cell1)+zgrid(k+2,cell2))
 
-               zm = 0.5*(z1+z2)
-               z0 = 0.5*(z2+z3)
-               zp = 0.5*(z3+z4)
+                  zm = 0.5*(z1+z2)
+                  z0 = 0.5*(z2+z3)
+                  zp = 0.5*(z3+z4)
 
-               tend_u_euler(k,iEdge) = tend_u_euler(k,iEdge) + rho_edge(k,iEdge) * v_mom_eddy_visc2*(  &amp;
-                                  (u_mix(k+1)-u_mix(k  ))/(zp-z0)                      &amp;
-                                 -(u_mix(k  )-u_mix(k-1))/(z0-zm) )/(0.5*(zp-zm))
-!               tend_u(k,iEdge) = tend_u(k,iEdge) + rho_edge(k,iEdge) * v_mom_eddy_visc2*(  &amp;
-!                                  (u_mix(k+1)-u_mix(k  ))/(zp-z0)                      &amp;
-!                                 -(u_mix(k  )-u_mix(k-1))/(z0-zm) )/(0.5*(zp-zm))
+                  tend_u_euler(k,iEdge) = tend_u_euler(k,iEdge) + rho_edge(k,iEdge) * v_mom_eddy_visc2*(  &amp;
+                                     (u_mix(k+1)-u_mix(k  ))/(zp-z0)                      &amp;
+                                    -(u_mix(k  )-u_mix(k-1))/(z0-zm) )/(0.5*(zp-zm))
+               end do
             end do
-         end do
 
          end if
 
@@ -2439,16 +2439,19 @@
             if (cell1 &lt;= nCellsSolve .or. cell2 &lt;= nCellsSolve) then
 
                do k=2,grid % nVertLevels
-                 ru_edge_w(k) = fzm(k)*ru(k,iEdge) + fzp(k)*ru(k-1,iEdge)
+                  ru_edge_w(k) = fzm(k)*ru(k,iEdge) + fzp(k)*ru(k-1,iEdge)
                end do
 
                flux_arr(:) = 0.
+
+               ! flux_arr stores the value of w at the cell edge used in the horizontal transport
+
                do i=1,nAdvCellsForEdge(iEdge)
-                 iCell = advCellsForEdge(i,iEdge)
-                 do k=2,grid % nVertLevels
-                   scalar_weight = adv_coefs(i,iEdge) + coef_3rd_order*sign(1.0_RKIND,ru_edge_w(k))*adv_coefs_3rd(i,iEdge)
-                   flux_arr(k) = flux_arr(k) + scalar_weight* w(k,iCell)
-                 end do
+                  iCell = advCellsForEdge(i,iEdge)
+                  do k=2,grid % nVertLevels
+                     scalar_weight = adv_coefs(i,iEdge) + coef_3rd_order*sign(1.0_RKIND,ru_edge_w(k))*adv_coefs_3rd(i,iEdge)
+                     flux_arr(k) = flux_arr(k) + scalar_weight* w(k,iCell)
+                  end do
                end do
 
                do k=1,grid % nVertLevels
@@ -2456,35 +2459,6 @@
                   tend_w(k,cell2) = tend_w(k,cell2) + ru_edge_w(k)*flux_arr(k)
                end do
 
-!               do k=2,grid % nVertLevels
-!
-!                  d2fdx2_cell1 = deriv_two(1,1,iEdge) * w(k,cell1)
-!                  d2fdx2_cell2 = deriv_two(1,2,iEdge) * w(k,cell2)
-!                  do i=1, grid % nEdgesOnCell % array (cell1)
-!                     if ( grid % CellsOnCell % array (i,cell1) &lt;= grid%nCells) &amp;
-!                     d2fdx2_cell1 = d2fdx2_cell1 + deriv_two(i+1,1,iEdge) * w(k,grid % CellsOnCell % array (i,cell1))
-!                  end do
-!                  do i=1, grid % nEdgesOnCell % array (cell2)
-!                     if ( grid % CellsOnCell % array (i,cell2) &lt;= grid%nCells) &amp;
-!                     d2fdx2_cell2 = d2fdx2_cell2 + deriv_two(i+1,2,iEdge) * w(k,grid % CellsOnCell % array (i,cell2))
-!                  end do
-!
-!  3rd order stencil
-!                  if( u(k,iEdge)+u(k-1,iEdge) &gt; 0) then
-!                     flux = dvEdge(iEdge) * (fzm(k)*ru(k,iEdge) + fzp(k)*ru(k-1,iEdge))*(  &amp;
-!                                             0.5*(w(k,cell1) + w(k,cell2))                 &amp;
-!                                            -(dcEdge(iEdge) **2) * (d2fdx2_cell1) / 6. )
-!                  else
-!                     flux = dvEdge(iEdge) * (fzm(k)*ru(k,iEdge) + fzp(k)*ru(k-1,iEdge))*(  &amp;
-!                                             0.5*(w(k,cell1) + w(k,cell2))                 &amp;
-!                                            -(dcEdge(iEdge) **2) * (d2fdx2_cell2) / 6. )
-!                  end if
-!
-!                  tend_w(k,cell1) = tend_w(k,cell1) - flux
-!                  tend_w(k,cell2) = tend_w(k,cell2) + flux
-!
-!               end do
-
             end if
          end do
 
@@ -2521,7 +2495,6 @@
          end do
       end if
 
-      !SHP-curvature
       if (curvature) then
 
          do iCell = 1, grid % nCellsSolve
@@ -2533,12 +2506,6 @@
                                           *(fzm(k)*ur_cell(k,iCell)+fzp(k)*ur_cell(k-1,iCell))                 &amp;
                                           *(rho_zz(k,iCell)*fzm(k)+rho_zz(k-1,iCell)*fzp(k))
 
-               !old_err.
-               !tend_w(k,iCell) = tend_w(k,iCell) &amp;
-               !                 + rho_zz(k,iCell)*( (fzm(k)*ur_cell(k,iCell)+fzp(k)*ur_cell(k-1,iCell))**2.          &amp;
-               !                                 +(fzm(k)*vr_cell(k,iCell)+fzp(k)*vr_cell(k-1,iCell))**2. )/r_earth   &amp;
-               !                 + 2.*omega_e*cos(grid % latCell % array(iCell))*rho_zz(k,iCell)      &amp;
-               !                     *(fzm(k)*ur_cell(k,iCell)+fzp(k)*ur_cell(k-1,iCell))
             end do
          end do
 
@@ -2549,111 +2516,106 @@
       !  but here we can also code in hyperdiffusion if we wish (2nd order at present)
       !
 
-      !  Note: we are using quite a bit of the theta code here - could be combined later???
-
       if (rk_step == 1 .or. rk_diffusion) then
 
-      tend_w_euler = 0.
+         tend_w_euler = 0.
 
-      if (delsq_horiz_mixing) then
+         if (delsq_horiz_mixing) then
 
-        if ((h_mom_eddy_visc2 &gt; 0.0) .and. (config_horiz_mixing == &quot;2d_fixed&quot;)) then
+            if ((h_mom_eddy_visc2 &gt; 0.0) .and. (config_horiz_mixing == &quot;2d_fixed&quot;)) then
 
-          do iEdge=1,grid % nEdges
-            cell1 = grid % cellsOnEdge % array(1,iEdge)
-            cell2 = grid % cellsOnEdge % array(2,iEdge)
-            if (cell1 &lt;= nCellsSolve .or. cell2 &lt;= nCellsSolve) then
+               do iEdge=1,grid % nEdges
+                  cell1 = grid % cellsOnEdge % array(1,iEdge)
+                  cell2 = grid % cellsOnEdge % array(2,iEdge)
+                  if (cell1 &lt;= nCellsSolve .or. cell2 &lt;= nCellsSolve) then
 
-               do k=2,grid % nVertLevels
-                  theta_turb_flux = h_mom_eddy_visc2*(w(k,cell2) - w(k,cell1))/dcEdge(iEdge)
-                  theta_turb_flux = theta_turb_flux * meshScalingDel2(iEdge)
-                  flux = 0.5*dvEdge (iEdge) * (rho_edge(k,iEdge)+rho_edge(k-1,iEdge)) * theta_turb_flux
-!                  tend_w(k,cell1) = tend_w(k,cell1) + flux
-!                  tend_w(k,cell2) = tend_w(k,cell2) - flux
-                  tend_w_euler(k,cell1) = tend_w_euler(k,cell1) + flux/areaCell(cell1)
-                  tend_w_euler(k,cell2) = tend_w_euler(k,cell2) - flux/areaCell(cell2)
+                     ! horizontal flux divergence of the gradient (i.e. del^2)
+                     ! note, for w, even though we use theta_* local scratch variables
+                     do k=2,grid % nVertLevels
+                        theta_turb_flux = h_mom_eddy_visc2*(w(k,cell2) - w(k,cell1))/dcEdge(iEdge)
+                        theta_turb_flux = theta_turb_flux * meshScalingDel2(iEdge)
+                        flux = 0.5*dvEdge (iEdge) * (rho_edge(k,iEdge)+rho_edge(k-1,iEdge)) * theta_turb_flux
+                        tend_w_euler(k,cell1) = tend_w_euler(k,cell1) + flux/areaCell(cell1)
+                        tend_w_euler(k,cell2) = tend_w_euler(k,cell2) - flux/areaCell(cell2)
+                     end do
+
+                  end if
                end do
 
-             end if
-           end do
-        else if (config_horiz_mixing == &quot;2d_smagorinsky&quot;) then
+            else if (config_horiz_mixing == &quot;2d_smagorinsky&quot;) then
 
-          do iEdge=1,grid % nEdges
-            cell1 = grid % cellsOnEdge % array(1,iEdge)
-            cell2 = grid % cellsOnEdge % array(2,iEdge)
-            if (cell1 &lt;= nCellsSolve .or. cell2 &lt;= nCellsSolve) then
+               do iEdge=1,grid % nEdges
+                  cell1 = grid % cellsOnEdge % array(1,iEdge)
+                  cell2 = grid % cellsOnEdge % array(2,iEdge)
+                  if (cell1 &lt;= nCellsSolve .or. cell2 &lt;= nCellsSolve) then
+   
+                     do k=2,grid % nVertLevels
+                        theta_turb_flux = 0.25*(kdiff(k,cell1)+kdiff(k,cell2)+kdiff(k-1,cell1)+kdiff(k,cell2))  &amp;
+                                              *(w(k,cell2) - w(k,cell1))/dcEdge(iEdge)
+                        theta_turb_flux = theta_turb_flux * meshScalingDel2(iEdge)
+                        flux = 0.5*dvEdge (iEdge) * (rho_edge(k,iEdge)+rho_edge(k-1,iEdge)) * theta_turb_flux
+                        tend_w_euler(k,cell1) = tend_w_euler(k,cell1) + flux/areaCell(cell1)
+                        tend_w_euler(k,cell2) = tend_w_euler(k,cell2) - flux/areaCell(cell2)
+                     end do
 
-               do k=2,grid % nVertLevels
-!                  theta_turb_flux = h_mom_eddy_visc2*(w(k,cell2) - w(k,cell1))/dcEdge(iEdge)
-                  theta_turb_flux = 0.25*(kdiff(k,cell1)+kdiff(k,cell2)+kdiff(k-1,cell1)+kdiff(k,cell2))  &amp;
-                                        *(w(k,cell2) - w(k,cell1))/dcEdge(iEdge)
-                  theta_turb_flux = theta_turb_flux * meshScalingDel2(iEdge)
-                  flux = 0.5*dvEdge (iEdge) * (rho_edge(k,iEdge)+rho_edge(k-1,iEdge)) * theta_turb_flux
-!                  tend_w(k,cell1) = tend_w(k,cell1) + flux
-!                  tend_w(k,cell2) = tend_w(k,cell2) - flux
-                  tend_w_euler(k,cell1) = tend_w_euler(k,cell1) + flux/areaCell(cell1)
-                  tend_w_euler(k,cell2) = tend_w_euler(k,cell2) - flux/areaCell(cell2)
+                  end if
                end do
+            end if
+         end if   ! delsq_horiz_mixing
 
-             end if
-           end do
-        end if

-      end if
+         if ((h_mom_eddy_visc4 &gt; 0.0 .and. config_horiz_mixing == &quot;2d_fixed&quot;) .or. &amp;
+             (h_mom_eddy_visc4 &gt; 0.0 .and. config_horiz_mixing == &quot;2d_smagorinsky&quot;)) then
 
-!ldf (2010-10-10):
-!     if ( (h_mom_eddy_visc4 &gt; 0.0) .and. (config_horiz_mixing == &quot;2d_fixed&quot;)) then
-      if ((h_mom_eddy_visc4 &gt; 0.0 .and. config_horiz_mixing == &quot;2d_fixed&quot;) .or. &amp;
-          (h_mom_eddy_visc4 &gt; 0.0 .and. config_horiz_mixing == &quot;2d_smagorinsky&quot;)) then
 
-         allocate(delsq_theta(nVertLevels, nCells+1))
+            ! del^4 horizontal filter.  We compute this as del^2 ( del^2 (u) ).
+            !
+            ! First, storage to hold the result from the first del^2 computation.
+            !  we copied code from the theta mixing, hence the theta* names.
 
-         delsq_theta(:,:) = 0.
+            allocate(delsq_theta(nVertLevels, nCells+1))
 
-         do iEdge=1,grid % nEdges
-            cell1 = grid % cellsOnEdge % array(1,iEdge)
-            cell2 = grid % cellsOnEdge % array(2,iEdge)
-            do k=2,grid % nVertLevels
-               delsq_theta(k,cell1) = delsq_theta(k,cell1) + dvEdge(iEdge)*0.5*(rho_edge(k,iEdge)+rho_edge(k-1,iEdge))*(w(k,cell2) - w(k,cell1))/dcEdge(iEdge)
-               delsq_theta(k,cell2) = delsq_theta(k,cell2) - dvEdge(iEdge)*0.5*(rho_edge(k,iEdge)+rho_edge(k-1,iEdge))*(w(k,cell2) - w(k,cell1))/dcEdge(iEdge)
+            delsq_theta(:,:) = 0.
+
+            do iEdge=1,grid % nEdges
+               cell1 = grid % cellsOnEdge % array(1,iEdge)
+               cell2 = grid % cellsOnEdge % array(2,iEdge)
+               do k=2,grid % nVertLevels
+                  delsq_theta(k,cell1) = delsq_theta(k,cell1) + dvEdge(iEdge)*0.5*(rho_edge(k,iEdge)+rho_edge(k-1,iEdge))*(w(k,cell2) - w(k,cell1))/dcEdge(iEdge)
+                  delsq_theta(k,cell2) = delsq_theta(k,cell2) - dvEdge(iEdge)*0.5*(rho_edge(k,iEdge)+rho_edge(k-1,iEdge))*(w(k,cell2) - w(k,cell1))/dcEdge(iEdge)
+               end do
             end do
-         end do
 
-         do iCell = 1, nCells
-            r = 1.0 / areaCell(iCell)
-            do k=2,nVertLevels
-               delsq_theta(k,iCell) = delsq_theta(k,iCell) * r
+            do iCell = 1, nCells
+               r = 1.0 / areaCell(iCell)
+               do k=2,nVertLevels
+                  delsq_theta(k,iCell) = delsq_theta(k,iCell) * r
+               end do
             end do
-         end do
 
-         do iEdge=1,grid % nEdges
-            cell1 = grid % cellsOnEdge % array(1,iEdge)
-            cell2 = grid % cellsOnEdge % array(2,iEdge)
-            if (cell1 &lt;= nCellsSolve .or. cell2 &lt;= nCellsSolve) then
+            do iEdge=1,grid % nEdges
+               cell1 = grid % cellsOnEdge % array(1,iEdge)
+               cell2 = grid % cellsOnEdge % array(2,iEdge)
+               if (cell1 &lt;= nCellsSolve .or. cell2 &lt;= nCellsSolve) then
 
-               do k=2,grid % nVertLevels
-                  theta_turb_flux = h_mom_eddy_visc4*(delsq_theta(k,cell2) - delsq_theta(k,cell1))/dcEdge(iEdge)
-                  theta_turb_flux = theta_turb_flux * meshScalingDel4(iEdge)
-                  flux = dvEdge (iEdge) * theta_turb_flux
+                  do k=2,grid % nVertLevels
+                     theta_turb_flux = h_mom_eddy_visc4*(delsq_theta(k,cell2) - delsq_theta(k,cell1))/dcEdge(iEdge)
+                     theta_turb_flux = theta_turb_flux * meshScalingDel4(iEdge)
+                     flux = dvEdge (iEdge) * theta_turb_flux
+                     tend_w_euler(k,cell1) = tend_w_euler(k,cell1) - flux/areaCell(cell1)
+                     tend_w_euler(k,cell2) = tend_w_euler(k,cell2) + flux/areaCell(cell2)
+                  end do
 
-!                  tend_w(k,cell1) = tend_w(k,cell1) - flux
-!                  tend_w(k,cell2) = tend_w(k,cell2) + flux
-                  tend_w_euler(k,cell1) = tend_w_euler(k,cell1) - flux/areaCell(cell1)
-                  tend_w_euler(k,cell2) = tend_w_euler(k,cell2) + flux/areaCell(cell2)
-               end do
+               end if
+            end do
 
-            end if
-         end do
+            deallocate(delsq_theta)
 
-         deallocate(delsq_theta)
+         end if
 
-      end if
-
       end if ! horizontal mixing for w computed in first rk_step
 
       !
       !  vertical advection, pressure gradient and buoyancy for w
-      !  Note: we are also dividing through by the cell area after the horizontal flux divergence
       !
 
       do iCell = 1, nCells
@@ -2689,10 +2651,11 @@
 
          wdwz(nVertLevels+1) = 0.
 
+      !  Note: next we are also dividing through by the cell area after the horizontal flux divergence
+
          do k=2,nVertLevels
 
             tend_w(k,iCell) = tend_w(k,iCell)/areaCell(iCell) -rdzu(k)*(wdwz(k+1)-wdwz(k))    &amp;
-!SHP-buoy
                                   - cqw(k,iCell)*( rdzu(k)*(pp(k,iCell)-pp(k-1,iCell))        &amp;
                                   + gravity*  &amp;
                                    ( fzm(k)*(rb(k,iCell)*(qtot(k,iCell)) +         &amp;
@@ -2708,17 +2671,17 @@
 
       if (rk_step == 1 .or. rk_diffusion) then
 
-      if ( v_mom_eddy_visc2 &gt; 0.0 ) then
+         if ( v_mom_eddy_visc2 &gt; 0.0 ) then
 
-         do iCell = 1, grid % nCellsSolve
+            do iCell = 1, grid % nCellsSolve
             do k=2,nVertLevels
                tend_w_euler(k,iCell) = tend_w_euler(k,iCell) + v_mom_eddy_visc2*0.5*(rho_zz(k,iCell)+rho_zz(k-1,iCell))*(  &amp;
                                         (w(k+1,iCell)-w(k  ,iCell))*rdzw(k)                              &amp;
                                        -(w(k  ,iCell)-w(k-1,iCell))*rdzw(k-1) )*rdzu(k)
             end do
-         end do
+            end do
 
-      end if
+         end if
 
       end if ! mixing term computed first rk_step
 
@@ -2764,11 +2727,11 @@
 
                flux_arr(:) = 0.
                do i=1,nAdvCellsForEdge(iEdge)
-                 iCell = advCellsForEdge(i,iEdge)
-                 do k=1,grid % nVertLevels
-                   scalar_weight = adv_coefs(i,iEdge) + coef_3rd_order*sign(1.0_RKIND,ru(k,iEdge))*adv_coefs_3rd(i,iEdge)
-                   flux_arr(k) = flux_arr(k) + scalar_weight* theta_m(k,iCell)
-                 end do
+                  iCell = advCellsForEdge(i,iEdge)
+                  do k=1,grid % nVertLevels
+                     scalar_weight = adv_coefs(i,iEdge) + coef_3rd_order*sign(1.0_RKIND,ru(k,iEdge))*adv_coefs_3rd(i,iEdge)
+                     flux_arr(k) = flux_arr(k) + scalar_weight* theta_m(k,iCell)
+                  end do
                end do
 
                do k=1,grid % nVertLevels
@@ -2776,39 +2739,6 @@
                   tend_theta(k,cell2) = tend_theta(k,cell2) + ru(k,iEdge)*flux_arr(k)
                end do
 
-
-!               do k=1,grid % nVertLevels
-!
-!                  d2fdx2_cell1 = deriv_two(1,1,iEdge) * theta_m(k,cell1)
-!                  d2fdx2_cell2 = deriv_two(1,2,iEdge) * theta_m(k,cell2)
-!                  do i=1, grid % nEdgesOnCell % array (cell1)
-!                     if ( grid % CellsOnCell % array (i,cell1) &lt;= grid%nCells) &amp;
-!                     d2fdx2_cell1 = d2fdx2_cell1 + deriv_two(i+1,1,iEdge) * theta_m(k,grid % CellsOnCell % array (i,cell1))
-!                  end do
-!                  do i=1, grid % nEdgesOnCell % array (cell2)
-!                     if ( grid % CellsOnCell % array (i,cell2) &lt;= grid%nCells) &amp;
-!                     d2fdx2_cell2 = d2fdx2_cell2 + deriv_two(i+1,2,iEdge) * theta_m(k,grid % CellsOnCell % array (i,cell2))
-!                  end do
-!
-!  3rd order stencil
-!
-!                  if( u(k,iEdge) &gt; 0) then
-!                        flux = dvEdge(iEdge) * ru(k,iEdge) * (          &amp;
-!                                               0.5*(theta_m(k,cell1) + theta_m(k,cell2))      &amp;
-!                                                -(dcEdge(iEdge) **2) * (d2fdx2_cell1 + d2fdx2_cell2) / 12.          &amp;
-!                                                -(dcEdge(iEdge) **2) * coef_3rd_order*(d2fdx2_cell1 - d2fdx2_cell2) / 12. )
-!                     else
-!                        flux = dvEdge(iEdge) *  ru(k,iEdge) * (          &amp;
-!                                               0.5*(theta_m(k,cell1) + theta_m(k,cell2))      &amp;
-!                                                -(dcEdge(iEdge) **2) * (d2fdx2_cell1 + d2fdx2_cell2) / 12.          &amp;
-!                                                +(dcEdge(iEdge) **2) * coef_3rd_order*(d2fdx2_cell1 - d2fdx2_cell2) / 12. )
-!                  end if
-!
-!                  tend_theta(k,cell1) = tend_theta(k,cell1) - flux
-!                  tend_theta(k,cell2) = tend_theta(k,cell2) + flux
-!
-!               end do
-
             end if
          end do
 
@@ -2825,11 +2755,11 @@
                   d2fdx2_cell2 = deriv_two(1,2,iEdge) * theta_m(k,cell2)
                   do i=1, grid % nEdgesOnCell % array (cell1)
                      if ( grid % CellsOnCell % array (i,cell1) &lt;= grid%nCells) &amp;
-                     d2fdx2_cell1 = d2fdx2_cell1 + deriv_two(i+1,1,iEdge) * theta_m(k,grid % CellsOnCell % array (i,cell1))
+                        d2fdx2_cell1 = d2fdx2_cell1 + deriv_two(i+1,1,iEdge) * theta_m(k,grid % CellsOnCell % array (i,cell1))
                   end do
                   do i=1, grid % nEdgesOnCell % array (cell2)
                      if ( grid % CellsOnCell % array (i,cell2) &lt;= grid%nCells) &amp;
-                     d2fdx2_cell2 = d2fdx2_cell2 + deriv_two(i+1,2,iEdge) * theta_m(k,grid % CellsOnCell % array (i,cell2))
+                        d2fdx2_cell2 = d2fdx2_cell2 + deriv_two(i+1,2,iEdge) * theta_m(k,grid % CellsOnCell % array (i,cell2))
                   end do
 
                   flux = dvEdge(iEdge) *  ru(k,iEdge) * (                                               &amp;
@@ -2866,8 +2796,6 @@
                      theta_turb_flux = h_theta_eddy_visc2*prandtl_inv*(theta_m(k,cell2) - theta_m(k,cell1))/dcEdge(iEdge)
                      theta_turb_flux = theta_turb_flux * meshScalingDel2(iEdge)
                      flux = dvEdge (iEdge) * rho_edge(k,iEdge) * theta_turb_flux
-!                     tend_theta(k,cell1) = tend_theta(k,cell1) + flux
-!                     tend_theta(k,cell2) = tend_theta(k,cell2) - flux
                      tend_theta_euler(k,cell1) = tend_theta_euler(k,cell1) + flux/areaCell(cell1)
                      tend_theta_euler(k,cell2) = tend_theta_euler(k,cell2) - flux/areaCell(cell2)
                   end do
@@ -2887,8 +2815,6 @@
                                            *(theta_m(k,cell2) - theta_m(k,cell1))/dcEdge(iEdge)
                      theta_turb_flux = theta_turb_flux * meshScalingDel2(iEdge)
                      flux = dvEdge (iEdge) * rho_edge(k,iEdge) * theta_turb_flux
-!                     tend_theta(k,cell1) = tend_theta(k,cell1) + flux
-!                     tend_theta(k,cell2) = tend_theta(k,cell2) - flux
                      tend_theta_euler(k,cell1) = tend_theta_euler(k,cell1) + flux/areaCell(cell1)
                      tend_theta_euler(k,cell2) = tend_theta_euler(k,cell2) - flux/areaCell(cell2)
                   end do
@@ -2899,8 +2825,6 @@
 
       end if
 
-!ldf (2010-10-10):
-!     if ( (h_theta_eddy_visc4 &gt; 0.0) .and. (config_horiz_mixing == &quot;2d_fixed&quot;) ) then
       if ((h_theta_eddy_visc4 &gt; 0.0 .and. config_horiz_mixing == &quot;2d_fixed&quot;) .or. &amp;
           (h_theta_eddy_visc4 &gt; 0.0 .and. config_horiz_mixing == &quot;2d_smagorinsky&quot;)) then
 
@@ -2933,9 +2857,6 @@
                   theta_turb_flux = h_theta_eddy_visc4*prandtl_inv*(delsq_theta(k,cell2) - delsq_theta(k,cell1))/dcEdge(iEdge)
                   theta_turb_flux = theta_turb_flux * meshScalingDel4(iEdge)
                   flux = dvEdge (iEdge) * theta_turb_flux
-
-!                  tend_theta(k,cell1) = tend_theta(k,cell1) - flux
-!                  tend_theta(k,cell2) = tend_theta(k,cell2) + flux
                   tend_theta_euler(k,cell1) = tend_theta_euler(k,cell1) - flux/areaCell(cell1)
                   tend_theta_euler(k,cell2) = tend_theta_euler(k,cell2) + flux/areaCell(cell2)
                end do
@@ -3003,47 +2924,41 @@
 
          if (config_mix_full) then
 
-         do iCell = 1, grid % nCellsSolve
-            do k=2,nVertLevels-1
-               z1 = zgrid(k-1,iCell)
-               z2 = zgrid(k  ,iCell)
-               z3 = zgrid(k+1,iCell)
-               z4 = zgrid(k+2,iCell)
+            do iCell = 1, grid % nCellsSolve
+               do k=2,nVertLevels-1
+                  z1 = zgrid(k-1,iCell)
+                  z2 = zgrid(k  ,iCell)
+                  z3 = zgrid(k+1,iCell)
+                  z4 = zgrid(k+2,iCell)
 
-               zm = 0.5*(z1+z2)
-               z0 = 0.5*(z2+z3)
-               zp = 0.5*(z3+z4)
+                  zm = 0.5*(z1+z2)
+                  z0 = 0.5*(z2+z3)
+                  zp = 0.5*(z3+z4)
 
-!               tend_theta(k,iCell) = tend_theta(k,iCell) + v_theta_eddy_visc2*prandtl_inv*rho_zz(k,iCell)*(&amp;
-!                                        (theta_m(k+1,iCell)-theta_m(k  ,iCell))/(zp-z0)                 &amp;
-!                                       -(theta_m(k  ,iCell)-theta_m(k-1,iCell))/(z0-zm) )/(0.5*(zp-zm))
-               tend_theta_euler(k,iCell) = tend_theta_euler(k,iCell) + v_theta_eddy_visc2*prandtl_inv*rho_zz(k,iCell)*(&amp;
-                                        (theta_m(k+1,iCell)-theta_m(k  ,iCell))/(zp-z0)                 &amp;
-                                       -(theta_m(k  ,iCell)-theta_m(k-1,iCell))/(z0-zm) )/(0.5*(zp-zm))
+                  tend_theta_euler(k,iCell) = tend_theta_euler(k,iCell) + v_theta_eddy_visc2*prandtl_inv*rho_zz(k,iCell)*(&amp;
+                                           (theta_m(k+1,iCell)-theta_m(k  ,iCell))/(zp-z0)                 &amp;
+                                          -(theta_m(k  ,iCell)-theta_m(k-1,iCell))/(z0-zm) )/(0.5*(zp-zm))
+               end do
             end do
-         end do
 
          else  ! idealized cases where we mix on the perturbation from the initial 1-D state
 
-         do iCell = 1, grid % nCellsSolve
-            do k=2,nVertLevels-1
-               z1 = zgrid(k-1,iCell)
-               z2 = zgrid(k  ,iCell)
-               z3 = zgrid(k+1,iCell)
-               z4 = zgrid(k+2,iCell)
+            do iCell = 1, grid % nCellsSolve
+               do k=2,nVertLevels-1
+                  z1 = zgrid(k-1,iCell)
+                  z2 = zgrid(k  ,iCell)
+                  z3 = zgrid(k+1,iCell)
+                  z4 = zgrid(k+2,iCell)
 
-               zm = 0.5*(z1+z2)
-               z0 = 0.5*(z2+z3)
-               zp = 0.5*(z3+z4)
+                  zm = 0.5*(z1+z2)
+                  z0 = 0.5*(z2+z3)
+                  zp = 0.5*(z3+z4)
 
-!               tend_theta(k,iCell) = tend_theta(k,iCell) + v_theta_eddy_visc2*prandtl_inv*rho_zz(k,iCell)*(&amp;
-!                                        ((theta_m(k+1,iCell)-t_init(k+1,iCell))-(theta_m(k  ,iCell)-t_init(k,iCell)))/(zp-z0)      &amp;
-!                                       -((theta_m(k  ,iCell)-t_init(k,iCell))-(theta_m(k-1,iCell)-t_init(k-1,iCell)))/(z0-zm) )/(0.5*(zp-zm))
-               tend_theta_euler(k,iCell) = tend_theta_euler(k,iCell) + v_theta_eddy_visc2*prandtl_inv*rho_zz(k,iCell)*(&amp;
-                                        ((theta_m(k+1,iCell)-t_init(k+1,iCell))-(theta_m(k  ,iCell)-t_init(k,iCell)))/(zp-z0)      &amp;
-                                       -((theta_m(k  ,iCell)-t_init(k,iCell))-(theta_m(k-1,iCell)-t_init(k-1,iCell)))/(z0-zm) )/(0.5*(zp-zm))
+                  tend_theta_euler(k,iCell) = tend_theta_euler(k,iCell) + v_theta_eddy_visc2*prandtl_inv*rho_zz(k,iCell)*(&amp;
+                                           ((theta_m(k+1,iCell)-t_init(k+1,iCell))-(theta_m(k  ,iCell)-t_init(k,iCell)))/(zp-z0)      &amp;
+                                          -((theta_m(k  ,iCell)-t_init(k,iCell))-(theta_m(k-1,iCell)-t_init(k-1,iCell)))/(z0-zm) )/(0.5*(zp-zm))
+               end do
             end do
-         end do
 
          end if
 
@@ -3065,9 +2980,9 @@
    !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 
    ! Compute diagnostic fields used in the tendency computations
    !
-   ! Input: grid - grid metadata
+   ! Input: state (s), grid - grid metadata
    !
-   ! Output: s - computed diagnostics
+   ! Output: diag - computed diagnostics
    !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 
 
       implicit none
@@ -3089,7 +3004,6 @@
       integer, dimension(:,:), pointer :: cellsOnEdge, cellsOnVertex, verticesOnEdge, edgesOnCell, edgesOnEdge, edgesOnVertex
       integer, dimension(:), pointer :: nEdgesOnCell, nEdgesOnEdge
 
-      !WCS-instability
       logical, parameter :: hollingsworth=.true.
       real (kind=RKIND), allocatable, dimension(:,:) :: ke_vertex
       real (kind=RKIND)  :: ke_fact
@@ -3182,7 +3096,7 @@
 
 
       !
-      ! Compute kinetic energy in each cell
+      ! Compute kinetic energy in each cell (Ringler et al JCP 2009)
       !
       ke(:,:) = 0.0
       do iCell=1,nCells
@@ -3197,52 +3111,49 @@
          end do
       end do
 
-      !WCS-instability
-      ! Compute ke at cell vertices - AG's new KE construction, part 1
-      ! *** approximation here because we don't have inner triangle areas
-      !
       if (hollingsworth) then
-      allocate (ke_vertex(nVertLevels,nVertices))
-      do iVertex=1,nVertices
-         do k=1,nVertLevels
-!            ke_vertex(k,iVertex) = (  subTriangleAreasOnVertex(1,iVertex)*u(k,EdgesOnVertex(1,iVertex))**2.0  &amp;
-!                                    + subTriangleAreasOnVertex(2,iVertex)*u(k,EdgesOnVertex(2,iVertex))**2.0  &amp;
-!                                    + subTriangleAreasOnVertex(3,iVertex)*u(k,EdgesOnVertex(3,iVertex))**2.0  &amp;
-!                                                 ) / AreaTriangle(iVertex)
 
-            ke_vertex(k,iVertex) = (  dcEdge(EdgesOnVertex(1,iVertex))*dvEdge(EdgesOnVertex(1,iVertex))*u(k,EdgesOnVertex(1,iVertex))**2.0  &amp;
-                                     +dcEdge(EdgesOnVertex(2,iVertex))*dvEdge(EdgesOnVertex(2,iVertex))*u(k,EdgesOnVertex(2,iVertex))**2.0  &amp;
-                                     +dcEdge(EdgesOnVertex(3,iVertex))*dvEdge(EdgesOnVertex(3,iVertex))*u(k,EdgesOnVertex(3,iVertex))**2.0  &amp;
-                                                 ) * 0.25 / AreaTriangle(iVertex)
+         ! Compute ke at cell vertices - AG's new KE construction, part 1
+         ! *** approximation here because we don't have inner triangle areas
+         !
 
+         allocate (ke_vertex(nVertLevels,nVertices))
+         do iVertex=1,nVertices
+            do k=1,nVertLevels
+
+               ke_vertex(k,iVertex) = (  dcEdge(EdgesOnVertex(1,iVertex))*dvEdge(EdgesOnVertex(1,iVertex))*u(k,EdgesOnVertex(1,iVertex))**2.0  &amp;
+                                        +dcEdge(EdgesOnVertex(2,iVertex))*dvEdge(EdgesOnVertex(2,iVertex))*u(k,EdgesOnVertex(2,iVertex))**2.0  &amp;
+                                        +dcEdge(EdgesOnVertex(3,iVertex))*dvEdge(EdgesOnVertex(3,iVertex))*u(k,EdgesOnVertex(3,iVertex))**2.0  &amp;
+                                                    ) * 0.25 / AreaTriangle(iVertex)
+
+            end do
          end do
-      end do
 
-      ! adjust ke at cell vertices - AG's new KE construction, part 2
-      !
+         ! adjust ke at cell vertices - AG's new KE construction, part 2
+         !
 
-      ke_fact = 1.0 - .375
+         ke_fact = 1.0 - .375
 
-      do iCell=1,nCells
-         do k=1,nVertLevels
-            ke(k,iCell) = ke_fact*ke(k,iCell)
+         do iCell=1,nCells
+            do k=1,nVertLevels
+               ke(k,iCell) = ke_fact*ke(k,iCell)
+            end do
          end do
-      end do
 
-      do iVertex = 1, nVertices
-       do i=1,grid % vertexDegree
-          iCell = cellsOnVertex(i,iVertex)
-          do k = 1,nVertLevels
-             ke(k,iCell) = ke(k,iCell) + (1.-ke_fact)*kiteAreasOnVertex(i, iVertex) * ke_vertex(k, iVertex) / areaCell(iCell)
-          end do
-       end do
-      end do
-      deallocate (ke_vertex)
+         do iVertex = 1, nVertices
+            do i=1,grid % vertexDegree
+               iCell = cellsOnVertex(i,iVertex)
+               do k = 1,nVertLevels
+                  ke(k,iCell) = ke(k,iCell) + (1.-ke_fact)*kiteAreasOnVertex(i, iVertex) * ke_vertex(k, iVertex) / areaCell(iCell)
+               end do
+            end do
+         end do
+         deallocate (ke_vertex)
+
       end if
-      !END of WCS-instability
 
       !
-      ! Compute v (tangential) velocities
+      ! Compute v (tangential) velocities following Thuburn et al JCP 2009
       !
       v(:,:) = 0.0
       do iEdge = 1,nEdges
@@ -3303,44 +3214,44 @@
 
       if (config_apvm_upwinding &gt; 0.0) then
 
-      !
-      ! Modify PV edge with upstream bias. 
-      !
-      ! Compute gradient of PV in the tangent direction
-      !   ( this computes gradPVt at all edges bounding real cells )
-      !
-      do iEdge = 1,nEdges
-         do k = 1,nVertLevels
-            gradPVt(k,iEdge) = (pv_vertex(k,verticesOnEdge(2,iEdge)) - pv_vertex(k,verticesOnEdge(1,iEdge))) / &amp;
-                               dvEdge(iEdge)
+         !
+         ! Modify PV edge with upstream bias. 
+         !
+         ! Compute gradient of PV in the tangent direction
+         !   ( this computes gradPVt at all edges bounding real cells )
+         !
+         do iEdge = 1,nEdges
+            do k = 1,nVertLevels
+               gradPVt(k,iEdge) = (pv_vertex(k,verticesOnEdge(2,iEdge)) - pv_vertex(k,verticesOnEdge(1,iEdge))) / &amp;
+                                  dvEdge(iEdge)
+            end do
          end do
-      end do
 
-      !
-      ! Compute gradient of PV in normal direction
-      !   (tdr: 2009-10-02: this is not correct because the pv_cell in the halo is not correct)
-      !
-      gradPVn(:,:) = 0.0
-      do iEdge = 1,nEdges
-         do k = 1,nVertLevels
-            gradPVn(k,iEdge) = (pv_cell(k,cellsOnEdge(2,iEdge)) - pv_cell(k,cellsOnEdge(1,iEdge))) / &amp;
-                                 dcEdge(iEdge)
+         !
+         ! Compute gradient of PV in normal direction
+         !   (tdr: 2009-10-02: this is not correct because the pv_cell in the halo is not correct)
+         !
+         gradPVn(:,:) = 0.0
+         do iEdge = 1,nEdges
+            do k = 1,nVertLevels
+               gradPVn(k,iEdge) = (pv_cell(k,cellsOnEdge(2,iEdge)) - pv_cell(k,cellsOnEdge(1,iEdge))) / &amp;
+                                    dcEdge(iEdge)
+            end do
          end do
-      end do
 
-      do iEdge = 1,nEdges
-         do k = 1,nVertLevels
-            pv_edge(k,iEdge) = pv_edge(k,iEdge) - config_apvm_upwinding * v(k,iEdge) * dt * gradPVt(k,iEdge)
+         do iEdge = 1,nEdges
+            do k = 1,nVertLevels
+               pv_edge(k,iEdge) = pv_edge(k,iEdge) - config_apvm_upwinding * v(k,iEdge) * dt * gradPVt(k,iEdge)
+            end do
          end do
-      end do
 
-      ! Modify PV edge with upstream bias.
-      !
-      do iEdge = 1,nEdges
-         do k = 1,nVertLevels
-            pv_edge(k,iEdge) = pv_edge(k,iEdge) - config_apvm_upwinding * u(k,iEdge) *dt * gradPVn(k,iEdge)
+         ! Modify PV edge with upstream bias.
+         !
+         do iEdge = 1,nEdges
+            do k = 1,nVertLevels
+               pv_edge(k,iEdge) = pv_edge(k,iEdge) - config_apvm_upwinding * u(k,iEdge) *dt * gradPVn(k,iEdge)
+            end do
          end do
-      end do
 
       end if  ! apvm upwinding
 
@@ -3357,11 +3268,9 @@
       type (diag_type), intent(inout) :: diag
       type (mesh_type), intent(inout) :: grid
 
-      !SHP-w
       integer :: k,iCell,iEdge,iCell1,iCell2, cell1, cell2, coef_3rd_order
       real (kind=RKIND) :: p0, rcv, flux
 
-      !SHP-w
       coef_3rd_order = config_coef_3rd_order
       if(config_theta_adv_order /=3) coef_3rd_order = 0
 
@@ -3383,19 +3292,6 @@
          end do
       end do
 
-!      ! Compute w from rho_zz and rw
-!      do iCell=1,grid%nCells
-!         diag % rw % array(1,iCell) = 0.
-!         diag % rw % array(grid%nVertLevels+1,iCell) = 0.
-!         do k=2,grid%nVertLevels
-!            diag % rw % array(k,iCell) = state % w % array(k,iCell)     &amp;
-!                          * (grid % fzp % array(k) * state % rho_zz % array(k-1,iCell) + grid % fzm % array(k) * state % rho_zz % array(k,iCell))
-!         end do
-!      end do
-
-
-! WCS bug fix 20110916
-
       ! Compute rw (i.e. rho_zz * omega) from rho_zz, w, and ru.
       ! We are reversing the procedure we use in subroutine atm_recover_large_step_variables.
       ! first, the piece that depends on w.
@@ -3409,12 +3305,11 @@
          end do
       end do
   
-      !SHP-w
       ! next, the piece that depends on ru
       do iEdge=1,grid%nEdges
-        cell1 = grid % CellsOnEdge % array(1,iEdge)
-        cell2 = grid % CellsOnEdge % array(2,iEdge)
-          do k = 2, grid % nVertLevels
+         cell1 = grid % CellsOnEdge % array(1,iEdge)
+         cell2 = grid % CellsOnEdge % array(2,iEdge)
+         do k = 2, grid % nVertLevels
             flux = (grid % fzm % array(k) * diag % ru % array(k,iEdge)+grid % fzp % array(k) * diag % ru % array(k-1,iEdge))
             diag % rw % array(k,cell2) = diag % rw % array(k,cell2)   &amp;
                           + (grid % zb % array(k,2,iEdge) + coef_3rd_order * sign(1.0_RKIND,flux) * grid % zb3 % array(k,2,iEdge))*flux   &amp;
@@ -3422,11 +3317,9 @@
             diag % rw % array(k,cell1) = diag % rw % array(k,cell1)   &amp;
                           - (grid % zb % array(k,1,iEdge) + coef_3rd_order * sign(1.0_RKIND,flux) * grid % zb3 % array(k,1,iEdge))*flux   &amp;
                           * (grid % fzp % array(k) * grid % zz % array(k-1,cell1) + grid % fzm % array(k) * grid % zz % array(k,cell1))
-          end do
+         end do
       end do
 
-!  end WCS bug fix
-
       do iCell = 1, grid % nCells
          do k=1,grid % nVertLevels
             diag % rho_p % array(k,iCell) = state % rho_zz % array(k,iCell) - diag % rho_base % array(k,iCell)

</font>
</pre>