<p><b>duda</b> 2011-11-08 11:03:39 -0700 (Tue, 08 Nov 2011)</p><p>BRANCH COMMIT<br>
<br>
Optimizations to transport code in non-hydrostatic core.<br>
<br>
<br>
M    src/core_nhyd_atmos/module_mpas_core.F<br>
M    src/core_nhyd_atmos/Registry<br>
M    src/core_nhyd_atmos/module_time_integration.F<br>
</p><hr noshade><pre><font color="gray">Modified: branches/atmos_physics/src/core_nhyd_atmos/Registry
===================================================================
--- branches/atmos_physics/src/core_nhyd_atmos/Registry        2011-11-08 14:33:59 UTC (rev 1179)
+++ branches/atmos_physics/src/core_nhyd_atmos/Registry        2011-11-08 18:03:39 UTC (rev 1180)
@@ -269,9 +269,14 @@
 var persistent real    rho_p_save ( nVertLevels nCells Time ) 1 - rho_p_save diag - -
 
 # Space needed for advection
-var persistent real    deriv_two ( FIFTEEN TWO nEdges ) 0 iro deriv_two mesh - -
-var persistent integer advCells ( TWENTYONE nCells ) 0 iro advCells mesh - -
+var persistent real    deriv_two ( FIFTEEN TWO nEdges ) 0 ir deriv_two mesh - -
+var persistent integer advCells ( TWENTYONE nCells ) 0 ir advCells mesh - -
+var persistent real    adv_coefs ( FIFTEEN nEdges ) 0 - adv_coefs mesh - -
+var persistent real    adv_coefs_3rd ( FIFTEEN nEdges ) 0 - adv_coefs_3rd mesh - -
+var persistent integer advCellsForEdge ( FIFTEEN nEdges ) 0 - advCellsForEdge mesh - -
+var persistent integer nAdvCellsForEdge ( nEdges ) 0 - nAdvCellsForEdge mesh - -
 
+
 # Space needed for deformation calculation weights
 var persistent real    defc_a ( maxEdges nCells ) 0 iro defc_a mesh - -
 var persistent real    defc_b ( maxEdges nCells ) 0 iro defc_b mesh - -

Modified: branches/atmos_physics/src/core_nhyd_atmos/module_mpas_core.F
===================================================================
--- branches/atmos_physics/src/core_nhyd_atmos/module_mpas_core.F        2011-11-08 14:33:59 UTC (rev 1179)
+++ branches/atmos_physics/src/core_nhyd_atmos/module_mpas_core.F        2011-11-08 18:03:39 UTC (rev 1180)
@@ -254,6 +254,8 @@
                                                   maxval(mesh % meshScalingDel2 % array(1:mesh%nEdges))
       write(0,*) 'min/max of meshScalingDel4 = ', minval(mesh % meshScalingDel4 % array(1:mesh%nEdges)), &amp;
                                                   maxval(mesh % meshScalingDel4 % array(1:mesh%nEdges))
+
+      call adv_coef_compression(mesh)
    
    end subroutine mpas_init_block
    
@@ -597,4 +599,158 @@
 
    end subroutine compute_pgf_coefs
 
+
+   subroutine adv_coef_compression( grid )
+
+      implicit none
+
+      type (mesh_type), intent(inout) :: grid
+
+
+      real (kind=RKIND), dimension(:,:,:), pointer :: deriv_two
+      real (kind=RKIND), dimension(:,:), pointer :: adv_coefs, adv_coefs_3rd
+      integer, dimension(:,:), pointer :: cellsOnCell, cellsOnEdge, advCellsForEdge
+      integer, dimension(:), pointer :: nEdgesOnCell, nAdvCellsForEdge
+
+      integer :: cell1, cell2, iEdge, n, i, j, j_in, iCell
+      integer :: cell_list(20), ordered_cell_list(20)
+      logical :: addcell
+
+      deriv_two =&gt; grid % deriv_two % array
+      adv_coefs =&gt; grid % adv_coefs % array
+      adv_coefs_3rd =&gt; grid % adv_coefs_3rd % array
+      cellsOnCell =&gt; grid % cellsOnCell % array
+      cellsOnEdge =&gt; grid % cellsOnEdge % array
+      advCellsForEdge =&gt; grid % advCellsForEdge % array
+      nEdgesOnCell =&gt; grid % nEdgesOnCell % array
+      nAdvCellsForEdge =&gt; grid % nAdvCellsForEdge % array
+
+      do iEdge = 1, grid % nEdges
+         nAdvCellsForEdge(iEdge) = 0
+         cell1 = cellsOnEdge(1,iEdge)
+         cell2 = cellsOnEdge(2,iEdge)
+         !
+         ! do only if this edge flux is needed to update owned cells
+         !
+         if (cell1 &lt;= grid%nCells .or. cell2 &lt;= grid%nCells) then

+            cell_list(1) = cell1
+            cell_list(2) = cell2
+            n = 2 
+  
+          !  add cells surrounding cell 1.  n is number of cells currently in list
+            do i = 1, nEdgesOnCell(cell1)
+               if (cellsOnCell(i,cell1) /= cell2) then
+                  n = n + 1
+                  cell_list(n) = cellsOnCell(i,cell1)
+               end if
+            end do
+  
+          !  add cells surrounding cell 2 (brute force approach)
+            do iCell = 1, nEdgesOnCell(cell2)
+               addcell = .true.
+               do i=1,n
+                  if (cell_list(i) == cellsOnCell(iCell,cell2)) addcell = .false.
+               end do
+               if (addcell) then
+                  n = n+1
+                  cell_list(n) = cellsOnCell(iCell,cell2)
+               end if
+            end do
+  
+          ! order the list by increasing cell number (brute force approach)
+  
+            do i=1,n
+               ordered_cell_list(i) = grid % nCells + 2
+               j_in = 1
+               do j=1,n
+                  if (ordered_cell_list(i) &gt; cell_list(j) ) then
+                     j_in = j
+                     ordered_cell_list(i) = cell_list(j)
+                  end if
+               end do
+!               ordered_cell_list(i) = cell_list(j_in)
+               cell_list(j_in) = grid % nCells + 3
+            end do
+  
+            nAdvCellsForEdge(iEdge) = n
+            do iCell = 1, nAdvCellsForEdge(iEdge)
+               advCellsForEdge(iCell,iEdge) = ordered_cell_list(iCell)
+            end do
+  
+          ! we have the ordered list, now construct coefficients
+  
+            adv_coefs(:,iEdge) = 0.
+            adv_coefs_3rd(:,iEdge) = 0.
+          
+          ! pull together third and fourth order contributions to the flux
+          ! first from cell1
+  
+            j_in = 0
+            do j=1, n
+               if( ordered_cell_list(j) == cell1 ) j_in = j
+            end do
+            adv_coefs    (j_in,iEdge) = adv_coefs    (j_in,iEdge) + deriv_two(1,1,iEdge)
+            adv_coefs_3rd(j_in,iEdge) = adv_coefs_3rd(j_in,iEdge) + deriv_two(1,1,iEdge)
+  
+            do iCell = 1, nEdgesOnCell(cell1)
+               j_in = 0
+               do j=1, n
+                 if( ordered_cell_list(j) == cellsOnCell(iCell,cell1) ) j_in = j
+               end do
+               adv_coefs    (j_in,iEdge) = adv_coefs    (j_in,iEdge) + deriv_two(iCell+1,1,iEdge)
+               adv_coefs_3rd(j_in,iEdge) = adv_coefs_3rd(j_in,iEdge) + deriv_two(iCell+1,1,iEdge)
+            end do
+  
+          ! pull together third and fourth order contributions to the flux
+          ! now from cell2
+  
+            j_in = 0
+            do j=1, n
+               if( ordered_cell_list(j) == cell2 ) j_in = j
+            end do
+            adv_coefs    (j_in,iEdge) = adv_coefs    (j_in,iEdge) + deriv_two(1,2,iEdge)
+            adv_coefs_3rd(j_in,iEdge) = adv_coefs_3rd(j_in,iEdge) - deriv_two(1,2,iEdge)
+  
+            do iCell = 1, nEdgesOnCell(cell2)
+               j_in = 0
+               do j=1, n
+                  if( ordered_cell_list(j) == cellsOnCell(iCell,cell2) ) j_in = j
+               end do
+               adv_coefs    (j_in,iEdge) = adv_coefs    (j_in,iEdge) + deriv_two(iCell+1,2,iEdge)
+               adv_coefs_3rd(j_in,iEdge) = adv_coefs_3rd(j_in,iEdge) - deriv_two(iCell+1,2,iEdge)
+            end do
+  
+            do j = 1,n
+               adv_coefs    (j,iEdge) = - (grid % dcEdge % array (iEdge) **2) * adv_coefs    (j,iEdge) / 12.
+               adv_coefs_3rd(j,iEdge) = - (grid % dcEdge % array (iEdge) **2) * adv_coefs_3rd(j,iEdge) / 12.
+            end do
+  
+          ! 2nd order centered contribution - place this in the main flux weights
+  
+            j_in = 0
+            do j=1, n
+               if( ordered_cell_list(j) == cell1 ) j_in = j
+            end do
+            adv_coefs(j_in,iEdge) = adv_coefs(j_in,iEdge) + 0.5
+  
+            j_in = 0
+            do j=1, n
+               if( ordered_cell_list(j) == cell2 ) j_in = j
+            end do
+            adv_coefs(j_in,iEdge) = adv_coefs(j_in,iEdge) + 0.5
+  
+          !  multiply by edge length - thus the flux is just dt*ru times the results of the vector-vector multiply
+  
+            do j=1,n
+               adv_coefs    (j,iEdge) = grid % dvEdge % array(iEdge) * adv_coefs    (j,iEdge)
+               adv_coefs_3rd(j,iEdge) = grid % dvEdge % array(iEdge) * adv_coefs_3rd(j,iEdge)
+            end do

+         end if  ! only do for edges of owned-cells
+         
+      end do ! end loop over edges
+
+   end subroutine adv_coef_compression
+
 end module mpas_core

Modified: branches/atmos_physics/src/core_nhyd_atmos/module_time_integration.F
===================================================================
--- branches/atmos_physics/src/core_nhyd_atmos/module_time_integration.F        2011-11-08 14:33:59 UTC (rev 1179)
+++ branches/atmos_physics/src/core_nhyd_atmos/module_time_integration.F        2011-11-08 18:03:39 UTC (rev 1180)
@@ -1016,13 +1016,12 @@
 
       integer :: iCell, iEdge, k, cell1, cell2
       integer :: nVertLevels, nCells, nCellsSolve, nEdges, nEdgesSolve
-      real (kind=RKIND) :: rcv, p0, cf1, cf2, cf3, flux
+      real (kind=RKIND) :: rcv, p0, cf1, cf2, cf3, flux, coef_3rd_order
 
 !      logical, parameter :: debug=.true.
       logical, parameter :: debug=.false.
 
 !---
-
        wwAvg =&gt; diag % wwAvg % array
        rw_save =&gt; diag % rw_save % array
        rw =&gt; diag % rw % array
@@ -1078,21 +1077,14 @@
        cf1 = grid % cf1 % scalar
        cf2 = grid % cf2 % scalar
        cf3 = grid % cf3 % scalar
+       coef_3rd_order = config_coef_3rd_order
+       if(config_theta_adv_order /=3) coef_3rd_order = 0
 
       ! compute new density everywhere so we can compute u from ru.
       ! we will also need it to compute theta_m below
 
       do iCell = 1, nCells
 
-        if(debug) then
-          if( iCell == 479 ) then
-             write(0,*) ' k,rho_old,rp_old, rho_pp '
-            do k=1,nVertLevels
-              write(0,*) k, rho_zz(k,iCell) ,rho_p(k,iCell), rho_pp(k,iCell)
-            enddo
-          end if
-        end if
-
         do k = 1, nVertLevels
 
           rho_p(k,iCell) = rho_p(k,iCell) + rho_pp(k,iCell)
@@ -1103,18 +1095,9 @@
       !  recover owned-cell values in block
 
 !        if( iCell &lt;= nCellsSolve ) then    ! If using this test, more halo exchanges will be needed
-!  WCS-parallel: do we know about this? (OK)
+!  WCS-parallel: OK
 
 
-          if(debug) then
-          if( iCell == 479 ) then
-             write(0,*) ' k, rw, rw_save, rw_p '
-            do k=1,nVertLevels
-              write(0,*) k, rw(k,iCell), rw_save(k,iCell) ,rw_p(k,iCell)
-            enddo
-          end if
-          end if
-
           w(1,iCell) = 0.
           do k = 2, nVertLevels
             wwAvg(k,iCell) = rw(k,iCell) + (wwAvg(k,iCell) / float(ns))
@@ -1128,25 +1111,18 @@
           end do
           w(nVertLevels+1,iCell) = 0.
 
-          if(debug) then
-          if( iCell == 479 ) then
-             write(0,*) ' k, rtheta_p_save, rtheta_pp, rtheta_base '
-            do k=1,nVertLevels
-              write(0,*) k, rtheta_p_save(k,iCell), rtheta_pp(k,iCell), rtheta_base(k,iCell)
-            enddo
-          end if
-          end if
-
-          do k = 1, nVertLevels
-
-            if (rk_step==3) then
+          if(rk_step == 3) then
+            do k = 1, nVertLevels
                rtheta_p(k,iCell) = rtheta_p(k,iCell) + rtheta_pp(k,iCell) &amp;
                                  - dt * rho_zz(k,iCell) * rt_diabatic_tend(k,iCell)
-            else
+            end do
+          else
+            do k = 1, nVertLevels
                rtheta_p(k,iCell) = rtheta_p(k,iCell) + rtheta_pp(k,iCell)
-            end if
+            end do
+          end if
 
-
+          do k = 1, nVertLevels
             theta_m(k,iCell) = (rtheta_p(k,iCell) + rtheta_base(k,iCell))/rho_zz(k,iCell)
             exner(k,iCell) = (zz(k,iCell)*(rgas/p0)*(rtheta_p(k,iCell)+rtheta_base(k,iCell)))**rcv
              ! pressure below is perturbation pressure
@@ -1154,8 +1130,6 @@
                                                           * (exner(k,iCell)-exner_base(k,iCell)))
           end do
 
-!        end if
-
          !calculation of the surface pressure:
          surface_pressure(iCell) = 0.5*gravity/rdzw(1)                                              &amp;
                        * (1.25* rho_zz(1,iCell) * (1. + qvapor(1,iCell))  &amp;
@@ -1193,27 +1167,17 @@
 
           !SHP-mtn
           flux = cf1*ru(1,iEdge) + cf2*ru(2,iEdge) + cf3*ru(3,iEdge)
-          w(1,cell2) = w(1,cell2) - zb(1,2,iEdge)*flux/(cf1*rho_zz(1,cell2)+cf2*rho_zz(2,cell2)+cf3*rho_zz(3,cell2))
-          w(1,cell1) = w(1,cell1) + zb(1,1,iEdge)*flux/(cf1*rho_zz(1,cell1)+cf2*rho_zz(2,cell1)+cf3*rho_zz(3,cell1))
-!3rd order stencil
-          if (config_theta_adv_order == 3) then
-             w(1,cell2) = w(1,cell2) - sign(1.,flux)*config_coef_3rd_order      &amp;
-                                      *zb3(1,2,iEdge)*flux/(cf1*rho_zz(1,cell2)+cf2*rho_zz(2,cell2)+cf3*rho_zz(3,cell2))
-             w(1,cell1) = w(1,cell1) + sign(1.,flux)*config_coef_3rd_order      &amp;
-                                      *zb3(1,1,iEdge)*flux/(cf1*rho_zz(1,cell1)+cf2*rho_zz(2,cell1)+cf3*rho_zz(3,cell1))
-          end if
+          w(1,cell2) = w(1,cell2) - (zb(1,2,iEdge) + sign(1.,flux)*coef_3rd_order*zb3(1,2,iEdge))  &amp;
+                                 *flux/(cf1*rho_zz(1,cell2)+cf2*rho_zz(2,cell2)+cf3*rho_zz(3,cell2))
+          w(1,cell1) = w(1,cell1) + (zb(1,1,iEdge) + sign(1.,flux)*coef_3rd_order*zb3(1,1,iEdge))  &amp;
+                                 *flux/(cf1*rho_zz(1,cell1)+cf2*rho_zz(2,cell1)+cf3*rho_zz(3,cell1))
 
           do k = 2, nVertLevels
             flux = (fzm(k)*ru(k,iEdge)+fzp(k)*ru(k-1,iEdge))
-            w(k,cell2) = w(k,cell2) - zb(k,2,iEdge)*flux/(fzm(k)*rho_zz(k,cell2)+fzp(k)*rho_zz(k-1,cell2))
-            w(k,cell1) = w(k,cell1) + zb(k,1,iEdge)*flux/(fzm(k)*rho_zz(k,cell1)+fzp(k)*rho_zz(k-1,cell1))
-!3rd order! stencil
-            if (config_theta_adv_order ==3) then
-               w(k,cell2) = w(k,cell2) - sign(1.,flux)*config_coef_3rd_order    &amp;
-                                        *zb3(k,2,iEdge)*flux/(fzm(k)*rho_zz(k,cell2)+fzp(k)*rho_zz(k-1,cell2)) 
-               w(k,cell1) = w(k,cell1) + sign(1.,flux)*config_coef_3rd_order    &amp;
-                                        *zb3(k,1,iEdge)*flux/(fzm(k)*rho_zz(k,cell1)+fzp(k)*rho_zz(k-1,cell1)) 
-            end if
+            w(k,cell2) = w(k,cell2) - (zb(k,2,iEdge)+sign(1.,flux)*coef_3rd_order*zb3(k,2,iEdge)) &amp;
+                                 *flux/(fzm(k)*rho_zz(k,cell2)+fzp(k)*rho_zz(k-1,cell2))
+            w(k,cell1) = w(k,cell1) + (zb(k,1,iEdge)+sign(1.,flux)*coef_3rd_order*zb3(k,1,iEdge)) &amp;
+                                 *flux/(fzm(k)*rho_zz(k,cell1)+fzp(k)*rho_zz(k-1,cell1))
           enddo
 
 !        end if
@@ -1243,7 +1207,8 @@
       real (kind=RKIND) :: dt
 
       integer :: i, iCell, iEdge, k, iScalar, cell1, cell2
-      real (kind=RKIND) :: flux, scalar_edge, d2fdx2_cell1, d2fdx2_cell2
+      real (kind=RKIND) :: flux, scalar_edge, d2fdx2_cell1, d2fdx2_cell2, scalar_weight
+      real (kind=RKIND) :: scalar_weight_cell1, scalar_weight_cell2
 
       real (kind=RKIND), dimension(:,:,:), pointer :: scalar_old, scalar_new, scalar_tend
       real (kind=RKIND), dimension(:,:,:), pointer :: deriv_two
@@ -1251,6 +1216,13 @@
       real (kind=RKIND), dimension(:), pointer :: dvEdge, dcEdge, areaCell, qv_init
       integer, dimension(:,:), pointer :: cellsOnEdge
 
+      integer, dimension(:,:), pointer :: advCellsForEdge
+      integer, dimension(:), pointer :: nAdvCellsForEdge
+      real (kind=RKIND), dimension(:,:), pointer :: adv_coefs, adv_coefs_3rd
+      real (kind=RKIND), dimension( s_old % num_scalars, grid % nVertLevels ) :: flux_arr
+
+      real (kind=RKIND), dimension( s_old % num_scalars, grid % nVertLevels ) :: d2fdx2_cell1_arr, d2fdx2_cell2_arr
+
       real (kind=RKIND), dimension( s_old % num_scalars, grid % nVertLevels + 1 ) :: wdtn
       integer :: nVertLevels
 
@@ -1262,6 +1234,8 @@
       real (kind=RKIND) :: flux3, flux4
       real (kind=RKIND) :: q_im2, q_im1, q_i, q_ip1, ua, coef3
 
+      integer, parameter :: hadv_opt = 2
+
       flux4(q_im2, q_im1, q_i, q_ip1, ua) =                     &amp;
           ua*( 7.*(q_i + q_im1) - (q_ip1 + q_im2) )/12.0
 
@@ -1269,38 +1243,33 @@
                 flux4(q_im2, q_im1, q_i, q_ip1, ua) +           &amp;
                 coef3*abs(ua)*((q_ip1 - q_im2)-3.*(q_i-q_im1))/12.0
 
-!      coef_3rd_order = 0.
-!      if (config_scalar_adv_order == 3) coef_3rd_order = 1.0
-!      if (config_scalar_adv_order == 3 .and. config_monotonic) coef_3rd_order = 0.25
-
       coef_3rd_order = config_coef_3rd_order
 
       scalar_old  =&gt; s_old % scalars % array
       scalar_new  =&gt; s_new % scalars % array
       kdiff       =&gt; diag % kdiff % array
       deriv_two   =&gt; grid % deriv_two % array
-!****      uhAvg       =&gt; grid % uhAvg % array
       uhAvg       =&gt; diag % ruAvg % array
       dvEdge      =&gt; grid % dvEdge % array
       dcEdge      =&gt; grid % dcEdge % array
       cellsOnEdge =&gt; grid % cellsOnEdge % array
       scalar_tend =&gt; tend % scalars % array
-!****      h_old       =&gt; s_old % h % array
-!****      h_new       =&gt; s_new % h % array
       h_old       =&gt; s_old % rho_zz % array
       h_new       =&gt; s_new % rho_zz % array
       wwAvg       =&gt; diag % wwAvg % array
       areaCell    =&gt; grid % areaCell % array
 
-!****      fnm         =&gt; grid % fnm % array
-!****      fnp         =&gt; grid % fnp % array
-!****      rdnw        =&gt; grid % rdnw % array
       fnm         =&gt; grid % fzm % array
       fnp         =&gt; grid % fzp % array
       rdnw        =&gt; grid % rdzw % array
       meshScalingDel2 =&gt; grid % meshScalingDel2 % array
       meshScalingDel4 =&gt; grid % meshScalingDel4 % array
 
+      nAdvCellsForEdge =&gt; grid % nAdvCellsForEdge % array
+      advCellsForEdge =&gt; grid % advCellsForEdge % array
+      adv_coefs =&gt; grid % adv_coefs % array
+      adv_coefs_3rd =&gt; grid % adv_coefs_3rd % array
+
       nVertLevels = grid % nVertLevels
 
       h_theta_eddy_visc2 = config_h_theta_eddy_visc2
@@ -1320,250 +1289,126 @@
       !
       !  horizontal flux divergence, accumulate in scalar_tend
 
-      if (config_scalar_adv_order == 2) then
-
          do iEdge=1,grid%nEdges
             cell1 = cellsOnEdge(1,iEdge)
             cell2 = cellsOnEdge(2,iEdge)
-            if (cell1 &lt;= grid%nCells .and. cell2 &lt;= grid%nCells) then
-               do k=1,grid % nVertLevels
-                  do iScalar=1,s_old % num_scalars
-                     scalar_edge = 0.5 * (scalar_new(iScalar,k,cell1) + scalar_new(iScalar,k,cell2))
-                     flux = uhAvg(k,iEdge) * dvEdge(iEdge)  * scalar_edge
-                     scalar_tend(iScalar,k,cell1) = scalar_tend(iScalar,k,cell1) - flux/areaCell(cell1)
-                     scalar_tend(iScalar,k,cell2) = scalar_tend(iScalar,k,cell2) + flux/areaCell(cell2)
-                  end do 
-               end do 
-            end if
-         end do 
-
-      else if (config_scalar_adv_order == 3) then
-
-         do iEdge=1,grid%nEdges
-            cell1 = cellsOnEdge(1,iEdge)
-            cell2 = cellsOnEdge(2,iEdge)
-            if (cell1 &lt;= grid%nCells .and. cell2 &lt;= grid%nCells) then
+            if (cell1 &lt;= grid%nCellsSolve .or. cell2 &lt;= grid%nCellsSolve) then  ! only for owned cells
   
-               do k=1,grid % nVertLevels
-   
-                  do iScalar=1,s_old % num_scalars
-                     d2fdx2_cell1 = deriv_two(1,1,iEdge) * scalar_new(iScalar,k,cell1)
-                     d2fdx2_cell2 = deriv_two(1,2,iEdge) * scalar_new(iScalar,k,cell2)
-                     do i=1, grid % nEdgesOnCell % array (cell1)
-                        if ( grid % CellsOnCell % array (i,cell1) &lt;= grid%nCells) &amp;
-                        d2fdx2_cell1 = d2fdx2_cell1 + &amp;
-                                       deriv_two(i+1,1,iEdge) * scalar_new(iScalar,k,grid % CellsOnCell % array (i,cell1))
-                     end do
-                     do i=1, grid % nEdgesOnCell % array (cell2)
-                        if ( grid % CellsOnCell % array (i,cell2) &lt;= grid%nCells) &amp;
-                        d2fdx2_cell2 = d2fdx2_cell2 + &amp;
-                                       deriv_two(i+1,2,iEdge) * scalar_new(iScalar,k,grid % CellsOnCell % array (i,cell2))
-                     end do

-                     if (uhAvg(k,iEdge) &gt; 0) then
-                        flux = dvEdge(iEdge) * uhAvg(k,iEdge) * (          &amp;
-                                               0.5*(scalar_new(iScalar,k,cell1) + scalar_new(iScalar,k,cell2))      &amp;
-                                                -(dcEdge(iEdge) **2) * (d2fdx2_cell1 + d2fdx2_cell2) / 12.          &amp;
-                                                -(dcEdge(iEdge) **2) * coef_3rd_order*(d2fdx2_cell1 - d2fdx2_cell2) / 12. )
-                     else
-                        flux = dvEdge(iEdge) *  uhAvg(k,iEdge) * (          &amp;
-                                               0.5*(scalar_new(iScalar,k,cell1) + scalar_new(iScalar,k,cell2))      &amp;
-                                                -(dcEdge(iEdge) **2) * (d2fdx2_cell1 + d2fdx2_cell2) / 12.          &amp;
-                                                +(dcEdge(iEdge) **2) * coef_3rd_order*(d2fdx2_cell1 - d2fdx2_cell2) / 12. )
-                     end if
+               flux_arr(:,:) = 0.
+               do i=1,nAdvCellsForEdge(iEdge)
+                 iCell = advCellsForEdge(i,iEdge)
+                 do k=1,grid % nVertLevels
+                   scalar_weight = adv_coefs(i,iEdge) + coef_3rd_order*sign(1.,uhAvg(k,iEdge))*adv_coefs_3rd(i,iEdge)
+                   do iScalar=1,s_old % num_scalars
+                     flux_arr(iScalar,k) = flux_arr(iScalar,k) + scalar_weight* scalar_new(iScalar,k,iCell)
+                   end do
+                 end do
+               end do
 
-                     scalar_tend(iScalar,k,cell1) = scalar_tend(iScalar,k,cell1) - flux/areaCell(cell1)
-                     scalar_tend(iScalar,k,cell2) = scalar_tend(iScalar,k,cell2) + flux/areaCell(cell2)
-  
-                  end do 
-               end do 
-            end if
-         end do 
-
-      else  if (config_scalar_adv_order == 4) then
-
-         do iEdge=1,grid%nEdges
-            cell1 = cellsOnEdge(1,iEdge)
-            cell2 = cellsOnEdge(2,iEdge)
-            if (cell1 &lt;= grid%nCells .and. cell2 &lt;= grid%nCells) then
-
                do k=1,grid % nVertLevels
-   
-                  do iScalar=1,s_old % num_scalars
-                     d2fdx2_cell1 = deriv_two(1,1,iEdge) * scalar_new(iScalar,k,cell1)
-                     d2fdx2_cell2 = deriv_two(1,2,iEdge) * scalar_new(iScalar,k,cell2)
-                     do i=1, grid % nEdgesOnCell % array (cell1)
-                        if ( grid % CellsOnCell % array (i,cell1) &lt;= grid%nCells) &amp;
-                           d2fdx2_cell1 = d2fdx2_cell1 + &amp;
-                                          deriv_two(i+1,1,iEdge) * scalar_new(iScalar,k,grid % CellsOnCell % array (i,cell1))
-                     end do
-                     do i=1, grid % nEdgesOnCell % array (cell2)
-                        if ( grid % CellsOnCell % array (i,cell2) &lt;= grid%nCells) &amp;
-                        d2fdx2_cell2 = d2fdx2_cell2 + &amp;
-                                       deriv_two(i+1,2,iEdge) * scalar_new(iScalar,k,grid % CellsOnCell % array (i,cell2))
-                     end do
-       
-                     flux = dvEdge(iEdge) *  uhAvg(k,iEdge) * (          &amp;
-                                            0.5*(scalar_new(iScalar,k,cell1) + scalar_new(iScalar,k,cell2))      &amp;
-                                             -(dcEdge(iEdge) **2) * (d2fdx2_cell1 + d2fdx2_cell2) / 12. )
-       
-                     scalar_tend(iScalar,k,cell1) = scalar_tend(iScalar,k,cell1) - flux/areaCell(cell1)
-                     scalar_tend(iScalar,k,cell2) = scalar_tend(iScalar,k,cell2) + flux/areaCell(cell2)
-                  end do 
-               end do 
-            end if

-         end do
-      end if
-
-!  horizontal mixing for scalars - we could combine this with transport...
-
-!ldf (04-05-2011): do not do horizontal or vertical mixing if advection of scalars is set to be monotic.
-      if(.not. config_monotonic) then
-
-      if ( h_theta_eddy_visc2 &gt; 0.0 ) then
-
-         do iEdge=1,grid % nEdges
-            cell1 = grid % cellsOnEdge % array(1,iEdge)
-            cell2 = grid % cellsOnEdge % array(2,iEdge)
-            if (cell1 &lt;= grid%nCells .and. cell2 &lt;= grid%nCells) then
-
-               do k=1,grid % nVertLevels
-                  do iScalar=1,s_old % num_scalars
-                    scalar_turb_flux = h_theta_eddy_visc2*prandtl*  &amp;
-                                        (scalar_new(iScalar,k,cell2) - scalar_new(iScalar,k,cell1))/dcEdge(iEdge)
-                    scalar_turb_flux = scalar_turb_flux * meshScalingDel2(iEdge)
-                    flux = dvEdge (iEdge) * rho_edge(k,iEdge) * scalar_turb_flux
-                    scalar_tend(iScalar,k,cell1) = scalar_tend(iScalar,k,cell1) + flux/areaCell(cell1)
-                    scalar_tend(iScalar,k,cell2) = scalar_tend(iScalar,k,cell2) - flux/areaCell(cell2)
-                  end do
+               do iScalar=1,s_old % num_scalars
+                     scalar_tend(iScalar,k,cell1) = scalar_tend(iScalar,k,cell1) &amp;
+                            - uhAvg(k,iEdge)*flux_arr(iScalar,k)/areaCell(cell1)
+                     scalar_tend(iScalar,k,cell2) = scalar_tend(iScalar,k,cell2) &amp;
+                            + uhAvg(k,iEdge)*flux_arr(iScalar,k)/areaCell(cell2)
                end do
+               end do
 
             end if
-         end do
+          end do
 
-      else if ( config_horiz_mixing == &quot;2d_smagorinsky&quot;) then
+      !
+      !  vertical flux divergence
+      !
 
-         do iEdge=1,grid % nEdges
-            cell1 = grid % cellsOnEdge % array(1,iEdge)
-            cell2 = grid % cellsOnEdge % array(2,iEdge)
-            if (cell1 &lt;= grid%nCells .and. cell2 &lt;= grid%nCells) then
+      ! zero fluxes at top and bottom
 
-               do k=1,grid % nVertLevels
-                  do iScalar=1,s_old % num_scalars
-                    scalar_turb_flux = 0.5*(kdiff(k,cell1)+kdiff(k,cell2))*prandtl*  &amp;
-                                        (scalar_new(iScalar,k,cell2) - scalar_new(iScalar,k,cell1))/dcEdge(iEdge)
-                    scalar_turb_flux = scalar_turb_flux * meshScalingDel2(iEdge)
-                    flux = dvEdge (iEdge) * rho_edge(k,iEdge) * scalar_turb_flux
-                    scalar_tend(iScalar,k,cell1) = scalar_tend(iScalar,k,cell1) + flux/areaCell(cell1)
-                    scalar_tend(iScalar,k,cell2) = scalar_tend(iScalar,k,cell2) - flux/areaCell(cell2)
-                  end do
-               end do
+         wdtn(:,1) = 0.
+         wdtn(:,grid % nVertLevels+1) = 0.
 
-            end if
-         end do
+         if (config_scalar_vadv_order == 2) then
 
-      end if
+           do iCell=1,grid % nCellsSolve
+             do k = 2, nVertLevels
+                do iScalar=1,s_old % num_scalars
+                  wdtn(iScalar,k) = wwAvg(k,iCell)*(fnm(k)*scalar_new(iScalar,k,iCell)+fnp(k)*scalar_new(iScalar,k-1,iCell))
+                end do
+             end do
+             do k=1,grid % nVertLevelsSolve
+                do iScalar=1,s_old % num_scalars
+                  scalar_new(iScalar,k,iCell) = (   scalar_old(iScalar,k,iCell)*h_old(k,iCell) &amp;
+                        + dt*( scalar_tend(iScalar,k,iCell) -rdnw(k)*(wdtn(iScalar,k+1)-wdtn(iScalar,k)) ) )/h_new(k,iCell)
+                end do
+             end do
+           end do
 
-      ! vertical mixing
+         else if ( config_scalar_vadv_order == 3 ) then
 
-      if ( v_theta_eddy_visc2 &gt; 0.0 ) then
+           do iCell=1,grid % nCellsSolve
 
-         do iCell = 1, grid % nCellsSolve
-            do k=2,nVertLevels-1
-               z1 = zgrid(k-1,iCell)
-               z2 = zgrid(k  ,iCell)
-               z3 = zgrid(k+1,iCell)
-               z4 = zgrid(k+2,iCell)
-
-               zm = 0.5*(z1+z2)
-               z0 = 0.5*(z2+z3)
-               zp = 0.5*(z3+z4)
-
+             k = 2
+             do iScalar=1,s_old % num_scalars
+               wdtn(iScalar,k) = wwAvg(k,iCell)*(fnm(k)*scalar_new(iScalar,k,iCell)+fnp(k)*scalar_new(iScalar,k-1,iCell))
+             enddo
+             
+             do k=3,nVertLevels-1
                do iScalar=1,s_old % num_scalars
-                 scalar_tend(iScalar,k,iCell) = scalar_tend(iScalar,k,iCell) + v_theta_eddy_visc2*prandtl*rho_zz(k,iCell)*(&amp;
-                                        (scalar_new(iScalar,k+1,iCell)-scalar_new(iScalar,k  ,iCell))/(zp-z0)                 &amp;
-                                       -(scalar_new(iScalar,k  ,iCell)-scalar_new(iScalar,k-1,iCell))/(z0-zm) )/(0.5*(zp-zm))
+                 wdtn(iScalar,k) = flux3( scalar_new(iScalar,k-2,iCell),scalar_new(iScalar,k-1,iCell),  &amp;
+                                          scalar_new(iScalar,k  ,iCell),scalar_new(iScalar,k+1,iCell),  &amp;
+                                          wwAvg(k,iCell), coef_3rd_order )
                end do
              end do
+             k = nVertLevels
+             do iScalar=1,s_old % num_scalars
+               wdtn(iScalar,k) = wwAvg(k,iCell)*(fnm(k)*scalar_new(iScalar,k,iCell)+fnp(k)*scalar_new(iScalar,k-1,iCell))
+             enddo
 
-             if ( .not. config_mix_full) then
-             iScalar = s_new % index_qv
-               do k=2,nVertLevels-1
-                z1 = zgrid(k-1,iCell)
-                z2 = zgrid(k  ,iCell)
-                z3 = zgrid(k+1,iCell)
-                z4 = zgrid(k+2,iCell)
+             do k=1,grid % nVertLevelsSolve
+                do iScalar=1,s_old % num_scalars
+                  scalar_new(iScalar,k,iCell) = (   scalar_old(iScalar,k,iCell)*h_old(k,iCell) &amp;
+                        + dt*( scalar_tend(iScalar,k,iCell) -rdnw(k)*(wdtn(iScalar,k+1)-wdtn(iScalar,k)) ) )/h_new(k,iCell)
+                end do
+             end do
 
-                zm = 0.5*(z1+z2)
-                z0 = 0.5*(z2+z3)
-                zp = 0.5*(z3+z4)
+           end do
 
-                 scalar_tend(iScalar,k,iCell) = scalar_tend(iScalar,k,iCell) + v_theta_eddy_visc2*prandtl*rho_zz(k,iCell)*(&amp;
-                                        (-qv_init(k+1)+qv_init(k))/(zp-z0) &amp;
-                                       -(-qv_init(k)+qv_init(k-1))/(z0-zm) )/(0.5*(zp-zm))
-               end do
-             end if
+         else if ( config_scalar_vadv_order == 4 ) then
 
-         end do
+           do iCell=1,grid % nCellsSolve
 
-         end if
-         
-         end if !config_monotonic
-
-      !
-      !  vertical flux divergence
-      !
-
-      do iCell=1,grid % nCells
-
-         wdtn(:,1) = 0.
-         if (config_scalar_vadv_order == 2) then
-           do k = 2, nVertLevels
-              do iScalar=1,s_old % num_scalars
-                wdtn(iScalar,k) = wwAvg(k,iCell)*(fnm(k)*scalar_new(iScalar,k,iCell)+fnp(k)*scalar_new(iScalar,k-1,iCell))
-              end do
-           end do
-         else 
-           k = 2
-           do iScalar=1,s_old % num_scalars
-             wdtn(iScalar,k) = wwAvg(k,iCell)*(fnm(k)*scalar_new(iScalar,k,iCell)+fnp(k)*scalar_new(iScalar,k-1,iCell))
-           enddo
-           if ( config_scalar_vadv_order == 3 ) then
-             do k=3,nVertLevels-1
+             k = 2
              do iScalar=1,s_old % num_scalars
-               wdtn(iScalar,k) = flux3( scalar_new(iScalar,k-2,iCell),scalar_new(iScalar,k-1,iCell),  &amp;
-                                        scalar_new(iScalar,k  ,iCell),scalar_new(iScalar,k+1,iCell),  &amp;
-                                        wwAvg(k,iCell), coef_3rd_order )
-             end do
-             end do
-           else if ( config_scalar_vadv_order == 4 ) then
+               wdtn(iScalar,k) = wwAvg(k,iCell)*(fnm(k)*scalar_new(iScalar,k,iCell)+fnp(k)*scalar_new(iScalar,k-1,iCell))
+             enddo
              do k=3,nVertLevels-1
+               do iScalar=1,s_old % num_scalars
+                 wdtn(iScalar,k) = flux4( scalar_new(iScalar,k-2,iCell),scalar_new(iScalar,k-1,iCell),  &amp;
+                                          scalar_new(iScalar,k  ,iCell),scalar_new(iScalar,k+1,iCell), wwAvg(k,iCell) )
+               end do
+             end do
+             k = nVertLevels
              do iScalar=1,s_old % num_scalars
-               wdtn(iScalar,k) = flux4( scalar_new(iScalar,k-2,iCell),scalar_new(iScalar,k-1,iCell),  &amp;
-                                        scalar_new(iScalar,k  ,iCell),scalar_new(iScalar,k+1,iCell), wwAvg(k,iCell) )
+               wdtn(iScalar,k) = wwAvg(k,iCell)*(fnm(k)*scalar_new(iScalar,k,iCell)+fnp(k)*scalar_new(iScalar,k-1,iCell))
+             enddo
+
+             do k=1,grid % nVertLevelsSolve
+                do iScalar=1,s_old % num_scalars
+                  scalar_new(iScalar,k,iCell) = (   scalar_old(iScalar,k,iCell)*h_old(k,iCell) &amp;
+                        + dt*( scalar_tend(iScalar,k,iCell) -rdnw(k)*(wdtn(iScalar,k+1)-wdtn(iScalar,k)) ) )/h_new(k,iCell)
+                end do
              end do
-             end do
-           end if
-           k = nVertLevels
-           do iScalar=1,s_old % num_scalars
-             wdtn(iScalar,k) = wwAvg(k,iCell)*(fnm(k)*scalar_new(iScalar,k,iCell)+fnp(k)*scalar_new(iScalar,k-1,iCell))
-           enddo
 
-         end if
-         wdtn(:,nVertLevels+1) = 0.
-
-         do k=1,grid % nVertLevelsSolve
-            do iScalar=1,s_old % num_scalars
-              scalar_new(iScalar,k,iCell) = (   scalar_old(iScalar,k,iCell)*h_old(k,iCell) &amp;
-                    + dt*( scalar_tend(iScalar,k,iCell) -rdnw(k)*(wdtn(iScalar,k+1)-wdtn(iScalar,k)) ) )/h_new(k,iCell)
+           end do
                                                                                         
-            end do
-         end do
-      end do
+         else 
 
+           write(0,*) ' bad value for config_scalar_vadv_order - ', config_scalar_vadv_order
+
+         end if
+
    end subroutine advance_scalars
 
+!---------------------------
 
    subroutine advance_scalars_mono( tend, s_old, s_new, diag, grid, dt, rk_step, rk_order, dminfo, cellsToSend, cellsToRecv)
    !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 
@@ -1571,9 +1416,6 @@
    ! Input: s - current model state
    !        grid - grid metadata
    !
-   ! Output: tend - computed scalar tendencies
-   !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 
-
       implicit none
 
       type (tend_type), intent(in) :: tend
@@ -1581,35 +1423,49 @@
       type (state_type), intent(inout) :: s_new
       type (diag_type), intent(in) :: diag
       type (mesh_type), intent(in) :: grid
+      real (kind=RKIND) :: dt
+
       integer, intent(in) :: rk_step, rk_order
-      real (kind=RKIND), intent(in) :: dt
       type (dm_info), intent(in) :: dminfo
       type (exchange_list), pointer :: cellsToSend, cellsToRecv
 
-      integer :: i, iCell, iEdge, k, iScalar, cell_upwind, cell1, cell2
-      real (kind=RKIND) :: flux, scalar_edge, d2fdx2_cell1, d2fdx2_cell2
-      real (kind=RKIND) :: fdir, flux_upwind, h_flux_upwind, s_upwind
 
-      real (kind=RKIND), dimension(:,:,:), pointer :: scalar_old, scalar_new, scalar_tend
+      integer :: i, iCell, iEdge, k, iScalar, cell1, cell2
+      real (kind=RKIND) :: flux, scalar_edge, d2fdx2_cell1, d2fdx2_cell2, scalar_weight
+      real (kind=RKIND) :: scalar_weight_cell1, scalar_weight_cell2
+
+      real (kind=RKIND), dimension(:,:,:), pointer :: scalar_old_in, scalar_new_in, scalar_tend
       real (kind=RKIND), dimension(:,:,:), pointer :: deriv_two
-      real (kind=RKIND), dimension(:,:), pointer :: uhAvg, h_old, h_new, wwAvg
-      real (kind=RKIND), dimension(:), pointer :: dvEdge, dcEdge, areaCell
+      real (kind=RKIND), dimension(:,:), pointer :: uhAvg, h_old, h_new, wwAvg, rho_edge, rho_zz, zgrid, kdiff
+      real (kind=RKIND), dimension(:), pointer :: dvEdge, dcEdge, areaCell, qv_init
       integer, dimension(:,:), pointer :: cellsOnEdge
 
-      real (kind=RKIND), dimension( s_old % num_scalars, grid % nEdges+1) :: h_flux
-      real (kind=RKIND), dimension( s_old % num_scalars, grid % nCells+1, 2 ) :: v_flux, v_flux_upwind, s_update
-      real (kind=RKIND), dimension( s_old % num_scalars, grid % nCells+1, 2 ) :: scale_out, scale_in
-      real (kind=RKIND), dimension( s_old % num_scalars ) :: s_max, s_min, s_max_update, s_min_update
+      integer, dimension(:,:), pointer :: advCellsForEdge
+      integer, dimension(:), pointer :: nAdvCellsForEdge
+      real (kind=RKIND), dimension(:,:), pointer :: adv_coefs, adv_coefs_3rd
 
-      integer :: nVertLevels, km0, km1, ktmp, kcp1, kcm1
+      real (kind=RKIND), dimension( grid % nVertLevels, grid % nCells ) :: scalar_old, scalar_new
+      real (kind=RKIND), dimension( grid % nVertLevels, grid % nCells ) :: s_max, s_min, s_update
+      real (kind=RKIND), dimension( grid % nVertLevels, grid % nCells ) :: scale_in, scale_out
 
-      real (kind=RKIND), dimension(:), pointer :: fnm, fnp, rdnw
-      real (kind=RKIND), parameter :: eps=1.e-20
+      real (kind=RKIND), dimension( grid % nVertLevels, grid % nEdges ) :: flux_arr
+      real (kind=RKIND), dimension( grid % nVertLevels + 1, grid % nCells ) :: wdtn
+
+      integer :: nVertLevels, isc, num_scalars, icellmax, kmax
+
+      real (kind=RKIND), dimension(:), pointer :: fnm, fnp, rdnw, meshScalingDel2, meshScalingDel4
       real (kind=RKIND) :: coef_3rd_order
 
-      real (kind=RKIND) :: flux3, flux4
-      real (kind=RKIND) :: q_im2, q_im1, q_i, q_ip1, ua, coef3
+      real (kind=RKIND) :: h_theta_eddy_visc2, v_theta_eddy_visc2, scalar_turb_flux, z1,z2,z3,z4,zm,z0,zp
 
+      real (kind=RKIND) :: flux3, flux4, flux_upwind
+      real (kind=RKIND) :: q_im2, q_im1, q_i, q_ip1, ua, coef3, scmin,scmax
+      real (kind=RKIND) :: s_min_update, s_max_update, s_upwind, scale_factor
+
+      integer, parameter :: hadv_opt = 2
+      real (kind=RKIND), parameter :: eps=1.e-20
+      logical, parameter :: debug_print = .false.
+
       flux4(q_im2, q_im1, q_i, q_ip1, ua) =                     &amp;
           ua*( 7.*(q_i + q_im1) - (q_ip1 + q_im2) )/12.0
 
@@ -1617,258 +1473,265 @@
                 flux4(q_im2, q_im1, q_i, q_ip1, ua) +           &amp;
                 coef3*abs(ua)*((q_ip1 - q_im2)-3.*(q_i-q_im1))/12.0
 
-!------
+      coef_3rd_order = config_coef_3rd_order
 
-      scalar_old  =&gt; s_old % scalars % array
-      scalar_new  =&gt; s_new % scalars % array
+      scalar_old_in  =&gt; s_old % scalars % array
+      scalar_new_in  =&gt; s_new % scalars % array
+      kdiff       =&gt; diag % kdiff % array
       deriv_two   =&gt; grid % deriv_two % array
-!****      uhAvg       =&gt; grid % uhAvg % array
       uhAvg       =&gt; diag % ruAvg % array
       dvEdge      =&gt; grid % dvEdge % array
       dcEdge      =&gt; grid % dcEdge % array
       cellsOnEdge =&gt; grid % cellsOnEdge % array
       scalar_tend =&gt; tend % scalars % array
-!****      h_old       =&gt; s_old % h % array
-!****      h_new       =&gt; s_new % h % array
       h_old       =&gt; s_old % rho_zz % array
       h_new       =&gt; s_new % rho_zz % array
       wwAvg       =&gt; diag % wwAvg % array
       areaCell    =&gt; grid % areaCell % array
 
-!****      fnm         =&gt; grid % fnm % array
-!****      fnp         =&gt; grid % fnp % array
-!****      rdnw        =&gt; grid % rdnw % array
       fnm         =&gt; grid % fzm % array
       fnp         =&gt; grid % fzp % array
       rdnw        =&gt; grid % rdzw % array
+      meshScalingDel2 =&gt; grid % meshScalingDel2 % array
+      meshScalingDel4 =&gt; grid % meshScalingDel4 % array
 
+      nAdvCellsForEdge =&gt; grid % nAdvCellsForEdge % array
+      advCellsForEdge =&gt; grid % advCellsForEdge % array
+      adv_coefs =&gt; grid % adv_coefs % array
+      adv_coefs_3rd =&gt; grid % adv_coefs_3rd % array
+
       nVertLevels = grid % nVertLevels
 
+      h_theta_eddy_visc2 = config_h_theta_eddy_visc2
+      v_theta_eddy_visc2 = config_v_theta_eddy_visc2
+      rho_edge     =&gt; diag % rho_edge % array
+      rho_zz       =&gt; s_new % rho_zz % array
+      qv_init      =&gt; grid % qv_init % array
+      zgrid        =&gt; grid % zgrid % array
+
 #ifndef DO_PHYSICS
       scalar_tend = 0.  !  testing purposes - we have no sources or sinks
 #endif
 
       !
+      ! Update scalars for physics (i.e., what is in scalar_tend)
+      !   we should probably move this to another routine called before mono advection ****
+      !
+
+      do iCell = 1,grid%nCellsSolve
+      do k = 1, grid%nVertLevels
+      do iScalar = 1,s_old%num_scalars
+         scalar_old_in(iScalar,k,iCell) = scalar_old_in(iScalar,k,iCell)+dt*scalar_tend(iScalar,k,iCell) / h_old(k,iCell)
+         scalar_tend(iScalar,k,iCell) = 0.
+      end do
+      end do
+      end do
+
+      !  halo exchange
+
+      call dmpar_exch_halo_field3dReal( dminfo,               &amp;
+                                        scalar_old_in(:,:,:), &amp;
+                                        s_old % num_scalars,  &amp;
+                                        grid % nVertLevels,   &amp;
+                                        grid % nCells,        &amp;
+                                        cellsToSend, cellsToRecv )
+
+      !
       ! Runge Kutta integration, so we compute fluxes from scalar_new values, update starts from scalar_old
       !
 
-      km1 = 1
-      km0 = 2
-      v_flux(:,:,km1) = 0.
-      v_flux_upwind(:,:,km1) = 0.
-      scale_out(:,:,:) = 1.
-      scale_in(:,:,:) = 1.
+      ! do one scalar at a time
 
-!      coef_3rd_order = 0.
-!      if (config_scalar_adv_order == 3) coef_3rd_order = 1.0
-!      if (config_scalar_adv_order == 3 .and. config_monotonic) coef_3rd_order = 0.25
+      num_scalars = 1
 
-      coef_3rd_order = config_coef_3rd_order
+      do iScalar = 1, s_old % num_scalars
+        write(0,*) ' mono transport for scalar ',iScalar
 
-      do k = 1, grid % nVertLevels
-         kcp1 = min(k+1,grid % nVertLevels)
-         kcm1 = max(k-1,1)
+        do iCell = 1, grid%nCells
+        do k=1, grid%nVertLevels
+          scalar_old(k,iCell) = scalar_old_in(iScalar,k,iCell)
+          scalar_new(k,iCell) = scalar_new_in(iScalar,k,iCell)
+        end do
+        end do
 
-!  vertical flux
+        scmin = scalar_old(1,1)
+        scmax = scalar_old(1,1)
+        do iCell = 1, grid%nCells
+        do k=1, grid%nVertLevels
+          scmin = min(scmin,scalar_old(k,iCell))
+          scmax = max(scmax,scalar_old(k,iCell))
+        enddo
+        enddo
+        write(0,*) ' scmin, scmin old in ',scmin,scmax
 
-         do iCell=1,grid % nCells
+        scmin = scalar_new(1,1)
+        scmax = scalar_new(1,1)
+        do iCell = 1, grid%nCells
+        do k=1, grid%nVertLevels
+          scmin = min(scmin,scalar_new(k,iCell))
+          scmax = max(scmax,scalar_new(k,iCell))
+        enddo
+        enddo
+        write(0,*) ' scmin, scmin new in ',scmin,scmax
 
-            if (k &lt; grid % nVertLevels) then
 
-               if ((config_scalar_vadv_order == 2) .or. (k==1) .or. (k==grid % nVertLevels-1)) then
-                  do iScalar=1,s_old % num_scalars
-                     v_flux(iScalar,iCell,km0) = dt * wwAvg(k+1,iCell) *   &amp;
-                          (fnm(k+1) * scalar_new(iScalar,k+1,iCell) + fnp(k+1) * scalar_new(iScalar,k,iCell))
-                  end do
-               else if (config_scalar_vadv_order == 3) then
-                  do iScalar=1,s_old % num_scalars
-                     v_flux(iScalar,iCell,km0) = dt * flux3( scalar_new(iScalar,k-1,iCell),scalar_new(iScalar,k  ,iCell),  &amp;
-                                                             scalar_new(iScalar,k+1,iCell),scalar_new(iScalar,k+2,iCell),  &amp;
-                                                             wwAvg(k+1,iCell), coef_3rd_order )
-                  end do
-               else if (config_scalar_vadv_order == 4) then
-                  do iScalar=1,s_old % num_scalars
-                     v_flux(iScalar,iCell,km0) = dt * flux4( scalar_new(iScalar,k-1,iCell),scalar_new(iScalar,k  ,iCell),  &amp;
-                                                             scalar_new(iScalar,k+1,iCell),scalar_new(iScalar,k+2,iCell), wwAvg(k+1,iCell) )
-                  end do
-               end if
+      !
+      !  vertical flux divergence, and min and max bounds for flux limiter
+      !
 
-               cell_upwind = k
-               if (wwAvg(k+1,iCell) &gt;= 0) cell_upwind = k+1

+         do iCell=1,grid % nCellsSolve
 
-               do iScalar=1,s_old % num_scalars
-                  v_flux_upwind(iScalar,iCell,km0) = dt * wwAvg(k+1,iCell) * scalar_old(iScalar,cell_upwind,iCell)
-                  v_flux(iScalar,iCell,km0) = v_flux(iScalar,iCell,km0) - v_flux_upwind(iScalar,iCell,km0)
-!                  v_flux(iScalar,iCell,km0) = 0.  ! use only upwind - for testing
-                  s_update(iScalar,iCell,km0) = scalar_old(iScalar,k,iCell) * h_old(k,iCell)  &amp;
-                            - rdnw(k) * (v_flux_upwind(iScalar,iCell,km0) - v_flux_upwind(iScalar,iCell,km1))
-               end do
+          ! zero flux at top and bottom
+           wdtn(1,iCell) = 0.
+           wdtn(grid % nVertLevels+1,iCell) = 0.
 
+           k = 1
+           s_max(k,iCell) = max(scalar_old(1,iCell),scalar_old(2,iCell))
+           s_min(k,iCell) = min(scalar_old(1,iCell),scalar_old(2,iCell))
 
-            else
-               do iScalar=1,s_old % num_scalars
-                  v_flux(iScalar,iCell,km0) = 0.
-                  v_flux_upwind(iScalar,iCell,km0) = 0.
-                  s_update(iScalar,iCell,km0) = scalar_old(iScalar,k,iCell) * h_old(k,iCell)  &amp;
-                            - rdnw(k) * (v_flux_upwind(iScalar,iCell,km0) - v_flux_upwind(iScalar,iCell,km1))
+           k = 2
+           wdtn(k,iCell) = wwAvg(k,iCell)*(fnm(k)*scalar_new(k,iCell)+fnp(k)*scalar_new(k-1,iCell))
+           s_max(k,iCell) = max(scalar_old(k-1,iCell),scalar_old(k,iCell),scalar_old(k+1,iCell))
+           s_min(k,iCell) = min(scalar_old(k-1,iCell),scalar_old(k,iCell),scalar_old(k+1,iCell))
+             
+           do k=3,nVertLevels-1
+              wdtn(k,iCell) = flux3( scalar_new(k-2,iCell),scalar_new(k-1,iCell),  &amp;
+                                     scalar_new(k  ,iCell),scalar_new(k+1,iCell),  &amp;
+                                     wwAvg(k,iCell), coef_3rd_order )
+              s_max(k,iCell) = max(scalar_old(k-1,iCell),scalar_old(k,iCell),scalar_old(k+1,iCell))
+              s_min(k,iCell) = min(scalar_old(k-1,iCell),scalar_old(k,iCell),scalar_old(k+1,iCell))
+           end do

+           k = nVertLevels
+           wdtn(k,iCell) = wwAvg(k,iCell)*(fnm(k)*scalar_new(k,iCell)+fnp(k)*scalar_new(k-1,iCell))
+           s_max(k,iCell) = max(scalar_old(k,iCell),scalar_old(k-1,iCell))
+           s_min(k,iCell) = min(scalar_old(k,iCell),scalar_old(k-1,iCell))
+
+      ! pull s_min and s_max from the (horizontal) surrounding cells
+
+           do i=1, grid % nEdgesOnCell % array(iCell)
+             do k=1, grid % nVertLevels
+               s_max(k,iCell) = max(s_max(k,iCell),scalar_old(k, grid % CellsOnCell % array(i,iCell)))
+               s_min(k,iCell) = min(s_min(k,iCell),scalar_old(k, grid % CellsOnCell % array(i,iCell)))
+             end do
+           end do
+
+         end do
+
+      !
+      !  horizontal flux divergence
+
+         flux_arr(:,:) = 0.
+         do iEdge=1,grid%nEdges
+
+            cell1 = cellsOnEdge(1,iEdge)
+            cell2 = cellsOnEdge(2,iEdge)
+
+            if (cell1 &lt;= grid%nCellsSolve .or. cell2 &lt;= grid%nCellsSolve) then  ! only for owned cells
+  
+               do i=1,nAdvCellsForEdge(iEdge)
+                 iCell = advCellsForEdge(i,iEdge)
+                 do k=1,grid % nVertLevels
+                   scalar_weight = uhAvg(k,iEdge)*(adv_coefs(i,iEdge) + coef_3rd_order*sign(1.,uhAvg(k,iEdge))*adv_coefs_3rd(i,iEdge))
+                   flux_arr(k,iEdge) = flux_arr(k,iEdge) + scalar_weight* scalar_new(k,iCell)
+                 end do
                end do
+
             end if
 
-         end do
+          end do
 
-! horizontal flux
+!  vertical flux divergence for upwind update, we will put upwind update into scalar_new, and put factor of dt in fluxes
 
-         if (config_scalar_adv_order == 2) then
+          do iCell = 1, grid % nCellsSolve
 
-            do iEdge=1,grid%nEdges
-               cell1 = cellsOnEdge(1,iEdge)
-               cell2 = cellsOnEdge(2,iEdge)
-               if (cell1 &lt;= grid%nCells .and. cell2 &lt;= grid%nCells) then
-                  cell_upwind = cell2
-                  if (uhAvg(k,iEdge) &gt;= 0) cell_upwind = cell1
-                  do iScalar=1,s_old % num_scalars
-                     scalar_edge = 0.5 * (scalar_new(iScalar,k,cell1) + scalar_new(iScalar,k,cell2))
-                     h_flux(iScalar,iEdge) = dt * uhAvg(k,iEdge) * dvEdge(iEdge) * scalar_edge
-                     h_flux_upwind = dt * uhAvg(k,iEdge) * dvEdge(iEdge) * scalar_old(iScalar,k,cell_upwind)
-                     h_flux(iScalar,iEdge) = h_flux(iScalar,iEdge) - h_flux_upwind
-!                     h_flux(iScalar,iEdge) = 0.  ! use only upwind - for testing
-                     s_update(iScalar,cell1,km0) = s_update(iScalar,cell1,km0) - h_flux_upwind / grid % areaCell % array(cell1)
-                     s_update(iScalar,cell2,km0) = s_update(iScalar,cell2,km0) + h_flux_upwind / grid % areaCell % array(cell2)
-                  end do 
-               end if
-            end do 
+            k = 1
+            scalar_new(k,iCell) = scalar_old(k,iCell)*h_old(k,iCell)
 
-         else if (config_scalar_adv_order &gt;= 3) then
+            do k = 2, nVertLevels
+              scalar_new(k,iCell) = scalar_old(k,iCell)*h_old(k,iCell)
+              flux_upwind = dt*(max(0.,wwAvg(k,iCell))*scalar_old(k-1,iCell) + min(0.,wwAvg(k,iCell))*scalar_old(k,iCell))
+              scalar_new(k-1,iCell) = scalar_new(k-1,iCell) - flux_upwind*rdnw(k-1)
+              scalar_new(k  ,iCell) = scalar_new(k  ,iCell) + flux_upwind*rdnw(k)
+              wdtn(k,iCell) = dt*wdtn(k,iCell) - flux_upwind
+            end do
 
-            do iEdge=1,grid%nEdges
-               cell1 = cellsOnEdge(1,iEdge)
-               cell2 = cellsOnEdge(2,iEdge)
-               if (cell1 &lt;= grid%nCells .and. cell2 &lt;= grid%nCells) then
-                  cell_upwind = cell2
-                  if (uhAvg(k,iEdge) &gt;= 0) cell_upwind = cell1
-                  do iScalar=1,s_old % num_scalars
-  
-                     d2fdx2_cell1 = deriv_two(1,1,iEdge) * scalar_new(iScalar,k,cell1)
-                     d2fdx2_cell2 = deriv_two(1,2,iEdge) * scalar_new(iScalar,k,cell2)
-                     do i=1, grid % nEdgesOnCell % array (cell1)
-                        if ( grid % CellsOnCell % array (i,cell1) &lt;= grid%nCells) &amp;
-                        d2fdx2_cell1 = d2fdx2_cell1 + &amp;
-                                       deriv_two(i+1,1,iEdge) * scalar_new(iScalar,k,grid % CellsOnCell % array (i,cell1))
-                     end do
-                     do i=1, grid % nEdgesOnCell % array (cell2)
-                        if ( grid % CellsOnCell % array (i,cell2) &lt;= grid%nCells) &amp;
-                        d2fdx2_cell2 = d2fdx2_cell2 + &amp;
-                                       deriv_two(i+1,2,iEdge) * scalar_new(iScalar,k,grid % CellsOnCell % array (i,cell2))
-                     end do
-    
-                     if (uhAvg(k,iEdge) &gt; 0) then
-                        flux = dvEdge(iEdge) * uhAvg(k,iEdge) * (          &amp;
-                                               0.5*(scalar_new(iScalar,k,cell1) + scalar_new(iScalar,k,cell2))      &amp;
-                                                -(dcEdge(iEdge) **2) * (d2fdx2_cell1 + d2fdx2_cell2) / 12.          &amp;
-                                                -(dcEdge(iEdge) **2) * coef_3rd_order*(d2fdx2_cell1 - d2fdx2_cell2) / 12. )
-                     else
-                        flux = dvEdge(iEdge) *  uhAvg(k,iEdge) * (          &amp;
-                                               0.5*(scalar_new(iScalar,k,cell1) + scalar_new(iScalar,k,cell2))      &amp;
-                                                -(dcEdge(iEdge) **2) * (d2fdx2_cell1 + d2fdx2_cell2) / 12.          &amp;
-                                                +(dcEdge(iEdge) **2) * coef_3rd_order*(d2fdx2_cell1 - d2fdx2_cell2) / 12. )
-                     end if
-   
-                     h_flux(iScalar,iEdge) = dt * flux
-                     h_flux_upwind = dt * uhAvg(k,iEdge) * dvEdge(iEdge) * scalar_old(iScalar,k,cell_upwind)
-                     h_flux(iScalar,iEdge) = h_flux(iScalar,iEdge) - h_flux_upwind
-!                     h_flux(iScalar,iEdge) = 0.  ! use only upwind - for testing
-                     s_update(iScalar,cell1,km0) = s_update(iScalar,cell1,km0) - h_flux_upwind / grid % areaCell % array(cell1)
-                     s_update(iScalar,cell2,km0) = s_update(iScalar,cell2,km0) + h_flux_upwind / grid % areaCell % array(cell2)
-                  end do 
-               end if
-            end do 
+! scale_in(:,:) and scale_out(:,:) are used here to store the incoming and outgoing perturbation flux 
+! contributions to the update:  first the vertical flux component, then the horizontal
 
-         end if
+            do k=1,nVertLevels
+              scale_in (k,iCell) = - rdnw(k)*(min(0.,wdtn(k+1,iCell))-max(0.,wdtn(k,iCell)))
+              scale_out(k,iCell) = - rdnw(k)*(max(0.,wdtn(k+1,iCell))-min(0.,wdtn(k,iCell)))
+            end do
 
+          end do
 
-         if ( (rk_step == rk_order) .and. (config_monotonic .or. config_positive_definite) ) then   
+!  horizontal flux divergence for upwind update
 
-!*************************************************************************************************************
-!---  limiter - we limit horizontal and vertical fluxes on level k 
-!---  (these are h fluxes contributing to level k scalars, and v flux contributing to level k, k-1 scalars)
+         !  upwind flux computation
 
-            do iCell=1,grid % nCells
-  
-               do iScalar=1,s_old % num_scalars
-   
-                  s_max(iScalar) = max(scalar_old(iScalar,k,iCell), scalar_old(iScalar,kcp1,iCell), scalar_old(iScalar,kcm1,iCell))
-                  s_min(iScalar) = min(scalar_old(iScalar,k,iCell), scalar_old(iScalar,kcp1,iCell), scalar_old(iScalar,kcm1,iCell))
-                  s_max_update(iScalar) = s_update(iScalar,iCell,km0)
-                  s_min_update(iScalar) = s_update(iScalar,iCell,km0)
-    
-                  ! add in vertical flux to get max and min estimate
-                  s_max_update(iScalar) = s_max_update(iScalar)  &amp;
-                     - rdnw(k) * (max(0.,v_flux(iScalar,iCell,km0)) - min(0.,v_flux(iScalar,iCell,km1)))
-                  s_min_update(iScalar) = s_min_update(iScalar)  &amp;
-                     - rdnw(k) * (min(0.,v_flux(iScalar,iCell,km0)) - max(0.,v_flux(iScalar,iCell,km1)))
-    
+         do iEdge=1,grid%nEdges
+           cell1 = cellsOnEdge(1,iEdge)
+           cell2 = cellsOnEdge(2,iEdge)
+             if (cell1 &lt;= grid%nCellsSolve .or. cell2 &lt;= grid%nCellsSolve) then  ! only for owned cells
+               do k=1,grid % nVertLevels
+                 flux_upwind = grid % dvEdge % array(iEdge) * dt *   &amp;
+                        (max(0.,uhAvg(k,iEdge))*scalar_old(k,cell1) + min(0.,uhAvg(k,iEdge))*scalar_old(k,cell2))
+                 flux_arr(k,iEdge) = dt*flux_arr(k,iEdge) - flux_upwind
+                 scalar_new(k,cell1) = scalar_new(k,cell1) - flux_upwind / areaCell(cell1)
+                 scalar_new(k,cell2) = scalar_new(k,cell2) + flux_upwind / areaCell(cell2)
+
+                 scale_out(k,cell1) = scale_out(k,cell1) - max(0.,flux_arr(k,iEdge)) / areaCell(cell1)
+                 scale_in (k,cell1) = scale_in (k,cell1) - min(0.,flux_arr(k,iEdge)) / areaCell(cell1)
+                 scale_out(k,cell2) = scale_out(k,cell2) + min(0.,flux_arr(k,iEdge)) / areaCell(cell2)
+                 scale_in (k,cell2) = scale_in (k,cell2) + max(0.,flux_arr(k,iEdge)) / areaCell(cell2)
+
                end do
-   
-               do i = 1, grid % nEdgesOnCell % array(iCell)  ! go around the edges of each cell
-                  if (grid % cellsOnCell % array(i,iCell) &lt;= grid%nCells) then
-                     do iScalar=1,s_old % num_scalars
-    
-                        s_max(iScalar)  = max(scalar_old(iScalar,k,grid % cellsOnCell % array(i,iCell)), s_max(iScalar))
-                        s_min(iScalar)  = min(scalar_old(iScalar,k,grid % cellsOnCell % array(i,iCell)), s_min(iScalar))
-     
-                        iEdge = grid % EdgesOnCell % array (i,iCell)
-                        if (iCell == cellsOnEdge(1,iEdge)) then
-                           fdir = 1.0
-                        else
-                           fdir = -1.0
-                        end if
-                        flux = -fdir * h_flux(iScalar,iEdge)/grid % areaCell % array(iCell)
-                        s_max_update(iScalar) = s_max_update(iScalar) + max(0.,flux)
-                        s_min_update(iScalar) = s_min_update(iScalar) + min(0.,flux)
-    
-                     end do
-                  end if
-   
-               end do
-   
-               if( config_positive_definite ) s_min(:) = 0.
-   
-               do iScalar=1,s_old % num_scalars
-                  scale_out (iScalar,iCell,km0) = 1.
-                  scale_in (iScalar,iCell,km0) = 1.
-                  s_max_update (iScalar) =  s_max_update (iScalar) / h_new (k,iCell)
-                  s_min_update (iScalar) =  s_min_update (iScalar) / h_new (k,iCell)
-                  s_upwind = s_update(iScalar,iCell,km0) / h_new(k,iCell)
-                  if ( s_max_update(iScalar) &gt; s_max(iScalar) .and. config_monotonic)   &amp;
-                     scale_in (iScalar,iCell,km0) = max(0.,(s_max(iScalar)-s_upwind)/(s_max_update(iScalar)-s_upwind+eps))
-                  if ( s_min_update(iScalar) &lt; s_min(iScalar) )   &amp;
-                     scale_out (iScalar,iCell,km0) = max(0.,(s_upwind-s_min(iScalar))/(s_upwind-s_min_update(iScalar)+eps))
-                end do
-  
-            end do ! end loop over cells to compute scale factor
+             end if
+          end do
 
+!  next, the limiter
 
-            call dmpar_exch_halo_field2dReal(dminfo, scale_out(:,:,km0), &amp;
-                                             s_old % num_scalars, grid % nCells, &amp;
-                                             cellsToSend, cellsToRecv)
-            call dmpar_exch_halo_field2dReal(dminfo, scale_in(:,:,km0), &amp;
-                                             s_old % num_scalars, grid % nCells, &amp;
-                                             cellsToSend, cellsToRecv)
+          do iCell = 1, grid % nCellsSolve
+            do k = 1, nVertLevels
+               s_min_update = (scalar_new(k,iCell)+scale_out(k,iCell))/h_new(k,iCell)
+               s_max_update = (scalar_new(k,iCell)+scale_in (k,iCell))/h_new(k,iCell)
+               s_upwind = scalar_new(k,iCell)/h_new(k,iCell)
+           
+               scale_factor = (s_max(k,iCell)-s_upwind)/(s_max_update-s_upwind+eps)
+               scale_in(k,iCell) = min( 1.0, max( 0.0, scale_factor) )
 
-       ! rescale the horizontal fluxes

+               scale_factor = (s_upwind-s_min(k,iCell))/(s_upwind-s_min_update+eps)
+               scale_out(k,iCell) = min( 1.0, max( 0.0, scale_factor) )
+
+            end do
+          end do
+!
+!  communicate scale factors here
+!
+      call dmpar_exch_halo_field2dReal( dminfo,               &amp;
+                                        scale_in(:,:),        &amp;
+                                        grid % nVertLevels,   &amp;
+                                        grid % nCells,        &amp;
+                                        cellsToSend, cellsToRecv )
+      call dmpar_exch_halo_field2dReal( dminfo,               &amp;
+                                        scale_out(:,:),       &amp;
+                                        grid % nVertLevels,   &amp;
+                                        grid % nCells,        &amp;
+                                        cellsToSend, cellsToRecv )
+!
+!  rescale the fluxes
+!
             do iEdge = 1, grid % nEdges
                cell1 = grid % cellsOnEdge % array(1,iEdge)
                cell2 = grid % cellsOnEdge % array(2,iEdge)
-               if (cell1 &lt;= grid%nCells .and. cell2 &lt;= grid%nCells) then
-                  do iScalar=1,s_old % num_scalars
-                     flux = h_flux(iScalar,iEdge)
-                     if (flux &gt; 0) then
-                        flux = flux * min(scale_out(iScalar,cell1,km0), scale_in(iScalar,cell2,km0))
-                     else
-                        flux = flux * min(scale_in(iScalar,cell1,km0), scale_out(iScalar,cell2,km0))
-                     end if
-                     h_flux(iScalar,iEdge) = flux
+               if (cell1 &lt;= grid%nCellsSolve .or. cell2 &lt;= grid%nCellsSolve) then
+                  do k = 1, nVertLevels
+                     flux = flux_arr(k,iEdge)
+                     flux = max(0.,flux) * min(scale_out(k,cell1), scale_in(k,cell2)) &amp;
+                          + min(0.,flux) * min(scale_in(k,cell1), scale_out(k,cell2))
+                     flux_arr(k,iEdge) = flux
                   end do
                end if
             end do
@@ -1876,95 +1739,63 @@
        ! rescale the vertical flux
  
             do iCell=1,grid % nCells
-               do iScalar=1,s_old % num_scalars
-                  flux =  v_flux(iScalar,iCell,km1)
-                  if (flux &gt; 0) then
-                     flux = flux * min(scale_out(iScalar,iCell,km0), scale_in(iScalar,iCell,km1))
-                  else
-                     flux = flux * min(scale_in(iScalar,iCell,km0), scale_out(iScalar,iCell,km1))
-                  end if
-                  v_flux(iScalar,iCell,km1) = flux
+               do k = 2, nVertLevels
+                  flux =  wdtn(k,iCell)
+                  flux = max(0.,flux) * min(scale_out(k-1,iCell), scale_in(k  ,iCell)) &amp;
+                       + min(0.,flux) * min(scale_out(k  ,iCell), scale_in(k-1,iCell))
+                  wdtn(k,iCell) = flux
                end do
             end do
-
-!  end of limiter
-!*******************************************************************************************************************
-
-         end if
-
-!---  update
-
-         do iCell=1,grid % nCells
-            !  add in upper vertical flux that was just renormalized
-            do iScalar=1,s_old % num_scalars
-               s_update(iScalar,iCell,km0) = s_update(iScalar,iCell,km0) + rdnw(k) * v_flux(iScalar,iCell,km1)
-               if (k &gt; 1) s_update(iScalar,iCell,km1) = s_update(iScalar,iCell,km1) - rdnw(k-1)*v_flux(iScalar,iCell,km1)
-            end do
-         end do

+!
+!  do the scalar update now that we have the fluxes
+!
          do iEdge=1,grid%nEdges
             cell1 = cellsOnEdge(1,iEdge)
             cell2 = cellsOnEdge(2,iEdge)
-            if (cell1 &lt;= grid%nCells .and. cell2 &lt;= grid%nCells) then
-               do iScalar=1,s_old % num_scalars
-                  s_update(iScalar,cell1,km0) = s_update(iScalar,cell1,km0) - &amp;
-                      h_flux(iScalar,iEdge) / grid % areaCell % array(cell1)
-                  s_update(iScalar,cell2,km0) = s_update(iScalar,cell2,km0) + &amp;
-                      h_flux(iScalar,iEdge) / grid % areaCell % array(cell2)
-               end do 
+            if (cell1 &lt;= grid%nCellsSolve .or. cell2 &lt;= grid%nCellsSolve) then  ! only for owned cells
+               do k=1,grid % nVertLevels
+                  scalar_new(k,cell1) = scalar_new(k,cell1) - flux_arr(k,iEdge)/areaCell(cell1)
+                  scalar_new(k,cell2) = scalar_new(k,cell2) + flux_arr(k,iEdge)/areaCell(cell2)
+               end do
             end if
-         end do 

-         ! decouple from mass
-!        if (k &gt; 1) then
-!           do iCell=1,grid % nCells
-!              do iScalar=1,s_old % num_scalars
-!                 s_update(iScalar,iCell,km1) = s_update(iScalar,iCell,km1) / h_new(k-1,iCell)
-!              end do
-!           end do

-!           do iCell=1,grid % nCells
-!              do iScalar=1,s_old % num_scalars
-!                 scalar_new(iScalar,k-1,iCell) = s_update(iScalar,iCell,km1) 
-!              end do
-!           end do
-!        end if
-!ldf begin:
-         if(k &gt; 1) then
-            do iCell = 1,grid%nCells
-            do iScalar = 1,s_old%num_scalars
-               scalar_new(iScalar,k-1,iCell) = (s_update(iScalar,iCell,km1)+dt*scalar_tend(iScalar,k-1,iCell)) &amp;
-                                             / h_new(k-1,iCell)
-            enddo
-            enddo
-         endif
-!ldf end.

-         ktmp = km1
-         km1 = km0
-         km0 = ktmp
+         end do
 
-      end do
+          do iCell=1,grid % nCellsSolve
+             do k=1,grid % nVertLevels
+               scalar_new(k,iCell) = (   scalar_new(k,iCell)  &amp;
+                   + (-rdnw(k)*(wdtn(k+1,iCell)-wdtn(k,iCell)) ) )/h_new(k,iCell)
+             end do
+          end do
 
-!     do iCell=1,grid % nCells
-!        do iScalar=1,s_old % num_scalars
-!           scalar_new(iScalar,grid % nVertLevels,iCell) = s_update(iScalar,iCell,km1) / h_new(grid%nVertLevels,iCell)
-!        end do
-!     end do
-!ldf begin:
-      do iCell = 1,grid%nCells
-      do iScalar = 1,s_old%num_scalars
-         scalar_new(iScalar,grid%nVertLevels,iCell) = (s_update(iScalar,iCell,km1)+dt*scalar_tend(iScalar,grid%nVertLevels,iCell)) &amp;
-                                                    / h_new(grid%nVertLevels,iCell)
-      enddo
-      enddo
+        if(debug_print) then
 
-      !remove negative values:
-      do iScalar=1,s_old%num_scalars
-          where(scalar_new(iScalar,:,:) .lt. 0.) scalar_new(iScalar,:,:) = 0.
-      enddo
-!ldf end.
+        scmin = scalar_new(1,1)
+        scmax = scalar_new(1,1)
+        do iCell = 1, grid%nCellsSolve
+        do k=1, grid%nVertLevels
+          scmax = max(scmax,scalar_new(k,iCell))
+          scmin = min(scmin,scalar_new(k,iCell))
+          if(s_max(k,iCell) &lt; scalar_new(k,iCell)) then
+            write(32,*) ' over - k,iCell,s_min,s_max,scalar_new ',k,iCell,s_min(k,iCell),s_max(k,iCell),scalar_new(k,iCell)
+          end if
+          if(s_min(k,iCell) &gt; scalar_new(k,iCell)) then
+            write(32,*) ' under - k,iCell,s_min,s_max,scalar_new ',k,iCell,s_min(k,iCell),s_max(k,iCell),scalar_new(k,iCell)
+          end if
+        enddo
+        enddo
+        write(0,*) ' scmin, scmax new out ',scmin,scmax
+        write(0,*) ' icell_min, k_min ',icellmax, kmax
 
+        end if
+
+          do iCell = 1, grid%nCells
+          do k=1, grid%nVertLevels
+            scalar_new_in(iScalar,k,iCell) = max(0.,scalar_new(k,iCell))
+          end do
+          end do
+
+      end do !  loop over scalars
+
    end subroutine advance_scalars_mono
 
 !----
@@ -1992,7 +1823,7 @@
       logical, parameter :: rk_diffusion = .false.
 
       integer :: iEdge, iCell, iVertex, k, cell1, cell2, vertex1, vertex2, eoe, i, j, iq
-      real (kind=RKIND) :: flux, vorticity_abs, rho_vertex, workpv, q, upstream_bias
+      real (kind=RKIND) :: flux, vorticity_abs, rho_vertex, workpv, upstream_bias
 
       integer :: nCells, nEdges, nVertices, nVertLevels, nCellsSolve
       real (kind=RKIND) :: h_mom_eddy_visc2,   v_mom_eddy_visc2,   h_mom_eddy_visc4
@@ -2011,10 +1842,15 @@
       integer, dimension(:), pointer :: nEdgesOnCell, nEdgesOnEdge
 
       real (kind=RKIND), dimension( grid % nVertLevels + 1 ) :: wduz, wdwz, wdtz, dpzx
-      real (kind=RKIND), dimension( grid % nVertLevels ) :: u_mix
+      real (kind=RKIND), dimension( grid % nVertLevels ) :: u_mix, ru_edge_w, q
       real (kind=RKIND) :: theta_edge, theta_turb_flux, z1, z2, z3, z4, zm, z0, zp, r
       real (kind=RKIND) :: d2fdx2_cell1, d2fdx2_cell2, pgrad
 
+      integer, dimension(:,:), pointer :: advCellsForEdge
+      integer, dimension(:), pointer :: nAdvCellsForEdge
+      real (kind=RKIND), dimension(:,:), pointer :: adv_coefs, adv_coefs_3rd
+      real (kind=RKIND) :: scalar_weight
+
       real (kind=RKIND), dimension(:), pointer :: rdzu, rdzw, fzm, fzp, qv_init
       real (kind=RKIND), dimension(:,:), pointer :: t_init 
 
@@ -2037,7 +1873,7 @@
       real (kind=RKIND), dimension(:,:), pointer :: ur_cell, vr_cell
 
       real (kind=RKIND), parameter :: c_s = 0.25
-      real (kind=RKIND), dimension( grid % nVertLevels ) :: d_diag, d_off_diag
+      real (kind=RKIND), dimension( grid % nVertLevels ) :: d_diag, d_off_diag, flux_arr
       real (kind=RKIND), dimension(:,:), pointer :: defc_a, defc_b
       logical :: delsq_horiz_mixing, newpx
 
@@ -2144,12 +1980,40 @@
       h_theta_eddy_visc4 = config_h_theta_eddy_visc4
       v_theta_eddy_visc2 = config_v_theta_eddy_visc2
 
+      nAdvCellsForEdge =&gt; grid % nAdvCellsForEdge % array
+      advCellsForEdge =&gt; grid % advCellsForEdge % array
+      adv_coefs =&gt; grid % adv_coefs % array
+      adv_coefs_3rd =&gt; grid % adv_coefs_3rd % array
+
       !
       ! Compute u (normal) velocity tendency for each edge (cell face)
       !
 
       write(0,*) ' rk_step in compute_dyn_tend ',rk_step
 
+
+      delsq_horiz_mixing = .false.
+      if (config_horiz_mixing == &quot;2d_smagorinsky&quot; .and. (rk_step == 1 .or. rk_diffusion)) then
+         do iCell = 1, nCells
+            d_diag(:) = 0.
+            d_off_diag(:) = 0.
+            do iEdge = 1, grid % nEdgesOnCell % array (iCell)
+               do k=1, nVertLevels
+                  d_diag(k)     = d_diag(k)     + defc_a(iEdge,iCell)*u(k,EdgesOnCell(iEdge,iCell))  &amp;
+                                                - defc_b(iEdge,iCell)*v(k,EdgesOnCell(iEdge,iCell))
+                  d_off_diag(k) = d_off_diag(k) + defc_b(iEdge,iCell)*u(k,EdgesOnCell(iEdge,iCell))  &amp;
+                                                + defc_a(iEdge,iCell)*v(k,EdgesOnCell(iEdge,iCell))
+               end do
+            end do
+            do k=1, nVertLevels
+               kdiff(k,iCell) = (c_s * config_len_disp)**2 * sqrt(d_diag(k)**2 + d_off_diag(k)**2)
+            end do
+         end do
+         delsq_horiz_mixing = .true.
+      else if ( config_horiz_mixing == &quot;2d_fixed&quot;) then
+         delsq_horiz_mixing = .true.
+      end if
+
       tend_u(:,:) = 0.0
 
       cf1 = grid % cf1 % scalar
@@ -2188,62 +2052,13 @@
         end do
       end do    
 
-      delsq_horiz_mixing = .false.
-      if (config_horiz_mixing == &quot;2d_smagorinsky&quot; .and. (rk_step == 1 .or. rk_diffusion)) then
-         do iCell = 1, nCells
-            d_diag(:) = 0.
-            d_off_diag(:) = 0.
-            do iEdge = 1, grid % nEdgesOnCell % array (iCell)
-               do k=1, nVertLevels
-                  d_diag(k)     = d_diag(k)     + defc_a(iEdge,iCell)*u(k,EdgesOnCell(iEdge,iCell))  &amp;
-                                                - defc_b(iEdge,iCell)*v(k,EdgesOnCell(iEdge,iCell))
-                  d_off_diag(k) = d_off_diag(k) + defc_b(iEdge,iCell)*u(k,EdgesOnCell(iEdge,iCell))  &amp;
-                                                + defc_a(iEdge,iCell)*v(k,EdgesOnCell(iEdge,iCell))
-               end do
-            end do
-            do k=1, nVertLevels
-               kdiff(k,iCell) = (c_s * config_len_disp)**2 * sqrt(d_diag(k)**2 + d_off_diag(k)**2)
-            end do
-         end do
-         delsq_horiz_mixing = .true.
-      else if ( config_horiz_mixing == &quot;2d_fixed&quot;) then
-         delsq_horiz_mixing = .true.
-      end if
+!****       u dyn tend
 
-!**** horizontal pressure gradient ***************************************************
-
-      if (newpx) then
-
          do iEdge=1,grid % nEdgesSolve
             cell1 = cellsOnEdge(1,iEdge)
             cell2 = cellsOnEdge(2,iEdge)
 
             k = 1
-            pr  = cpr(k,iEdge)*pp(k,cell2)+cpr(k+1,iEdge)*pp(k+1,cell2)+cpr(k+2,iEdge)*pp(k+2,cell2)
-            pl  = cpl(k,iEdge)*pp(k,cell1)+cpl(k+1,iEdge)*pp(k+1,cell1)+cpl(k+2,iEdge)*pp(k+2,cell1)
-            tend_u(k,iEdge) =  - cqu(k,iEdge)*2./(zz(k,cell1)+zz(k,cell2))*(pr-pl)/dcEdge(iEdge)
-
-            do k=2,nVertLevels
-
-               kr = min(nVertLevels,k+ nint(.5-sign(.5,zx(k,iEdge)+zx(k+1,iEdge))))
-               kl = min(nVertLevels,2*k+1-kr)
-
-               pr = pp(k,cell2)+.5*(zgrid(k   ,cell1)+zgrid(k +1,cell1)-zgrid(k ,cell2)-zgrid(k +1,cell2))   &amp;
-                                  /(zgrid(kr+1,cell2)-zgrid(kr-1,cell2))*( pp(kr,cell2)-pp   (kr-1,cell2))
-               pl = pp(k,cell1)+.5*(zgrid(k   ,cell2)+zgrid(k +1,cell2)-zgrid(k ,cell1)-zgrid(k +1,cell1))   &amp;
-                                  /(zgrid(kl+1,cell1)-zgrid(kl-1,cell1))*( pp(kl,cell1)-pp   (kl-1,cell1))
-               tend_u(k,iEdge) =  - cqu(k,iEdge)*2./(zz(k,cell1)+zz(k,cell2))*(pr-pl)/dcEdge(iEdge)
-
-            end do
-         end do
-
-      else
-
-         do iEdge=1,grid % nEdgesSolve
-            cell1 = cellsOnEdge(1,iEdge)
-            cell2 = cellsOnEdge(2,iEdge)
-
-            k = 1
             dpzx(k) = .5*zx(k,iEdge)*(cf1*(pp(k  ,cell2)+pp(k  ,cell1))   &amp;
                                      +cf2*(pp(k+1,cell2)+pp(k+1,cell1))   &amp;
                                      +cf3*(pp(k+2,cell2)+pp(k+2,cell1)))
@@ -2253,53 +2068,7 @@
             end do
             dpzx(nVertLevels+1) = 0.
 
-            do k=1,nVertLevels
-               tend_u(k,iEdge) =  - cqu(k,iEdge)*( (pp(k,cell2)/zz(k,cell2) - pp(k,cell1)/zz(k,cell1))  &amp;
-                                                /  dcEdge(iEdge) - rdzw(k)*(dpzx(k+1)-dpzx(k)) )
-            end do
-         end do
-
-      end if
-
-!**** nonlinear Coriolis term and ke gradient ****************************************
-
-
-      do iEdge=1,grid % nEdgesSolve
-         cell1 = cellsOnEdge(1,iEdge)
-         cell2 = cellsOnEdge(2,iEdge)
-
-         do k=1,nVertLevels
-            q = 0.0
-            do j = 1,nEdgesOnEdge(iEdge)
-               eoe = edgesOnEdge(j,iEdge)
-               workpv = 0.5 * (pv_edge(k,iEdge) + pv_edge(k,eoe))
-               q = q + weightsOnEdge(j,iEdge) * u(k,eoe) * workpv * rho_edge(k,eoe)
-            end do
-            tend_u(k,iEdge) = tend_u(k,iEdge) + rho_edge(k,iEdge)* (q - (ke(k,cell2) - ke(k,cell1))          &amp;
-                                                                 / dcEdge(iEdge))                            &amp;
-                                              - u(k,iEdge)*0.5*(divergence_ru(k,cell1)+divergence_ru(k,cell2)) 
-         end do
-
-      end do
-
-      deallocate(divergence_ru)
-
-      !
-      !  vertical advection for u
-      !
-      do iEdge=1,grid % nEdgesSolve
-         cell1 = cellsOnEdge(1,iEdge)
-         cell2 = cellsOnEdge(2,iEdge)
-
-         wduz(1) = 0.
-         if (config_u_vadv_order == 2) then
-
-            do k=2,nVertLevels
-               wduz(k) =  0.5*( rw(k,cell1)+rw(k,cell2) )*(fzm(k)*u(k,iEdge)+fzp(k)*u(k-1,iEdge))  
-            end do
-
-         else if (config_u_vadv_order == 3) then
-
+            wduz(1) = 0.
             k = 2
             wduz(k) =  0.5*( rw(k,cell1)+rw(k,cell2) )*(fzm(k)*u(k,iEdge)+fzp(k)*u(k-1,iEdge))  
             do k=3,nVertLevels-1
@@ -2308,25 +2077,36 @@
             k = nVertLevels
             wduz(k) =  0.5*( rw(k,cell1)+rw(k,cell2) )*(fzm(k)*u(k,iEdge)+fzp(k)*u(k-1,iEdge))  
 
-         else if (config_u_vadv_order == 4) then
+            wduz(nVertLevels+1) = 0.
 
-            k = 2
-            wduz(k) =  0.5*( rw(k,cell1)+rw(k,cell2) )*(fzm(k)*u(k,iEdge)+fzp(k)*u(k-1,iEdge))  
-            do k=3,nVertLevels-1
-               wduz(k) = flux4( u(k-2,iEdge),u(k-1,iEdge),u(k,iEdge),u(k+1,iEdge),0.5*(rw(k,cell1)+rw(k,cell2)) )
+            do k=1,nVertLevels
+               tend_u(k,iEdge) =  - cqu(k,iEdge)*( (pp(k,cell2)/zz(k,cell2) - pp(k,cell1)/zz(k,cell1))  &amp;
+                                                /  dcEdge(iEdge) - rdzw(k)*(dpzx(k+1)-dpzx(k)) )
+               tend_u(k,iEdge) = tend_u(k,iEdge) - rdzw(k)*(wduz(k+1)-wduz(k)) 
             end do
-            k = nVertLevels
-            wduz(k) =  0.5*( rw(k,cell1)+rw(k,cell2) )*(fzm(k)*u(k,iEdge)+fzp(k)*u(k-1,iEdge))  
 
-         end if
+               q(:) = 0.0
+               do j = 1,nEdgesOnEdge(iEdge)
+                 eoe = edgesOnEdge(j,iEdge)
+                 do k=1,nVertLevels
+                   workpv = 0.5 * (pv_edge(k,iEdge) + pv_edge(k,eoe))
+                   q(k) = q(k) + weightsOnEdge(j,iEdge) * u(k,eoe) * workpv * rho_edge(k,eoe)
+                 end do
+               end do
 
-         wduz(nVertLevels+1) = 0.
-
-         do k=1,nVertLevels
-            tend_u(k,iEdge) = tend_u(k,iEdge) - rdzw(k)*(wduz(k+1)-wduz(k)) 
+            do k=1,nVertLevels
+               tend_u(k,iEdge) = tend_u(k,iEdge) + rho_edge(k,iEdge)* (q(k) - (ke(k,cell2) - ke(k,cell1))       &amp;
+                                                                    / dcEdge(iEdge))                            &amp;
+                                                - u(k,iEdge)*0.5*(divergence_ru(k,cell1)+divergence_ru(k,cell2)) 
+               tend_u(k,iEdge) = tend_u(k,iEdge) &amp;
+                                - 2.*omega_e*cos(grid % angleEdge % array(iEdge))*cos(grid % latEdge % array(iEdge)) &amp;
+                                  *.25*(rw(k,cell1)+rw(k+1,cell1)+rw(k,cell2)+rw(k+1,cell2))                         &amp;
+                                - u(k,iEdge)*.25*(rw(k+1,cell1)+rw(k,cell1)+rw(k,cell2)+rw(k+1,cell2))/r_earth
+            end do
          end do
-      end do
 
+      deallocate(divergence_ru)
+
       !
       !  horizontal mixing for u
       !
@@ -2550,26 +2330,6 @@
          end do
       end do
 
-      !SHP-curvature
-      if (curvature) then
-         do iEdge=1,grid % nEdgesSolve
-
-            cell1 = cellsOnEdge(1,iEdge)
-            cell2 = cellsOnEdge(2,iEdge)
-
-            do k=1,nVertLevels
-               tend_u(k,iEdge) = tend_u(k,iEdge) &amp;
-                                - 2.*omega_e*cos(grid % angleEdge % array(iEdge))*cos(grid % latEdge % array(iEdge)) &amp;
-                                  *.25*(rw(k,cell1)+rw(k+1,cell1)+rw(k,cell2)+rw(k+1,cell2))                         &amp;
-                                - u(k,iEdge)*.25*(rw(k+1,cell1)+rw(k,cell1)+rw(k,cell2)+rw(k+1,cell2))/r_earth
-                              !  + .5*(rho(k,cell1)+rho(k,cell2))*v(k,iEdge)*tan(grid % latEdge % array(iEdge))       &amp;
-                              ! *(u(k,iEdge)*cos(grid % angleEdge % array(iEdge))            &amp;
-                              !   -v(k,iEdge)*sin(grid % angleEdge % array(iEdge)))/r_earth
-            end do
-         end do
-      end if
-
-
 !----------- rhs for w
 
       tend_w(:,:) = 0.
@@ -2601,33 +2361,52 @@
             if (cell1 &lt;= nCellsSolve .or. cell2 &lt;= nCellsSolve) then
 
                do k=2,grid % nVertLevels
+                 ru_edge_w(k) = fzm(k)*ru(k,iEdge) + fzp(k)*ru(k-1,iEdge)
+               end do
 
-                  d2fdx2_cell1 = deriv_two(1,1,iEdge) * w(k,cell1)
-                  d2fdx2_cell2 = deriv_two(1,2,iEdge) * w(k,cell2)
-                  do i=1, grid % nEdgesOnCell % array (cell1)
-                     if ( grid % CellsOnCell % array (i,cell1) &lt;= grid%nCells) &amp;
-                     d2fdx2_cell1 = d2fdx2_cell1 + deriv_two(i+1,1,iEdge) * w(k,grid % CellsOnCell % array (i,cell1))
-                  end do
-                  do i=1, grid % nEdgesOnCell % array (cell2)
-                     if ( grid % CellsOnCell % array (i,cell2) &lt;= grid%nCells) &amp;
-                     d2fdx2_cell2 = d2fdx2_cell2 + deriv_two(i+1,2,iEdge) * w(k,grid % CellsOnCell % array (i,cell2))
-                  end do
+               flux_arr(:) = 0.
+               do i=1,nAdvCellsForEdge(iEdge)
+                 iCell = advCellsForEdge(i,iEdge)
+                 do k=2,grid % nVertLevels
+                   scalar_weight = adv_coefs(i,iEdge) + coef_3rd_order*sign(1.,ru_edge_w(k))*adv_coefs_3rd(i,iEdge)
+                   flux_arr(k) = flux_arr(k) + scalar_weight* w(k,iCell)
+                 end do
+               end do
 
+               do k=1,grid % nVertLevels
+                  tend_w(k,cell1) = tend_w(k,cell1) - ru_edge_w(k)*flux_arr(k)
+                  tend_w(k,cell2) = tend_w(k,cell2) + ru_edge_w(k)*flux_arr(k)
+               end do
+
+!               do k=2,grid % nVertLevels
+!
+!                  d2fdx2_cell1 = deriv_two(1,1,iEdge) * w(k,cell1)
+!                  d2fdx2_cell2 = deriv_two(1,2,iEdge) * w(k,cell2)
+!                  do i=1, grid % nEdgesOnCell % array (cell1)
+!                     if ( grid % CellsOnCell % array (i,cell1) &lt;= grid%nCells) &amp;
+!                     d2fdx2_cell1 = d2fdx2_cell1 + deriv_two(i+1,1,iEdge) * w(k,grid % CellsOnCell % array (i,cell1))
+!                  end do
+!                  do i=1, grid % nEdgesOnCell % array (cell2)
+!                     if ( grid % CellsOnCell % array (i,cell2) &lt;= grid%nCells) &amp;
+!                     d2fdx2_cell2 = d2fdx2_cell2 + deriv_two(i+1,2,iEdge) * w(k,grid % CellsOnCell % array (i,cell2))
+!                  end do
+!
 !  3rd order stencil
-                  if( u(k,iEdge)+u(k-1,iEdge) &gt; 0) then
-                     flux = dvEdge(iEdge) * (fzm(k)*ru(k,iEdge) + fzp(k)*ru(k-1,iEdge))*(  &amp;
-                                             0.5*(w(k,cell1) + w(k,cell2))                 &amp;
-                                            -(dcEdge(iEdge) **2) * (d2fdx2_cell1) / 6. )
-                  else
-                     flux = dvEdge(iEdge) * (fzm(k)*ru(k,iEdge) + fzp(k)*ru(k-1,iEdge))*(  &amp;
-                                             0.5*(w(k,cell1) + w(k,cell2))                 &amp;
-                                            -(dcEdge(iEdge) **2) * (d2fdx2_cell2) / 6. )
-                  end if
+!                  if( u(k,iEdge)+u(k-1,iEdge) &gt; 0) then
+!                     flux = dvEdge(iEdge) * (fzm(k)*ru(k,iEdge) + fzp(k)*ru(k-1,iEdge))*(  &amp;
+!                                             0.5*(w(k,cell1) + w(k,cell2))                 &amp;
+!                                            -(dcEdge(iEdge) **2) * (d2fdx2_cell1) / 6. )
+!                  else
+!                     flux = dvEdge(iEdge) * (fzm(k)*ru(k,iEdge) + fzp(k)*ru(k-1,iEdge))*(  &amp;
+!                                             0.5*(w(k,cell1) + w(k,cell2))                 &amp;
+!                                            -(dcEdge(iEdge) **2) * (d2fdx2_cell2) / 6. )
+!                  end if
+!
+!                  tend_w(k,cell1) = tend_w(k,cell1) - flux
+!                  tend_w(k,cell2) = tend_w(k,cell2) + flux
+!
+!               end do
 
-                  tend_w(k,cell1) = tend_w(k,cell1) - flux
-                  tend_w(k,cell2) = tend_w(k,cell2) + flux
-
-               end do
             end if
          end do
 
@@ -2664,6 +2443,22 @@
          end do
       end if
 
+      !SHP-curvature
+      if (curvature) then
+
+         do iCell = 1, grid % nCellsSolve
+            do k=2,nVertLevels
+               tend_w(k,iCell) = tend_w(k,iCell) &amp;
+                                + rho_zz(k,iCell)*( (fzm(k)*ur_cell(k,iCell)+fzp(k)*ur_cell(k-1,iCell))**2.             &amp;
+                                                +(fzm(k)*vr_cell(k,iCell)+fzp(k)*vr_cell(k-1,iCell))**2. )/r_earth   &amp;
+                                + 2.*omega_e*cos(grid % latCell % array(iCell))*rho_zz(k,iCell)   &amp;
+                                    *(fzm(k)*ur_cell(k,iCell)+fzp(k)*ur_cell(k-1,iCell))
+
+            end do
+         end do
+
+      end if
+
       !
       !  horizontal mixing for w - we could combine this with advection directly (i.e. as a turbulent flux),
       !  but here we can also code in hyperdiffusion if we wish (2nd order at present)
@@ -2816,26 +2611,6 @@
                                              rr(k,iCell)*(1.+qtot(k,iCell)))                  &amp;
                                     +fzp(k)*(rb(k-1,iCell)*(qtot(k-1,iCell))  +  &amp;
                                              rr(k-1,iCell)*(1.+qtot(k-1,iCell))) ))
-!SHP-old version
-!                                   ( fzm(k)*rr(k,iCell) + fzm(k)*(rb(k,iCell)+rr(k,iCell))*qtot(k,iCell) &amp; 
-!                                    +fzp(k)*rr(k-1,iCell) + fzp(k)*(rb(k-1,iCell)+rr(k-1,iCell))*qtot(k-1,iCell) )) 
-
-
-!                                  - gravity*(fzm(k)*rb(k,iCell)+fzp(k)*rb(k-1,iCell)) )       &amp;
-!                                  - gravity*( fzm(k)*(rr(k,iCell)+rb(k,iCell)) + fzp(k)*(rr(k-1,iCell)+rb(k-1,iCell)) )
-
-!                               - cqw(k,iCell)*rdzu(k)*(pp(k,iCell)-pp(k-1,iCell))                            &amp;
-!                                - gravity*( fzm(k)*rr(k,iCell)+fzp(k)*rr(k-1,iCell) &amp;
-!                                           +(1.-cqw(k,iCell))*(fzm(k)*rb(k,iCell)+fzp(k)*rb(k-1,iCell)))
-
-! WCS version                               - cqw(k,iCell)*rdzu(k)*(pp(k,iCell)-pp(k-1,iCell))                            &amp;
-!                                - gravity*0.5*(rr(k,iCell)+rr(k-1,iCell)+(1.-cqw(k,iCell))*(rb(k,iCell)+rb(k-1,iCell)))
-
-!Joe formulation
-!                                  - cqw(k,iCell)*( rdzu(k)*(pp(k,iCell)-pp(k-1,iCell))        &amp;
-!                                  - gravity*(fzm(k)*rb(k,iCell)+fzp(k)*rb(k-1,iCell)) )       &amp;
-!                                  - gravity*( fzm(k)*(rr(k,iCell)+rb(k,iCell)) + fzp(k)*(rr(k-1,iCell)+rb(k-1,iCell)) )
-
          end do
       end do
 
@@ -2849,10 +2624,6 @@
 
          do iCell = 1, grid % nCellsSolve
             do k=2,nVertLevels
-!               tend_w(k,iCell) = tend_w(k,iCell) + v_mom_eddy_visc2*0.5*(rho_zz(k,iCell)+rho_zz(k-1,iCell))*(  &amp;
-!                                        (w(k+1,iCell)-w(k  ,iCell))*rdzw(k)                              &amp;
-!                                       -(w(k  ,iCell)-w(k-1,iCell))*rdzw(k-1) )*rdzu(k)
-
                tend_w_euler(k,iCell) = tend_w_euler(k,iCell) + v_mom_eddy_visc2*0.5*(rho_zz(k,iCell)+rho_zz(k-1,iCell))*(  &amp;
                                         (w(k+1,iCell)-w(k  ,iCell))*rdzw(k)                              &amp;
                                        -(w(k  ,iCell)-w(k-1,iCell))*rdzw(k-1) )*rdzu(k)
@@ -2874,22 +2645,6 @@
 
       deallocate(qtot)
 
-      !SHP-curvature
-      if (curvature) then
-
-         do iCell = 1, grid % nCellsSolve
-            do k=2,nVertLevels
-               tend_w(k,iCell) = tend_w(k,iCell) &amp;
-                                + rho_zz(k,iCell)*( (fzm(k)*ur_cell(k,iCell)+fzp(k)*ur_cell(k-1,iCell))**2.             &amp;
-                                                +(fzm(k)*vr_cell(k,iCell)+fzp(k)*vr_cell(k-1,iCell))**2. )/r_earth   &amp;
-                                + 2.*omega_e*cos(grid % latCell % array(iCell))*rho_zz(k,iCell)   &amp;
-                                    *(fzm(k)*ur_cell(k,iCell)+fzp(k)*ur_cell(k-1,iCell))
-
-            end do
-         end do
-
-      end if
-
 !----------- rhs for theta
 
       tend_theta(:,:) = 0.
@@ -2919,44 +2674,53 @@
             cell2 = cellsOnEdge(2,iEdge)
             if (cell1 &lt;= nCellsSolve .or. cell2 &lt;= nCellsSolve) then
 
+               flux_arr(:) = 0.
+               do i=1,nAdvCellsForEdge(iEdge)
+                 iCell = advCellsForEdge(i,iEdge)
+                 do k=1,grid % nVertLevels
+                   scalar_weight = adv_coefs(i,iEdge) + coef_3rd_order*sign(1.,ru(k,iEdge))*adv_coefs_3rd(i,iEdge)
+                   flux_arr(k) = flux_arr(k) + scalar_weight* theta_m(k,iCell)
+                 end do
+               end do
+
                do k=1,grid % nVertLevels
+                  tend_theta(k,cell1) = tend_theta(k,cell1) - ru(k,iEdge)*flux_arr(k)
+                  tend_theta(k,cell2) = tend_theta(k,cell2) + ru(k,iEdge)*flux_arr(k)
+               end do
 
-                  d2fdx2_cell1 = deriv_two(1,1,iEdge) * theta_m(k,cell1)
-                  d2fdx2_cell2 = deriv_two(1,2,iEdge) * theta_m(k,cell2)
-                  do i=1, grid % nEdgesOnCell % array (cell1)
-                     if ( grid % CellsOnCell % array (i,cell1) &lt;= grid%nCells) &amp;
-                     d2fdx2_cell1 = d2fdx2_cell1 + deriv_two(i+1,1,iEdge) * theta_m(k,grid % CellsOnCell % array (i,cell1))
-                  end do
-                  do i=1, grid % nEdgesOnCell % array (cell2)
-                     if ( grid % CellsOnCell % array (i,cell2) &lt;= grid%nCells) &amp;
-                     d2fdx2_cell2 = d2fdx2_cell2 + deriv_two(i+1,2,iEdge) * theta_m(k,grid % CellsOnCell % array (i,cell2))
-                  end do
 
+!               do k=1,grid % nVertLevels
+!
+!                  d2fdx2_cell1 = deriv_two(1,1,iEdge) * theta_m(k,cell1)
+!                  d2fdx2_cell2 = deriv_two(1,2,iEdge) * theta_m(k,cell2)
+!                  do i=1, grid % nEdgesOnCell % array (cell1)
+!                     if ( grid % CellsOnCell % array (i,cell1) &lt;= grid%nCells) &amp;
+!                     d2fdx2_cell1 = d2fdx2_cell1 + deriv_two(i+1,1,iEdge) * theta_m(k,grid % CellsOnCell % array (i,cell1))
+!                  end do
+!                  do i=1, grid % nEdgesOnCell % array (cell2)
+!                     if ( grid % CellsOnCell % array (i,cell2) &lt;= grid%nCells) &amp;
+!                     d2fdx2_cell2 = d2fdx2_cell2 + deriv_two(i+1,2,iEdge) * theta_m(k,grid % CellsOnCell % array (i,cell2))
+!                  end do
+!
 !  3rd order stencil
+!
+!                  if( u(k,iEdge) &gt; 0) then
+!                        flux = dvEdge(iEdge) * ru(k,iEdge) * (          &amp;
+!                                               0.5*(theta_m(k,cell1) + theta_m(k,cell2))      &amp;
+!                                                -(dcEdge(iEdge) **2) * (d2fdx2_cell1 + d2fdx2_cell2) / 12.          &amp;
+!                                                -(dcEdge(iEdge) **2) * coef_3rd_order*(d2fdx2_cell1 - d2fdx2_cell2) / 12. )
+!                     else
+!                        flux = dvEdge(iEdge) *  ru(k,iEdge) * (          &amp;
+!                                               0.5*(theta_m(k,cell1) + theta_m(k,cell2))      &amp;
+!                                                -(dcEdge(iEdge) **2) * (d2fdx2_cell1 + d2fdx2_cell2) / 12.          &amp;
+!                                                +(dcEdge(iEdge) **2) * coef_3rd_order*(d2fdx2_cell1 - d2fdx2_cell2) / 12. )
+!                  end if
+!
+!                  tend_theta(k,cell1) = tend_theta(k,cell1) - flux
+!                  tend_theta(k,cell2) = tend_theta(k,cell2) + flux
+!
+!               end do
 
-                  if( u(k,iEdge) &gt; 0) then
-                        flux = dvEdge(iEdge) * ru(k,iEdge) * (          &amp;
-                                               0.5*(theta_m(k,cell1) + theta_m(k,cell2))      &amp;
-                                                -(dcEdge(iEdge) **2) * (d2fdx2_cell1 + d2fdx2_cell2) / 12.          &amp;
-                                                -(dcEdge(iEdge) **2) * coef_3rd_order*(d2fdx2_cell1 - d2fdx2_cell2) / 12. )
-                     else
-                        flux = dvEdge(iEdge) *  ru(k,iEdge) * (          &amp;
-                                               0.5*(theta_m(k,cell1) + theta_m(k,cell2))      &amp;
-                                                -(dcEdge(iEdge) **2) * (d2fdx2_cell1 + d2fdx2_cell2) / 12.          &amp;
-                                                +(dcEdge(iEdge) **2) * coef_3rd_order*(d2fdx2_cell1 - d2fdx2_cell2) / 12. )
-!                     flux = dvEdge(iEdge) *  ru(k,iEdge) * (        &amp;
-!                                            0.5*(theta_m(k,cell1) + theta_m(k,cell2))      &amp;
-!                                            -(dcEdge(iEdge) **2) * (d2fdx2_cell1) / 6. )
-!                  else
-!                     flux = dvEdge(iEdge) *  ru(k,iEdge) * (        &amp;
-!                                            0.5*(theta_m(k,cell1) + theta_m(k,cell2))      &amp;
-!                                            -(dcEdge(iEdge) **2) * (d2fdx2_cell2) / 6. )
-                  end if
-
-                  tend_theta(k,cell1) = tend_theta(k,cell1) - flux
-                  tend_theta(k,cell2) = tend_theta(k,cell2) + flux
-
-               end do
             end if
          end do
 

</font>
</pre>