<pre>
module heat_xfer_mod

   implicit none
   private

   integer, public, parameter :: P = 1000, Q = P/2
   real, public, parameter :: tolerance = 1.0e-5
   real, public, dimension(:, :), allocatable :: plate
   real, public, dimension(:, :), codimension[:,:], &
         allocatable, target :: quad
   real, public, dimension(:, :), allocatable :: temp_interior
   real, public, codimension[*] :: diff
   enum, bind(C)
      enumerator :: NW=1, SW, NE, SE 
   end enum
   real, public, pointer, dimension(:,:) :: n, e, s, w, interior
   real, public, allocatable, dimension(:) :: top, bottom, left, right
   integer :: j, image
   integer, public :: n_iter = 0, alloc_stat
   public :: set_boundary_conditions, initialize_quadrants, heat_xfer
   public :: print_plate

contains

subroutine set_boundary_conditions ()

  allocate (top (0:P+1), bottom(0:P+1), &
            left(0:P+1), right (0:P+1), &
            stat = alloc_stat)
  if (alloc_stat > 0) then
     print *, "Allocation of boundary failed on image", &
              this_image()
     stop
  end if

  top = [ 1.0, ( real(j)/P, j = P, 0, -1) ]
  left = 1.0
  right = 0.0
  bottom = 0.0

end subroutine set_boundary_conditions

subroutine initialize_quadrants ()

   allocate (quad(0:Q+1, 0:Q+1) [2,*], &
              stat = alloc_stat)
   if (alloc_stat > 0) then
      print *, "Allocation of quadrant failed on image", &
               this_image()
      stop
   end if

   allocate (temp_interior(1:Q, 1:Q), &
             stat = alloc_stat)
   if (alloc_stat > 0) then
      print *, "Allocation of temp interior failed on image", &
               this_image()
      stop
   end if

   quad = 0.0
   select case (this_image())
      case(NW)
         quad(:,0) = left(:Q+1)
         quad(0,:) = top(:Q+1)
      case(SW)
         quad(:,0) = left(Q:)
         quad(Q+1,:) = bottom(:Q+1)
      case(NE)
         quad(Q+1,:) = right(:Q+1)
         quad(0,:) = top(Q:)
      case(SE)
         quad(Q+1,:) = right(Q:)
         quad(Q+1,:) = bottom(Q:)
   end select

!  Set up pointers in all four quadrants

   interior => quad(1:Q, 1:Q)
   n => quad(0:Q-1, 1:Q  )
   s => quad(2:Q+1, 1:Q  )
   e => quad(1:Q  , 2:Q+1)
   w => quad(1:Q  , 0:Q-1)

end subroutine initialize_quadrants

subroutine heat_xfer ()

   heat_xfer_loop: do

      ! Update interior quadrant boundaries
      ! Plate boundaries have not changed
      select case (this_image())
         case(NW)
            quad(Q+1, 1:Q) = quad(1,   1:Q) [2,1] ! S
            quad(1:Q, Q+1) = quad(1:Q, 1  ) [1,2] ! E
         case(SW)
            quad(0,   1:Q) = quad(Q  , 1:Q) [1,1] ! N
            quad(1:Q, Q+1) = quad(1:Q, 1  ) [2,2] ! E
         case(NE)
            quad(Q+1, 1:Q) = quad(1,   1:Q) [2,2] ! S
            quad(1:Q, 0  ) = quad(1:Q, Q  ) [1,1] ! W
         case(SE)
            quad(0,   1:Q) = quad(Q  , 1:Q) [1,2] ! N
            quad(1:Q, 0  ) = quad(1:Q, Q  ) [2,1] ! W
      end select

      sync all

      temp_interior = (n + e + s + w) / 4

      diff = maxval(abs(interior - temp_interior))
      interior = temp_interior

      sync all

      if (this_image() == 1) then
         n_iter = n_iter + 1
         do image = 2, num_images()
            diff = max (diff, diff[image])
         end do
      end if
      sync all
      if (diff[1] < tolerance) exit heat_xfer_loop

   end do heat_xfer_loop

end subroutine heat_xfer

subroutine print_plate(x)

   real, dimension(:,:), intent(in) :: x
   integer :: line

   print *
   do line = 1, size(x, 2)
      print "(1000f5.2)", x(line, :)
   end do

end subroutine print_plate

end module heat_xfer_mod


program heat4

   use heat_xfer_mod
   implicit none

   integer :: start, stop, counts_per_second
   integer :: line
   real, dimension(:,:), allocatable, target :: a
   real, dimension(:,:), allocatable :: temp

   call set_boundary_conditions()
   sync all
   call system_clock(start, counts_per_second)
   call initialize_quadrants()
   sync all
   call heat_xfer()
   sync all

   if (this_image() == 1) then
      call system_clock(stop)
      print *, "For 4-image solution,  time = ", &
         (stop - start) / counts_per_second, " seconds"

      allocate (plate(0:P+1,0:P+1), stat = alloc_stat)
        if (alloc_stat > 0) then
           print *, "Allocation of plate failed"
           stop
        end if

      plate(0:Q,  0:Q)  = quad(0:Q,   0:Q  ) [1,1] ! NW 
      plate(Q+1:, 0:Q)  = quad(1:Q+1, 0:Q  ) [2,1] ! SW
      plate(0:Q,  Q+1:) = quad(0:Q,   1:Q+1) [1,2] ! NE
      plate(Q+1:, Q+1:) = quad(1:Q+1, 1:Q+1) [2,2] ! SE

      print *
      print *, "Number of iterations (4 images):", n_iter

!      call print_plate(plate)
   end if

   if (this_image() == 1) then
      allocate (a(0:P+1,0:P+1), temp(P,P), &
                stat = alloc_stat)
        if (alloc_stat > 0) then
           print *, "Allocation of a or temp failed"
           stop
        end if

      a = 0
      a(0, :) = top
      a(:, 0) = left
      a(:, P+1) = right
      a(P+1:, 0) = bottom

      interior => a(1:P, 1:P)
      n => a(0:P-1, 1:P  )
      s => a(2:P+1, 1:P  )
      w => a(1:P,   0:P-1)
      e => a(1:P,   2:P+1)
      
      call system_clock(start)
      n_iter = 0
      do
         temp = (n + e + w + s) / 4
         n_iter = n_iter + 1
         diff = maxval(abs(temp - interior))
         interior = temp
         if (diff < tolerance) exit
      end do

      call system_clock(stop)
      print *
      print *, "For 1-image solution,  time = ", &
            (stop - start) / counts_per_second, " seconds"

      diff = maxval(abs(plate(1:P, 1:P) - a(1:P, 1:P)))

      print *
      print *, "Number of iterations (1 image):", n_iter

!      call print_plate(a)

      print *
      print *, "Max difference between methods:", diff
         
   end if

end program heat4
</pre>
