MONC
fftsolver.F90
Go to the documentation of this file.
1 
5  use grids_mod, only : x_index, y_index, z_index
6  use state_mod, only : model_state_type
14  use mpi, only : mpi_request_null, mpi_statuses_ignore
15  implicit none
16 
17 #ifndef TEST_MODE
18  private
19 #endif
20 
21  real(kind=DEFAULT_PRECISION), dimension(:), allocatable :: cos_x, cos_y
22  real(kind=DEFAULT_PRECISION) :: pi
23  real(kind=DEFAULT_PRECISION), dimension(:,:,:), allocatable :: pt
24  integer :: fourier_space_sizes(3)
26 
28 contains
29 
33  fftsolver_get_descriptor%name="fftsolver"
34  fftsolver_get_descriptor%version=0.1
38  end function fftsolver_get_descriptor
39 
42  subroutine initialisation_callback(current_state)
43  type(model_state_type), target, intent(inout) :: current_state
44 
45  integer :: i, my_y_start, my_x_start
46 
47  if (.not. is_component_enabled(current_state%options_database, "diverr")) then
48  call log_master_log(log_error, "The FFT solver component requires the diverr component to be enabled")
49  end if
50 
51  pi=4.0_default_precision*atan(1.0_default_precision)
52 
53  fourier_space_sizes=initialise_pencil_fft(current_state, my_y_start, my_x_start)
54 
55  call init_halo_communication(current_state, get_single_field_per_halo_cell, halo_swap_state, 1, .false.)
56 
57  allocate(pt(fourier_space_sizes(z_index), fourier_space_sizes(y_index), fourier_space_sizes(x_index)))
58 
59 #ifdef U_ACTIVE
60  allocate(cos_x(fourier_space_sizes(x_index)))
61  do i=1, fourier_space_sizes(x_index)
62  cos_x(i)=(2.0_default_precision/(current_state%global_grid%configuration%horizontal%dx*&
63  current_state%global_grid%configuration%horizontal%dx))&
64  *(cos(2.0_default_precision*pi*real(((i-1)+(my_x_start-1))/2, kind=default_precision)/&
65  real(current_state%global_grid%size(X_INDEX), kind=default_precision))-1.0_default_precision)
66  end do
67 #endif
68 
69 #ifdef V_ACTIVE
70  allocate(cos_y(fourier_space_sizes(y_index)))
71  do i=1, fourier_space_sizes(y_index)
72  cos_y(i)=(2.0_default_precision/(current_state%global_grid%configuration%horizontal%dy*&
73  current_state%global_grid%configuration%horizontal%dy))&
74  *(cos(2.0_default_precision*pi*real(((i-1)+(my_y_start-1))/2, kind=default_precision)/&
75  real(current_state%global_grid%size(Y_INDEX), kind=default_precision))-1.0_default_precision)
76  end do
77 #endif
78  current_state%psrce_x_hs_send_request=mpi_request_null
79  current_state%psrce_y_hs_send_request=mpi_request_null
80  current_state%psrce_x_hs_recv_request=mpi_request_null
81  current_state%psrce_y_hs_recv_request=mpi_request_null
82  end subroutine initialisation_callback
83 
86  subroutine timestep_callback(current_state)
87  type(model_state_type), target, intent(inout) :: current_state
88 
89  integer :: start_loc(3), end_loc(3), i
90 
91  do i=1,3
92  start_loc(i)=current_state%local_grid%local_domain_start_index(i)
93  end_loc(i)=current_state%local_grid%local_domain_end_index(i)
94  end do
95 
96  call complete_psrce_calculation(current_state, current_state%local_grid%halo_size(y_index), &
97  current_state%local_grid%halo_size(x_index))
98 
99 #ifdef FFT_TEST_MODE
100  current_state%p%data=real(current_state%parallel%my_rank, kind=8) + 1.d0
101 #endif
102 
103  call perform_forward_3dfft(current_state, current_state%p%data(start_loc(z_index):end_loc(z_index), &
104  start_loc(y_index):end_loc(y_index), start_loc(x_index):end_loc(x_index)), pt)
105 
106 #ifndef FFT_TEST_MODE
107  call tridiagonal_solver(current_state, pt, fourier_space_sizes)
108 #endif
109 
110  ! Here it is complex space, distributed as needed. The other option is real space but with +1 for the last process in Y
111  ! as require that complex number - not sure the best approach. We can extract the values here from complex anyway so
112  ! worth trying that first. Operating on the size of pt rather than grid sizes. Downside of real space xform is then we have +1
113  ! in the forwards transformation
114  call perform_backwards_3dfft(current_state, pt, current_state%p%data(start_loc(z_index):end_loc(z_index), &
115  start_loc(y_index):end_loc(y_index), start_loc(x_index):end_loc(x_index)))
116 
117 #ifdef FFT_TEST_MODE
118  if (current_state%parallel%my_rank == 1) then
119  write(*,*) current_state%parallel%my_rank, current_state%p%data(:,3,3)
120  end if
121 #endif
122 
123  call blocking_halo_swap(current_state, halo_swap_state, copy_p_to_halo_buffer, &
125 
126  end subroutine timestep_callback
127 
130  subroutine finalisation_callback(current_state)
131  type(model_state_type), target, intent(inout) :: current_state
132 
133  call finalise_pencil_fft(current_state%parallel%monc_communicator)
134  deallocate(pt)
135  if (allocated(cos_x)) deallocate(cos_x)
136  if (allocated(cos_y)) deallocate(cos_y)
137  end subroutine finalisation_callback
138 
144  subroutine complete_psrce_calculation(current_state, y_halo_size, x_halo_size)
145  type(model_state_type), target, intent(inout) :: current_state
146  integer, intent(in) :: y_halo_size, x_halo_size
147 
148  integer :: ierr, combined_handles(2), i, j, k
149 
150  combined_handles(1)=current_state%psrce_x_hs_recv_request
151  combined_handles(2)=current_state%psrce_y_hs_recv_request
152  call mpi_waitall(2, combined_handles, mpi_statuses_ignore, ierr)
153 
154  do j=current_state%local_grid%local_domain_start_index(y_index), current_state%local_grid%local_domain_end_index(y_index)
155  do k=2,current_state%local_grid%size(z_index)
156 #ifdef U_ACTIVE
157  current_state%p%data(k,j,x_halo_size+1)=current_state%p%data(k,j,x_halo_size+1)-&
158  current_state%psrce_recv_buffer_x(k-1,j-x_halo_size)
159 #endif
160 #ifdef V_ACTIVE
161  if (j .gt. y_halo_size+1) current_state%p%data(k, j, x_halo_size+1)=current_state%p%data(k, j, x_halo_size+1)-&
162  current_state%global_grid%configuration%horizontal%cy * current_state%sv%data(k, j-1, x_halo_size+1)
163 #endif
164  end do
165  end do
166 
167 #ifdef V_ACTIVE
168  do i=current_state%local_grid%local_domain_start_index(x_index), current_state%local_grid%local_domain_end_index(x_index)
169  do k=2,current_state%local_grid%size(z_index)
170  current_state%p%data(k,y_halo_size+1,i)=current_state%p%data(k,y_halo_size+1,i)-&
171  current_state%psrce_recv_buffer_y(k-1,i-y_halo_size)
172  end do
173  end do
174 #endif
175 
176  combined_handles(1)=current_state%psrce_x_hs_send_request
177  combined_handles(2)=current_state%psrce_y_hs_send_request
178  call mpi_waitall(2, combined_handles, mpi_statuses_ignore, ierr)
179  end subroutine complete_psrce_calculation
180 
188  subroutine tridiagonal_solver(current_state, pressure_term, fourier_space_sizes)
189  type(model_state_type), target, intent(inout) :: current_state
190  real(kind=DEFAULT_PRECISION), dimension(:,:,:), intent(inout) :: pressure_term
191  integer, intent(in) :: fourier_space_sizes(3)
192 
193  integer :: i, j, k, j_start
194  real(kind=DEFAULT_PRECISION) :: b(fourier_space_sizes(z_index)), b1(fourier_space_sizes(z_index)), &
195  s(fourier_space_sizes(z_index)), s1(fourier_space_sizes(z_index)), cij
196 
197  do i=1,fourier_space_sizes(x_index)
198  j_start=merge(3, 1, current_state%local_grid%start(y_index) ==1 .and. current_state%local_grid%start(x_index) == 1 &
199  .and. i .lt. 3)
200  do j=j_start,fourier_space_sizes(y_index)
201 
202  cij=cos_x(i)+cos_y(j)
203  b(2)=cij-current_state%global_grid%configuration%vertical%cza(2)
204  b1(2)=1.0_default_precision/b(2)
205  s1(2)=pressure_term(2,j,i)
206 
207  do k=3,fourier_space_sizes(z_index)
208  b(k)=cij+current_state%global_grid%configuration%vertical%czg(k)
209  b1(k)=1.0_default_precision/(b(k)-current_state%global_grid%configuration%vertical%czh(k)*b1(k-1))
210  s1(k)=pressure_term(k,j,i)-current_state%global_grid%configuration%vertical%czb(k)*s1(k-1)*b1(k-1)
211  end do
212 
213  pressure_term(fourier_space_sizes(z_index),j,i)=s1(fourier_space_sizes(z_index))* b1(fourier_space_sizes(z_index))
214 
215  do k=fourier_space_sizes(z_index)-1,2,-1
216  pressure_term(k,j,i)=(s1(k)-current_state%global_grid%configuration%vertical%cza(k)*&
217  pressure_term(k+1,j,i))*b1(k)
218  end do
219  end do
220  end do
221 
222  ! Handle the zero wavenumber, which will only be on processes where the X and Y dimension starts at 1
223  if (current_state%local_grid%start(y_index) == 1 .and. current_state%local_grid%start(x_index) == 1) then
224  do i=1,2
225  do j=1,2
226  s(2)=pressure_term(2,j,i)
227  pressure_term(1,j,i)=0.0_default_precision
228  pressure_term(2,j,i)=0.0_default_precision
229  do k=3, fourier_space_sizes(z_index)
230  s(k)=pressure_term(k,j,i)
231  pressure_term(k,j,i)=(s(k-1)-current_state%global_grid%configuration%vertical%czg(k-1)*pressure_term(k-1,j,i)-&
232  current_state%global_grid%configuration%vertical%czb(k-1)*pressure_term(k-2,j,i))/&
233  current_state%global_grid%configuration%vertical%cza(k-1)
234  end do
235  end do
236  end do
237  end if
238  end subroutine tridiagonal_solver
239 
248  subroutine copy_p_to_halo_buffer(current_state, neighbour_description, dim, source_index, &
249  pid_location, current_page, source_data)
250  type(model_state_type), intent(inout) :: current_state
251  integer, intent(in) :: dim, pid_location, source_index
252  integer, intent(inout) :: current_page(:)
253  type(neighbour_description_type), intent(inout) :: neighbour_description
254  type(field_data_wrapper_type), dimension(:), intent(in), optional :: source_data
255 
256  call copy_field_to_buffer(current_state%local_grid, neighbour_description%send_halo_buffer, current_state%p%data, &
257  dim, source_index, current_page(pid_location))
258 
259  current_page(pid_location)=current_page(pid_location)+1
260  end subroutine copy_p_to_halo_buffer
261 
262  !! @param dim The dimension we receive for
263  !! @param target_index The target index for the dimension we are receiving for
264  !! @param neighbour_location The location in the local neighbour data stores of this neighbour
265  !! @param current_page The current, next, halo swap page to read from (all previous have been read and copied already)
266  !! @param source_data Optional source data which is written into
267  subroutine copy_halo_buffer_to_p(current_state, neighbour_description, dim, target_index, &
268  neighbour_location, current_page, source_data)
269  type(model_state_type), intent(inout) :: current_state
270  integer, intent(in) :: dim, target_index, neighbour_location
271  integer, intent(inout) :: current_page(:)
272  type(neighbour_description_type), intent(inout) :: neighbour_description
273  type(field_data_wrapper_type), dimension(:), intent(in), optional :: source_data
274 
275  call copy_buffer_to_field(current_state%local_grid, neighbour_description%recv_halo_buffer, current_state%p%data, &
276  dim, target_index, current_page(neighbour_location))
277 
278  current_page(neighbour_location)=current_page(neighbour_location)+1
279  end subroutine copy_halo_buffer_to_p
280 
284  subroutine perform_local_data_copy_for_p(current_state, halo_depth, involve_corners, source_data)
285  type(model_state_type), intent(inout) :: current_state
286  integer, intent(in) :: halo_depth
287  logical, intent(in) :: involve_corners
288  type(field_data_wrapper_type), dimension(:), intent(in), optional :: source_data
289 
290  call perform_local_data_copy_for_field(current_state%p%data, current_state%local_grid, &
291  current_state%parallel%my_rank, halo_depth, involve_corners)
292  end subroutine perform_local_data_copy_for_p
293 end module fftsolver_mod
subroutine tridiagonal_solver(current_state, pressure_term, fourier_space_sizes)
The tridiagonal solver which runs in Fourier space on the pressure terms. Note that because we are go...
Definition: fftsolver.F90:189
subroutine, public init_halo_communication(current_state, get_fields_per_halo_cell, halo_state, halo_depth, involve_corners)
Initialises a halo swapping state, by determining the neighbours, size of data in each swap and alloc...
subroutine, public blocking_halo_swap(current_state, halo_swap_state, copy_to_halo_buffer, perform_local_data_copy, copy_from_halo_buffer, copy_corners_to_halo_buffer, copy_from_halo_buffer_to_corner, source_data)
Performs the entire halo swap operation, this is simply a wrapper around the nonblocking initiate and...
subroutine perform_local_data_copy_for_p(current_state, halo_depth, involve_corners, source_data)
Does local data copying for P variable halo swap.
Definition: fftsolver.F90:285
subroutine, public perform_local_data_copy_for_field(field_data, local_grid, my_rank, halo_depth, involve_corners)
Will perform a a local copy for the halo data of a field.
integer, parameter, public log_error
Only log ERROR messages.
Definition: logging.F90:11
Contains the types used for communication, holding the state of communications and supporting activit...
subroutine, public finalise_pencil_fft(monc_communicator)
Cleans up allocated buffer memory.
Definition: pencilfft.F90:93
Logging utility.
Definition: logging.F90:2
Pressure solver which uses a tridiagonal algorithm operating on the pressure terms in Fourier space...
Definition: fftsolver.F90:3
type(halo_communication_type), save halo_swap_state
Definition: fftsolver.F90:25
integer, parameter, public default_precision
MPI communication type which we use for the prognostic and calculation data.
Definition: datadefn.F90:17
integer, parameter, public z_index
Grid index parameters.
Definition: grids.F90:14
subroutine complete_psrce_calculation(current_state, y_halo_size, x_halo_size)
Completes the psrce calculation by waiting on all outstanding psrce communications to complete and th...
Definition: fftsolver.F90:145
subroutine initialisation_callback(current_state)
This initialisation callback sets up the pencil fft module, allocates data for the fourier space pres...
Definition: fftsolver.F90:43
Contains common definitions for the data and datatypes used by MONC.
Definition: datadefn.F90:2
The ModelState which represents the current state of a run.
Definition: state.F90:39
logical function, public is_component_enabled(options_database, component_name)
Determines whether or not a specific component is registered and enabled.
Definition: registry.F90:334
subroutine, public log_master_log(level, message)
Will log just from the master process.
Definition: logging.F90:47
subroutine, public copy_buffer_to_field(local_grid, halo_buffer, field_data, dim, target_index, halo_page)
Copies the received buffer for a specific field to the corresponding halo data of that prognostic fie...
subroutine, public perform_forward_3dfft(current_state, source_data, target_data)
Performs a forward 3D FFT and currently results in target data which is the X, Z, Y oriented pencil N...
Definition: pencilfft.F90:115
real(kind=default_precision) pi
Definition: fftsolver.F90:22
integer function, dimension(3), public initialise_pencil_fft(current_state, my_y_start, my_x_start)
Initialises the pencil FFT functionality, this will create the transposition structures needed...
Definition: pencilfft.F90:52
Maintains the state of a halo swap and contains buffers, neighbours etc.
integer function, public get_single_field_per_halo_cell(current_state)
A very common function, which returns a single field per halo cell which is used to halo swap just on...
Interfaces and types that MONC components must specify.
subroutine copy_p_to_halo_buffer(current_state, neighbour_description, dim, source_index, pid_location, current_page, source_data)
Copies the p field data to halo buffers for a specific process in a dimension and halo cell...
Definition: fftsolver.F90:250
Provides the mechanism for halo swapping. This module contains the functionality required to determin...
subroutine, public perform_backwards_3dfft(current_state, source_data, target_data)
Performs a backwards 3D FFT and currently results in target data which is the X, Z, Y oriented pencil Note that the source_data here takes no account for the halo, it is up to caller to exclude this. This does no FFT in Z, but transposes to Y, does FFT in Y, then transposes to X and performs an FFT in that dimension. Pencil decomposition is used which has already been set up.
Definition: pencilfft.F90:138
Describes the neighbours of a process in a specific dimension and contains the communication buffers ...
subroutine, public copy_field_to_buffer(local_grid, halo_buffer, field_data, dim, source_index, halo_page)
Copies prognostic field data to send buffer for specific field, dimension, halo cell.
integer, dimension(3) fourier_space_sizes
Definition: fftsolver.F90:24
This Pencil FFT performs 3D forward and backwards FFTs using pencil decomposition. It uses FFTW for the actual FFT kernel and this module contains all the data decomposition around this. There is no FFT required in Z, so this performs FFTs in Y and X (in that order forward and reversed backwards.) The data decomposition is the complex aspect, there is the concept of forward and backwards transformations. Forward transformations will go from pencil Z to Y to X and the backwards transformations undo these, so go from X to Y to Z. Note that we use quite a lot of buffer space here, this could be cut down if Y=X dimensions so some optimisation on memory could be done there in that case.
Definition: pencilfft.F90:8
Functionality to support the different types of grid and abstraction between global grids and local o...
Definition: grids.F90:5
subroutine timestep_callback(current_state)
Timestep call back, which will transform to Fourier space, do a tridiagonal solve and then back into ...
Definition: fftsolver.F90:87
subroutine, public finalise_halo_communication(halo_swap_state)
Finalises the halo swap represented by the state by freeing up all the allocated memory.
real(kind=default_precision), dimension(:,:,:), allocatable pt
Definition: fftsolver.F90:23
type(component_descriptor_type) function, public fftsolver_get_descriptor()
Descriptor of this component for registration.
Definition: fftsolver.F90:33
subroutine copy_halo_buffer_to_p(current_state, neighbour_description, dim, target_index, neighbour_location, current_page, source_data)
Definition: fftsolver.F90:269
The model state which represents the current state of a run.
Definition: state.F90:2
subroutine finalisation_callback(current_state)
Called at MONC finalisation, will call to the pencil fft module to clean itself up and free the press...
Definition: fftsolver.F90:131
real(kind=default_precision), dimension(:), allocatable cos_x
Definition: fftsolver.F90:21
integer, parameter, public y_index
Definition: grids.F90:14
integer, parameter, public x_index
Definition: grids.F90:14
real(kind=default_precision), dimension(:), allocatable cos_y
Definition: fftsolver.F90:21
MONC component registry.
Definition: registry.F90:5