Module fd4_couple_mod


Uses:
    module fd4_globaldef_mod
    module fd4_vartab_mod
    module fd4_block_mod
    module fd4_domain_mod
    module fd4_iter_mod
    module fd4_mpi_mod
    module fd4_mpitype_mod
    module fd4_comm_mod
    module fd4_stat_mod
    module fd4_neigh_mod
    module fd4_metadata_mod
    module fd4_mdsd_mod
    module fd4_part_sfc_mod
    module rbtree_fd4_mdsd_mod
    module rbtree_fd4_neigh_mod
    module rbtree_int_mod
    module rbtree_int8_mod
    module stack_mod
    module stack3_mod
    module section_mod
    module timing_mod
Types:
    public type fd4_couple_local_desc
    public type fd4_couple
Variables:
    integer (kind=i_k), private, parameter :: CPL_PUT = 1
    integer (kind=i_k), private, parameter :: CPL_GET = 2
    integer (kind=i_k), private, parameter :: CPL_GET_NOFACE = 3
    integer (kind=i_k), private, parameter :: CPL_GET_CASE = 4
    integer (kind=i_k), private, parameter :: CPL_GET_CASE_SKIP = 0
    integer (kind=i_k), private, parameter :: CPL_GET_CASE_NORMAL = 1
    integer (kind=i_k), private, parameter :: CPL_GET_CASE_CHECK_L = 2
    integer (kind=i_k), private, parameter :: CPL_GET_CASE_CHECK_R = 3
    integer (kind=i_k), private, parameter :: CPL_NEW = 1
    integer (kind=i_k), private, parameter :: CPL_CRT = 2
    integer (kind=i_k), private, parameter :: CPL_ADD = 3
    integer (kind=i_k), private, parameter :: CPL_LADD = 4
    integer (kind=i_k), private, parameter :: CPL_CMMT = 5
    integer (kind=i_k), private, parameter :: CPL_PART = 6
    integer (kind=i_k), private, parameter :: CPL_VAR = 7
    integer (kind=i_k), private, parameter :: CPL_COPY = 8
    integer (kind=i_k), public, parameter :: FD4_CPL_GET = 1
    integer (kind=i_k), public, parameter :: FD4_CPL_PUT = 2
    integer (kind=i_k), public, parameter :: FD4_CPL_BOTH = IOR (FD4_CPL_GET, FD4_CPL_PUT)
    integer (kind=i_k), private, parameter :: FD4_NEIGH_CPL = 1
    integer (kind=i_k), private, parameter :: FD4_NEIGH_FD4 = 2
    integer (kind=i_k), private, parameter, dimension (3,0:3) :: FEXT = reshape ((/ 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1 /), (/ 3, 4 /))
Subroutines and functions:
    public subroutine fd4_couple_debug_metadata (cpl, name)
    public subroutine fd4_couple_create (cpl, domain, err, opt_nvar, opt_nabnd, opt_cpldir, opt_fext, opt_cpl)
    public subroutine fd4_couple_add_partition (cpl, rank, abnd, err)
    public subroutine fd4_couple_add_var (cpl, idx, st, err, opt_cplarridx, opt_cplarridx_multi, opt_nvar)
    public subroutine fd4_couple_add_array (cpl, rank, idx, st, abnd, cplarridx, err)
    private subroutine fd4_couple_abnd_to_mdsd (cpl, rank, abnd, err)
    private subroutine fd4_couple_abnd_to_local (cpl, abnd, abnd_idx)
    public subroutine fd4_couple_set_local_3D_array (cpl, cplarridx, array, offset, err)
    public subroutine fd4_couple_set_local_4D_array (cpl, cplarridx, array4, offset, offset4, err)
    public subroutine fd4_couple_chg_local_3D_array (cpl, cplarridx, array, err)
    public subroutine fd4_couple_chg_local_4D_array (cpl, cplarridx, array4, err)
    public subroutine fd4_couple_commit (cpl, err, opt_face_overlap)
    private subroutine fd4_couple_build_cplarray_type (desc_local, abnd, bbnd, nbins, datatype, err)
    public subroutine fd4_couple_delete (cpl, err)
    public subroutine fd4_couple_put (cpl, err)
    public subroutine fd4_couple_get (cpl, err, opt_novnull)
    private subroutine fd4_couple_blocktype (domain, idx, st, ext, bnd, blocktype)
    public subroutine fd4_couple_mark_blocks (cpl, err)
    private subroutine fd4_couple_stack_global_merge (mpi, st, err, opt_stat, opt_statidx)

Coupling of FD4 to other parallel model.

Limitations:

Preprocessor options: Couple statistics: Example:
  real(r_k), allocatable :: array_Rho(:,:,:,:), array_Tmp(:,:,:,:)
  real(r_k), pointer :: arraypointer(:,:,:)
  type(fd4_couple) :: couple

  [...]
  initialize(array_Rho, array_Tmp, ...)
  [...]

  ! Create the couple context.
  call fd4_couple_create(couple, domain, err)

  ! Loop over all MPI processes to tell FD4 the extern partitioning.
  do irank=0,nproc-1
    ! Set cabnd(3,2) to the bounds of the partition of current rank.
    ! (here we simply calculate bounds for an 1D-partitioning)
    cabnd(1:3,1) = (/   1+(irank*sizeX)/nproc,     1 ,     1 /)
    cabnd(1:3,2) = (/ ((irank+1)*sizeX)/nproc, sizeY , sizeZ /)
    ! Add a couple array of current rank with given bounds.
    call fd4_couple_add_partition(couple, irank, cabnd, err)
  end do

  ! Add the variables var_Rho and var_Tmp to the couple context.
  ! FD4 assigns an index to the couple array (caidx_Rho, caidx_Tmp).
  call fd4_couple_add_var(couple, var_Rho, step, err, caidx_Rho)
  call fd4_couple_add_var(couple, var_Tmp, step, err, caidx_Tmp)

  ! Assign the pointer and offset to relevant data of the local couple array.
  ! The argument must be a pointer. Use an additional pointer to point to an
  ! allocatable where required (contiguous subsections are allowed).
  arraypointer => array_Rho(:,:,:,2)
  call fd4_couple_set_local_3D_array(couple, caidx_Rho, arraypointer, (/1,1,1/), err)
  arraypointer => array_Tmp(:,:,:,1)
  call fd4_couple_set_local_3D_array(couple, caidx_Tmp, arraypointer, (/1,1,1/), err)

  ! Commit the couple context, FD4 now checks the couple arrays and prepares MPI data types
  call fd4_couple_commit(couple, err)

  ! Finally put data to FD4's data structures
  call fd4_couple_put(couple, err)
 

Some notes about sorting of the components of the mpi datatypes we are building here:

TODO:

Author: Matthias Lieber

See also: module fd4_domain_mod


Description of Types

fd4_couple_local_desc

public type fd4_couple_local_desc
    real (kind=r_k), pointer, dimension (:,:,:) :: array => NULL ()
    real (kind=r_k), pointer, dimension (:,:,:,:) :: array4 => NULL ()
    integer (kind=i_k), dimension (3) :: offset = (/ 1, 1, 1 /)
    integer (kind=i_k) :: offset4 = 1
    integer (kind=MPI_ADDRESS_KIND) :: disp
end type fd4_couple_local_desc
Components:
array data array
array4 4-dimensional data array (for variables with bins)
offset spatial offset of the data in array, e.g. (/1,1,1/)
offset4 offset of the data in the 4th dimension
disp memory address of the array
Description of one local coupling array. Used only internally.


fd4_couple

public type fd4_couple
    type (fd4_domain), pointer :: domain => NULL ()
    integer (kind=i_k), dimension (3,2) :: loopbnd
    integer, pointer, dimension (:,:,:,:,:) :: arraytypes => NULL ()
    type (rbtree_fd4_neigh), dimension (2) :: neighstore
    integer (kind=i_k) :: ncplarrays = 0
    integer (kind=i_k) :: nvars = 0
    integer (kind=i_k), dimension (3,2) :: lbnd
    integer (kind=i_k), dimension (3) :: faceoffset = 0
    integer :: state = CPL_NEW
    type (stack3) :: desc_var
    type (stack3) :: desc_abnd
    type (stack) :: desc_mdidx
    type (fd4_couple_local_desc), pointer, dimension (:) :: desc_local => NULL ()
    integer (kind=i_k) :: ins_rank = 0
    integer (kind=i_k) :: cpldir = FD4_CPL_BOTH
    integer (kind=i_k) :: stat_put
    integer (kind=i_k) :: stat_get
    integer (kind=i_k) :: stat_type
    integer (kind=i_k) :: stat_total_us
    integer (kind=i_k) :: stat_mpi_us
    logical :: face_overlap = .true.
    integer (kind=i_k) :: partidx = 0
    type (section) :: mdsect
    type (rbtree_int) :: rankcnt
    logical :: one_mdsd_per_rank = .false.
    integer (kind=i_k) :: ini_nabnd = 1
    integer (kind=i_k) :: ini_nvar = 1
    integer (kind=i_k) :: use_fext = 1
    integer (kind=i8k), dimension (2) :: partition_id = -1
    integer (kind=i8k), dimension (2) :: partition_called = -1
    logical :: couple_arrays_changed = .true.
end type fd4_couple
Components:
domain the domain
loopbnd bounds for the loop over blocks
arraytypes pre-built MPI types for couple arrays
neighstore MPI types for each communication partner are stored in a rbtree
ncplarrays number of local couple arrays in couple description
nvars number of couple variables
lbnd union bounds of all local couple arrays
faceoffset offset to local block bounds union due to face variables
state state of couple context
desc_var (idx, step, abnd_index) of local couple arrays
desc_abnd abnd (two stack3 entries) of local couple arrays
desc_mdidx meta data subdomain index of local couple arrays
desc_local pointer and offset of local coupling arrays
ins_rank last inserted rank
cpldir direction of coupling (get/put/both)
stat_put index of transferred bytes and new built types in the statistics object
stat_get index of transferred bytes and new built types in the statistics object
stat_type index of transferred bytes and new built types in the statistics object
stat_total_us index of total time and MPI time in the statistics object (only get and put)
stat_mpi_us index of total time and MPI time in the statistics object (only get and put)
face_overlap do couple arrays for face variables overlap?
partidx index of this couple context's partition in the meta data
mdsect section describing the bounds of the meta data
rankcnt counts the couple arrays of a rank in all local mdsd's
one_mdsd_per_rank true if each rank appears in max. one mdsd
ini_nabnd initial sizes of the stacks of meta data subdomains
ini_nvar initial sizes of the stacks of meta data subdomains
use_fext extend the couple arrays of face variables by one row in face dimension
partition_id partition_id for which the types in neighstore are built
partition_called partition_called for which the types in neighstore are built
couple_arrays_changed have couple arrays been changed since last get/put?
The FD4 couple context.

Description of Subroutines and Functions

fd4_couple_debug_metadata

public subroutine fd4_couple_debug_metadata (cpl, name)
    type (fd4_couple), intent(in), target :: cpl
    character (len=*), intent(in) :: name
end subroutine fd4_couple_debug_metadata
Parameters:
cpl couple context
name file name prefix
Print some debug information about coupling meta data to files for each rank

fd4_couple_create

public subroutine fd4_couple_create (cpl, domain, err, opt_nvar, opt_nabnd, opt_cpldir, opt_fext, opt_cpl)
    type (fd4_couple), intent(inout) :: cpl
    type (fd4_domain), intent(inout), target :: domain
    integer (kind=i_k), intent(out) :: err
    integer (kind=i_k), optional, intent(in) :: opt_nvar
    integer (kind=i_k), optional, intent(in) :: opt_nabnd
    integer (kind=i_k), optional, intent(in) :: opt_cpldir
    logical, optional, intent(in) :: opt_fext
    type (fd4_couple), optional, intent(in) :: opt_cpl
end subroutine fd4_couple_create
Parameters:
cpl couple context
domain the domain
err error status: 0...ok
opt_nvar number of coupling arrays per rank (hint)
opt_nabnd number of different global coupling array bounds per rank (hint)
opt_cpldir restriction of use (get/put/both)
opt_fext extend the couple arrays of face variables? default: true
opt_cpl couple context
Create FD4 couple context.

User may pass hints for the meta data creation (opt_nvar, opt_nabnd). Giving accurate hints speeds up the process of adding coupling arrays and reduces memory consumption. If possible, make use of these hints!

With opt_cpldir you can restrict the use of the couple context to fd4_couple_get and fd4_couple_put/fd4_mark_blocks. Use the parameters FD4_CPL_GET and FD4_CPL_PUT for these purpose. The default is no restriction (FD4_CPL_BOTH). The restriction is used by fd4_couple to reduce the effort in preparing MPI data types.

If opt_cpl is given, the same extern partitioning is used. In this case, no couple array bounds must be defined (fd4_couple_add_partition or fd4_couple_add_array). The next call to define the cpl must be fd4_couple_add_var. This mechanism saves memory for the definition of extern partitionings.


fd4_couple_add_partition

public subroutine fd4_couple_add_partition (cpl, rank, abnd, err)
    type (fd4_couple), intent(inout) :: cpl
    integer (kind=i_k), intent(in) :: rank
    integer (kind=i_k), intent(in), dimension (3,2) :: abnd
    integer (kind=i_k), intent(out) :: err
end subroutine fd4_couple_add_partition
Parameters:
cpl couple context
rank owner of this partition
abnd bounds of the partition in global coordinates
err error status: 0...ok
Add an extern partition to the FD4 couple context.

Only allowed after fd4_couple_create and before fd4_couple_add_var. Consecutive calls must have equal or increasing rank arguments. All ranks must call this function for all partitions (own and all other ranks').


fd4_couple_add_var

public subroutine fd4_couple_add_var (cpl, idx, st, err, opt_cplarridx, opt_cplarridx_multi, opt_nvar)
    type (fd4_couple), intent(inout) :: cpl
    integer (kind=i_k), intent(in) :: idx
    integer (kind=i_k), intent(in) :: st
    integer (kind=i_k), intent(out) :: err
    integer (kind=i_k), optional, intent(out) :: opt_cplarridx
    integer (kind=i_k), optional, intent(out), dimension (:) :: opt_cplarridx_multi
    integer (kind=i_k), optional, intent(in) :: opt_nvar
end subroutine fd4_couple_add_var
Parameters:
cpl couple context
idx field index in vartab
st time step index
err error status: 0...ok
opt_cplarridx local couple array index
opt_cplarridx_multi local couple array indexes in case of multiple array bounds per rank
opt_nvar exact number of variables, must be present in case of multiple array bounds per rank
Add a variable to the FD4 couple context.

Only allowed after fd4_couple_add_partition and before fd4_couple_set_local_*D_array. If the couple context has been created with an optional partitioning of an existing couple context, this subroutine can be called immediately after fd4_couple_create. All ranks must call this function for the same set of variables in the same order.


fd4_couple_add_array

public subroutine fd4_couple_add_array (cpl, rank, idx, st, abnd, cplarridx, err)
    type (fd4_couple), intent(inout) :: cpl
    integer (kind=i_k), intent(in) :: rank
    integer (kind=i_k), intent(in) :: idx
    integer (kind=i_k), intent(in) :: st
    integer (kind=i_k), intent(in), dimension (3,2) :: abnd
    integer (kind=i_k), intent(inout) :: cplarridx
    integer (kind=i_k), intent(out) :: err
end subroutine fd4_couple_add_array
Parameters:
cpl couple context
rank owner of this coupling array
idx field index in vartab
st time step index
abnd bounds of the coupling array in global coordinates
cplarridx local couple array index (>0), unchanged if not local
err error status: 0...ok
Add a coupling array to the FD4 couple context.

DEPRECATED - use fd4_couple_add_partition and fd4_couple_add_var instead.

Only allowed after fd4_couple_create and before fd4_couple_set_local_*D_array. Consecutive calls must have equal or increasing rank arguments. All ranks must call this function for all coupling arrays (own and all other ranks').


fd4_couple_abnd_to_mdsd

private subroutine fd4_couple_abnd_to_mdsd (cpl, rank, abnd, err)
    type (fd4_couple), intent(inout) :: cpl
    integer (kind=i_k), intent(in) :: rank
    integer (kind=i_k), intent(in), dimension (3,2) :: abnd
    integer (kind=i_k), intent(out) :: err
end subroutine fd4_couple_abnd_to_mdsd
Parameters:
cpl couple context
rank owner of this partition
abnd bounds of the partition in global coordinates
err error status: 0...ok
Internal routine which adds a partition to the meta data subdomains.

fd4_couple_abnd_to_local

private subroutine fd4_couple_abnd_to_local (cpl, abnd, abnd_idx)
    type (fd4_couple), intent(inout) :: cpl
    integer (kind=i_k), intent(in), dimension (3,2) :: abnd
    integer (kind=i_k), intent(out) :: abnd_idx
end subroutine fd4_couple_abnd_to_local
Parameters:
cpl couple context
abnd bounds of the partition in global coordinates
abnd_idx index of the abnd in cpl%desc_abnd
Internal routine which adds a partition to local description of partitions.

This is an external partition this rank owns.


fd4_couple_set_local_3D_array

public subroutine fd4_couple_set_local_3D_array (cpl, cplarridx, array, offset, err)
    type (fd4_couple), intent(inout) :: cpl
    integer (kind=i_k), intent(in) :: cplarridx
    real (kind=r_k), pointer, dimension (:,:,:) :: array
    integer (kind=i_k), intent(in), dimension (3) :: offset
    integer (kind=i_k), intent(out) :: err
end subroutine fd4_couple_set_local_3D_array
Parameters:
cpl couple context
cplarridx local couple array index, value returned by fd4_couple_add_array
array data array
offset spatial offset of the data in array, e.g. (/1,1,1/)
err error status: 0...ok
Add a local 3D coupling array to the FD4 couple context.

Only allowed after fd4_couple_add_array and before fd4_couple_commit. Needs to be called for all local coupling arrays which have been added via fd4_couple_add_array with rank parameter == local rank.


fd4_couple_set_local_4D_array

public subroutine fd4_couple_set_local_4D_array (cpl, cplarridx, array4, offset, offset4, err)
    type (fd4_couple), intent(inout) :: cpl
    integer (kind=i_k), intent(in) :: cplarridx
    real (kind=r_k), pointer, dimension (:,:,:,:) :: array4
    integer (kind=i_k), intent(in), dimension (3) :: offset
    integer (kind=i_k), intent(in) :: offset4
    integer (kind=i_k), intent(out) :: err
end subroutine fd4_couple_set_local_4D_array
Parameters:
cpl couple context
cplarridx local couple array index, value returned by fd4_couple_add_array
array4 data array
offset spatial offset of the data in array, e.g. (/1,1,1/)
offset4 offset of the data in the 4th dimension
err error status: 0...ok
Add a local 4D coupling array to the FD4 couple context.

Only allowed after fd4_couple_add_array and before fd4_couple_commit. Needs to be called for all local coupling arrays which have been added via fd4_couple_add_array with rank parameter == local rank.


fd4_couple_chg_local_3D_array

public subroutine fd4_couple_chg_local_3D_array (cpl, cplarridx, array, err)
    type (fd4_couple), intent(inout) :: cpl
    integer (kind=i_k), intent(in) :: cplarridx
    real (kind=r_k), pointer, dimension (:,:,:) :: array
    integer (kind=i_k), intent(out) :: err
end subroutine fd4_couple_chg_local_3D_array
Parameters:
cpl couple context
cplarridx local couple array index, value returned by fd4_couple_add_array
array data array
err error status: 0...ok
Change a local 3D coupling array to the FD4 couple context.

Only allowed after fd4_couple_commit.


fd4_couple_chg_local_4D_array

public subroutine fd4_couple_chg_local_4D_array (cpl, cplarridx, array4, err)
    type (fd4_couple), intent(inout) :: cpl
    integer (kind=i_k), intent(in) :: cplarridx
    real (kind=r_k), pointer, dimension (:,:,:,:) :: array4
    integer (kind=i_k), intent(out) :: err
end subroutine fd4_couple_chg_local_4D_array
Parameters:
cpl couple context
cplarridx local couple array index, value returned by fd4_couple_add_array
array4 data array
err error status: 0...ok
Change a local 4D coupling array to the FD4 couple context.

Only allowed after fd4_couple_commit.


fd4_couple_commit

public subroutine fd4_couple_commit (cpl, err, opt_face_overlap)
    type (fd4_couple), intent(inout), target :: cpl
    integer (kind=i_k), intent(out) :: err
    logical, optional, intent(in) :: opt_face_overlap
end subroutine fd4_couple_commit
Parameters:
cpl couple context
err error status: 0...ok
opt_face_overlap Do the couple arrays for face variables overlap at the inner boundaries (one cell in face direction)? Default is yes.
Commit FD4 couple context.

fd4_couple_build_cplarray_type

private subroutine fd4_couple_build_cplarray_type (desc_local, abnd, bbnd, nbins, datatype, err)
    type (fd4_couple_local_desc), intent(in) :: desc_local
    integer (kind=i_k), intent(in), dimension (3,2) :: abnd
    integer (kind=i_k), intent(in), dimension (3,2) :: bbnd
    integer (kind=i_k), intent(in) :: nbins
    integer, intent(out) :: datatype
    integer (kind=i_k), intent(out) :: err
end subroutine fd4_couple_build_cplarray_type
Parameters:
desc_local description of couple local array
abnd bounds of couple array
bbnd bounds of this (locally non-existing) block
nbins number of bins of the variable in desc
datatype the MPI data type
err error status: 0...ok
Build an MPI data type for a couple array and an FD4 block. Internal routine.

fd4_couple_delete

public subroutine fd4_couple_delete (cpl, err)
    type (fd4_couple), intent(inout) :: cpl
    integer (kind=i_k), intent(out) :: err
end subroutine fd4_couple_delete
Parameters:
cpl couple context
err error status: 0...ok
Delete the couple context and all its memory.

fd4_couple_put

public subroutine fd4_couple_put (cpl, err)
    type (fd4_couple), intent(inout), target :: cpl
    integer (kind=i_k), intent(out) :: err
end subroutine fd4_couple_put
Parameters:
cpl couple context
err error status: 0...ok
Put data to the framework. Each process contributes several blocks of data. desc%abnd has to be set in all processes correctly. You must supply the same desc as in fd4_couple_create, the only allowed change is the pointer to the data (desc%array).

TODO:


fd4_couple_get

public subroutine fd4_couple_get (cpl, err, opt_novnull)
    type (fd4_couple), intent(inout), target :: cpl
    integer (kind=i_k), intent(out) :: err
    logical, optional, intent(in) :: opt_novnull
end subroutine fd4_couple_get
Parameters:
cpl couple context
err error status: 0...ok
opt_novnull do not initalize the receiver arrays with vnull
Get data from the framework. Each process asks for several blocks of data. desc%abnd has to be set in all processes correctly.

Additional limitation for face variables: The upper face cell plane of the blocks is just cut and not transferred to the couple arrays!

TODO:


fd4_couple_blocktype

private subroutine fd4_couple_blocktype (domain, idx, st, ext, bnd, blocktype)
    type (fd4_domain), intent(inout) :: domain
    integer (kind=i_k), intent(in) :: idx
    integer (kind=i_k), intent(in) :: st
    integer (kind=i_k), intent(in), dimension (3) :: ext
    integer (kind=i_k), intent(in), dimension (3,2) :: bnd
    integer, intent(out) :: blocktype
end subroutine fd4_couple_blocktype
Parameters:
domain the domain
idx variable index
st time step index
ext block extents
bnd bound of spatial subset
blocktype MPI data type for the block
Get an MPI data type for the variable described by (idx, st), a block of size ext, and a spatial subsection within the block described by bbnd. If an MPI data type with same properties has already been created, this type will be re-used.

Internal routine. Used in fd4_couple_put and fd4_couple_get.


fd4_couple_mark_blocks

public subroutine fd4_couple_mark_blocks (cpl, err)
    type (fd4_couple), intent(inout), target :: cpl
    integer (kind=i_k), intent(out) :: err
end subroutine fd4_couple_mark_blocks
Parameters:
cpl couple context
err error status: 0...ok
Pretend putting data to the framework and mark all framework blocks which need to be created to fully capture all threshold variables.

Use this in a 3-stage coupling process:

You must use the same parameters for fd4_couple_mark_blocks as for fd4_couple_put.

TODO:


fd4_couple_stack_global_merge

private subroutine fd4_couple_stack_global_merge (mpi, st, err, opt_stat, opt_statidx)
    type (fd4_mpi), intent(inout) :: mpi
    type (stack), intent(inout) :: st
    integer (kind=i_k), intent(out) :: err
    type (fd4_stat), optional, intent(inout) :: opt_stat
    integer (kind=i_k), optional, intent(in) :: opt_statidx
end subroutine fd4_couple_stack_global_merge
Parameters:
mpi the domain's mpi context
st the stack
err error status: 0...ok
opt_stat fd4 statistics
opt_statidx stat idx for MPI time
Merge the process-local stacks st to one global stack in every process.

Duplicates will be removed, the stack will be sorted. It is assumed that the process local stacks are already sorted.

Internal routine.