module fd4_globaldef_mod module fd4_domain_mod module fd4_iter_mod module fd4_part_1d_mod module fd4_mpi_mod module hilbert_mod module hilbert2d_mod module morton_mod module morton2d_mod module mrgrnk_mod module stack_mod module stack3_mod module rbtree_fd4_block_mod module timing_mod
Types:
public type part_sfc_iter
Variables:
integer (kind=i_k), public, parameter :: FD4_PART_SFC_SIMPLE = 1 integer (kind=i_k), public, parameter :: FD4_PART_SFC_BINSRCH = 2 integer (kind=i_k), public, parameter :: FD4_PART_SFC_PARALLEL = 4 integer (kind=i_k), public, parameter :: FD4_PART_SFC_SIMPLE2 = 8 integer (kind=i_k), public, parameter :: FD4_PART_SFC_RB = 16 integer (kind=i_k), public, parameter :: FD4_PART_SFC_EBS = 32 integer (kind=i_k), public, parameter :: FD4_PART_SFC_HILBERT = 128 integer (kind=i_k), public, parameter :: FD4_PART_SFC_MORTON = 256
Subroutines and functions:
public subroutine fd4_part_sfc_prefixsum_parallel (domain, sfctype, err, opt_sumweight, opt_maxweight, opt_consistent) public subroutine fd4_part_sfc_weight2 (domain, local_blocks, method, sfctype, binsteps, accuracy, blkmap_update, par_pfxs, p1d_root, maxwgt_comm, balance, err) public subroutine fd4_part_sfc_weight (domain, all_blocks, all_blocks_len, local_blocks, method, sfctype, blocks_changed, binsteps, accuracy, balance, err) private subroutine fd4_part_sfc_exch_weights_r8k (domain, localweights, globalweights, err, opt_root) private subroutine fd4_part_sfc_exch_weights (domain, err, opt_localweights, opt_set_blkweights) public subroutine fd4_part_sfc_noweight (domain, all_blocks, all_blocks_len, local_blocks, sfctype, add_only, opt_blocks_changed) public subroutine fd4_part_sfc_update_blkmap_bnd (domain, bbnd, err) public subroutine fd4_part_sfc_update_blkmap_loc (domain, sfciter, oldp, pstart, pend, local_blocks, err) public subroutine fd4_part_sfc_update_blkmap (domain)
Author: Matthias Lieber
See also: module hilbert_mod, module fd4_part_1d_mod, module fd4_domain_mod
public type part_sfc_iter integer (kind=i_k), dimension (3) :: p = -1 integer (kind=i_k) :: idx = -1 integer (kind=i_k) :: sfctype = -1 integer (kind=i_k), dimension (2) :: map2d = 1 type (hilbert_iter) :: hiter type (hilbert2d_iter) :: h2diter type (morton_iter) :: miter type (morton2d_iter) :: m2diter end type part_sfc_iter
General iterator over a space-filling curve (SFC).
public subroutine fd4_part_sfc_prefixsum_parallel (domain, sfctype, err, opt_sumweight, opt_maxweight, opt_consistent) type (fd4_domain), intent(inout) :: domain integer (kind=i_k), intent(in) :: sfctype integer (kind=i_k), intent(out) :: err real (kind=r8k), optional, intent(out) :: opt_sumweight real (kind=r8k), optional, intent(out) :: opt_maxweight logical, optional, intent(in) :: opt_consistent end subroutine fd4_part_sfc_prefixsum_parallel
Parameters:
domain | the domain |
sfctype | space-filling curve to use |
err | error status: 0...ok |
opt_sumweight | sum of all block weights (broadcast to all ranks) |
opt_maxweight | local maximum weight |
opt_consistent | ensure consistent values at process borders (default: no) |
Stores result distributed in domain%balance%sfcd_blkweights_ps.
The local sfcd_blkweights_ps(0) always contains the sum of the weights of all blocks on ranks < current rank.
public subroutine fd4_part_sfc_weight2 (domain, local_blocks, method, sfctype, binsteps, accuracy, blkmap_update, par_pfxs, p1d_root, maxwgt_comm, balance, err) type (fd4_domain), intent(inout) :: domain type (stack3), intent(inout) :: local_blocks integer (kind=i_k), intent(in) :: method integer (kind=i_k), intent(in) :: sfctype integer (kind=i_k), intent(in) :: binsteps real (kind=r8k), intent(in) :: accuracy logical, intent(in) :: blkmap_update logical, intent(in) :: par_pfxs integer (kind=i_k), intent(in) :: p1d_root logical, intent(in) :: maxwgt_comm real (kind=r4k), intent(out) :: balance integer (kind=i_k), intent(out) :: err end subroutine fd4_part_sfc_weight2
Parameters:
domain | the domain |
local_blocks | stack3 of new local partition (differs from rank to rank) |
method | 1D partitioning method to use |
sfctype | space-filling curve to use |
binsteps | max. number of steps for binary search methods |
accuracy | target ratio from best possible load balance for binary steps estimation, must be <= 1.0 |
blkmap_update | update the full block map |
par_pfxs | perform prefix sum in parallel |
p1d_root | perform the 1D partitioning calculation serially on rank p1d_root if p1d_root >= 0 |
maxwgt_comm | determine maxweight with MPI reduce (or local comp.) |
balance | estimated balance of new partitioning |
err | error status: 0...ok |
This version only works in non-adaptive block mode and when the SFC type is never changed.
balance is not set properly by all methods.
Optimizations in comparison to fd4_part_sfc_weight:
public subroutine fd4_part_sfc_weight (domain, all_blocks, all_blocks_len, local_blocks, method, sfctype, blocks_changed, binsteps, accuracy, balance, err) type (fd4_domain), intent(inout) :: domain integer (kind=i_k), pointer, dimension (:,:) :: all_blocks integer (kind=i_k), intent(in) :: all_blocks_len type (stack3), intent(inout) :: local_blocks integer (kind=i_k), intent(in) :: method integer (kind=i_k), intent(in) :: sfctype logical, intent(in) :: blocks_changed integer (kind=i_k), intent(in) :: binsteps real (kind=r8k), intent(in) :: accuracy real (kind=r4k), intent(out) :: balance integer (kind=i_k), intent(out) :: err end subroutine fd4_part_sfc_weight
Parameters:
domain | the domain |
all_blocks | array of positions of the blocks in the new partition |
all_blocks_len | length of all_blocks(3,:) |
local_blocks | stack3 of new local partition (differs from rank to rank) |
method | 1D partitioning method to use |
sfctype | space-filling curve to use |
blocks_changed | tell me if the blocks have changed (created/removed) |
binsteps | max. number of steps for binary search methods |
accuracy | target ratio from best possible load balance for binary steps estimation, must be <= 1.0 |
balance | estimated balance of new partitioning |
err | error status: 0...ok |
In adaptive block mode all_blocks may be NULL() and all_blocks_len is ignored.
If blocks_changed is set to false, fd4_part_sfc_weight reuses the Hilbert indices and the ranking of blocks by Hilbert index from the last call!
THIS IS LEGACY CODE; use fd4_part_sfc_weight2 instead unless adaptive block mode is in use.
private subroutine fd4_part_sfc_exch_weights_r8k (domain, localweights, globalweights, err, opt_root) type (fd4_domain), intent(inout), target :: domain real (kind=r8k), intent(in) :: localweights real (kind=r8k), intent(inout) :: globalweights integer (kind=i_k), intent(out) :: err integer (kind=i_k), optional, intent(in) :: opt_root end subroutine fd4_part_sfc_exch_weights_r8k
Parameters:
domain | the domain |
localweights | start of array of local block weights |
globalweights | (start-1) of output array of global block weights |
err | error status: 0...ok |
opt_root | only gather weights in rank opt_root, no allgather |
Differences from fd4_part_sfc_exch_weights:
Internal routine.
private subroutine fd4_part_sfc_exch_weights (domain, err, opt_localweights, opt_set_blkweights) type (fd4_domain), intent(inout), target :: domain integer (kind=i_k), intent(out) :: err real (kind=rwk), optional, intent(in), dimension (:) :: opt_localweights logical, optional, intent(in) :: opt_set_blkweights end subroutine fd4_part_sfc_exch_weights
Parameters:
domain | the domain |
err | error status: 0...ok |
opt_localweights | optional array of local block weights |
opt_set_blkweights | set domain%blkweights? default: yes, if allocated |
Updates domain%balance%blkweights.
Internal routine.
public subroutine fd4_part_sfc_noweight (domain, all_blocks, all_blocks_len, local_blocks, sfctype, add_only, opt_blocks_changed) type (fd4_domain), intent(inout) :: domain integer (kind=i_k), intent(in), dimension (:,:) :: all_blocks integer (kind=i_k), intent(in) :: all_blocks_len type (stack3), intent(inout) :: local_blocks integer (kind=i_k), intent(in) :: sfctype logical, optional, intent(in) :: add_only logical, optional, intent(in) :: opt_blocks_changed end subroutine fd4_part_sfc_noweight
Parameters:
domain | the domain |
all_blocks | array of positions of the blocks in the new partition |
all_blocks_len | length of all_blocks(3,:) |
local_blocks | stack3 of new local partition (differs from rank to rank) |
sfctype | space-filling curve to use |
add_only | only add new blocks, but don't touch existing blocks |
opt_blocks_changed | tell me if the blocks have changed (created/removed) |
The special option add_only has the following effects:
public subroutine fd4_part_sfc_update_blkmap_bnd (domain, bbnd, err) type (fd4_domain), intent(inout) :: domain integer (kind=i_k), intent(in), dimension (3,2) :: bbnd integer (kind=i_k), intent(out) :: err end subroutine fd4_part_sfc_update_blkmap_bnd
Parameters:
domain | the domain |
bbnd | bounds in block map |
err | error status: 0...ok (only L3 checking) |
public subroutine fd4_part_sfc_update_blkmap_loc (domain, sfciter, oldp, pstart, pend, local_blocks, err) type (fd4_domain), intent(inout) :: domain type (part_sfc_iter), intent(inout) :: sfciter integer (kind=i_k), intent(inout), dimension (2) :: oldp integer (kind=i_k), intent(in) :: pstart integer (kind=i_k), intent(in) :: pend type (stack3), intent(in) :: local_blocks integer (kind=i_k), intent(out) :: err end subroutine fd4_part_sfc_update_blkmap_loc
Parameters:
domain | the domain |
sfciter | sfc iterator correctly initialized for the domain |
oldp | start and end index of old own partition |
pstart | start index of new own partition |
pend | end index of new own partition |
local_blocks | stack3 of new local partition (differs from rank to rank) |
err | error status: 0...ok |
Does not perform a full update of the block map; it only updates:
public subroutine fd4_part_sfc_update_blkmap (domain) type (fd4_domain), intent(inout) :: domain end subroutine fd4_part_sfc_update_blkmap
Parameters:
domain | the domain |