Module fd4_balance_mod


Uses:
    module fd4_globaldef_mod
    module fd4_vartab_mod
    module fd4_block_mod
    module fd4_domain_mod
    module fd4_stat_mod
    module fd4_ghostlist_mod
    module fd4_iter_mod
    module fd4_mpi_mod
    module fd4_part_sfc_mod
    module fd4_part_sfcd_mod
    module fd4_part_rcb_mod
    module fd4_part_metis_mod
    module fd4_mpitype_mod
    module fd4_neigh_mod
    module fd4_metadata_mod
    module rbtree_fd4_neigh_mod
    module rbtree_fd4_block_mod
    module rbtree_int_mod
    module stack_mod
    module stack3_mod
    module mrgrnk_mod
    module merge_mod
    module section_mod
    module timing_mod
Types:
    public type fd4_balance_statistics
Variables:
    integer (kind=i_k), public, parameter :: FD4_BALANCE_HSFC_SIMPLE = FD4_PART_SFC_SIMPLE
    integer (kind=i_k), public, parameter :: FD4_BALANCE_HSFC_SIMPLE2 = FD4_PART_SFC_SIMPLE2
    integer (kind=i_k), public, parameter :: FD4_BALANCE_HSFC_BINSRCH = FD4_PART_SFC_BINSRCH
    integer (kind=i_k), public, parameter :: FD4_BALANCE_HSFC_PARALLEL = FD4_PART_SFC_PARALLEL
    integer (kind=i_k), public, parameter :: FD4_BALANCE_HILBERT = FD4_BALANCE_HSFC_BINSRCH
    integer (kind=i_k), public, parameter :: FD4_BALANCE_METIS = 101
    integer (kind=i_k), public, parameter :: FD4_BALANCE_SERIAL = 102
    integer (kind=i_k), public, parameter :: FD4_BALANCE_RCB = 103
    integer (kind=i_k), public, parameter :: FD4_BALANCE_HSFC2_PARALLEL = 104
    integer (kind=i_k), public, parameter :: FD4_BALANCE_HSFC2_BINSRCH = 105
    integer (kind=i_k), public, parameter :: FD4_BALANCE_HSFC_DISTR = 120
    integer (kind=i_k), public, allocatable, dimension (:) :: block_migration_distance
Subroutines and functions:
    public subroutine fd4_balance_params (domain, method, opt_ubvec, opt_itr, opt_subset, opt_wfactor, opt_use_adjwgt, opt_use_vsize, opt_force, opt_lbtol, opt_ech, opt_auto, opt_weight, opt_sfctype, opt_binsteps, opt_accuracy, opt_blkmap_update, opt_groupsz, opt_wgtxmeth)
    public subroutine fd4_balance_readjust (domain, err, method, opt_ubvec, opt_itr, opt_subset, opt_wfactor, opt_force, opt_lbtol, opt_stats, opt_ech, opt_auto, opt_weight, opt_sfctype)
    public subroutine fd4_balance_set_partition (domain, err)
    private subroutine fd4_balance_global_empty_update (domain, myempties, status, starttime)
    private subroutine fd4_balance_block_migration (domain, rmblock, myblocks, nnewblocks, err, opt_stats)
    private subroutine fd4_balance_update_ghosts (domain, opt_stats)
    private subroutine fd4_balance_estimate (domain, auto, lb_benefit, balance, starttime, opt_stats)
    private subroutine fd4_balance_required_blocks (domain, blocks_changed)

FD4 load balancing & creation/removal of blocks.

Preprocessor options:

Load balancing methods (method): Choice of SFCs (opt_sfctype):

Author: Matthias Lieber

See also: module fd4_domain, module fd4_part_metis, module fd4_part_sfc, module fd4_part_rcb


Description of Types

fd4_balance_statistics

public type fd4_balance_statistics
    logical :: partition_changed = .false.
    integer (kind=i_k) :: migrated_blocks = 0
    integer (kind=i_k) :: removed_blocks = 0
    integer (kind=i_k) :: created_blocks = 0
    real (kind=r4k) :: last_measured_balance = -1.0
    real (kind=r4k) :: estimated_balance = -1.0
    integer (kind=i_k) :: allocated_blocks = 0
    integer (kind=i_k) :: deallocated_blocks = 0
    character (len=40) :: status
    real (kind=r4k) :: auto_tlost
    real (kind=r4k) :: auto_tbalance
end type fd4_balance_statistics
Components:
partition_changed global: indicates whether a change of the partition was attempted
migrated_blocks global: number of migrated blocks
removed_blocks global: number of removed blocks
created_blocks global: number of created blocks
last_measured_balance global: measured load balance based on given block weights
estimated_balance global: estimated load balance of new partitioning
allocated_blocks local: number of block allocations (incl. ghosts, excl. pool cached)
deallocated_blocks local: number of block deallocations (incl. ghosts, excl. pool cached)
status global: tells what fd4_balance_readjust did and why
auto_tlost global: time lost due to imbalance (auto mode only)
auto_tbalance global: estimated time spent on load balancing (auto mode only)
type for fd4_balance_readjust to return some statistics information about load balancing (to be extended)

Description of Subroutines and Functions

fd4_balance_params

public subroutine fd4_balance_params (domain, method, opt_ubvec, opt_itr, opt_subset, opt_wfactor, opt_use_adjwgt, opt_use_vsize, opt_force, opt_lbtol, opt_ech, opt_auto, opt_weight, opt_sfctype, opt_binsteps, opt_accuracy, opt_blkmap_update, opt_groupsz, opt_wgtxmeth)
    type (fd4_domain), intent(inout), target :: domain
    integer (kind=i_k), optional, intent(in) :: method
    real (kind=r4k), optional, intent(in) :: opt_ubvec
    real (kind=r4k), optional, intent(in) :: opt_itr
    integer (kind=i_k), optional, intent(in) :: opt_subset
    real (kind=r4k), optional, intent(in) :: opt_wfactor
    logical, optional, intent(in) :: opt_use_adjwgt
    logical, optional, intent(in) :: opt_use_vsize
    logical, optional, intent(in) :: opt_force
    real (kind=r4k), optional, intent(in) :: opt_lbtol
    logical, optional, intent(in) :: opt_ech
    logical, optional, intent(in) :: opt_auto
    logical, optional, intent(in) :: opt_weight
    integer (kind=i_k), optional, intent(in) :: opt_sfctype
    integer (kind=i_k), optional, intent(in) :: opt_binsteps
    real (kind=r8k), optional, intent(in) :: opt_accuracy
    logical, optional, intent(in) :: opt_blkmap_update
    integer (kind=i_k), optional, intent(in) :: opt_groupsz
    integer (kind=i_k), optional, intent(in) :: opt_wgtxmeth
end subroutine fd4_balance_params
Parameters:
domain the domain
method load balancing method to use (see module doc above), default: FD4_PART_SFC_PARALLEL
opt_ubvec ParMetis ubvec parameter (balance tolerance), default: 1.05
opt_itr ParMetis itr parameter, default: 10.0
opt_subset ParMetis: use only a subset of processes for ParMetis calculation (<2...off), default: off
opt_wfactor ParMetis: multiply this with block's weight before converting to int, default: 1.0
opt_use_adjwgt ParMetis: use weights for edges? default: .true.
opt_use_vsize ParMetis: use (memory size) weights for vertices? default: .true.
opt_force force to run ghost block allocation and deallocation, default: .false.
opt_lbtol load balance tolerance, 0.0 (max. relaxed) ... 1.0 (strictest), default: 0.95
opt_ech if false, assume that empty info did not change in any block, default: .true. only in adaptive block mode
opt_auto automatic decision whether load balancing beneficial or not, default: .false.
opt_weight if false ignore block weights (i.e. assume each block has same weight), default: .true.
opt_sfctype SFC: space-filling curve type (FD4_PART_SFC_HILBERT or FD4_PART_SFC_MORTON), default: FD4_PART_SFC_HILBERT
opt_binsteps SFC: max. number of steps for binary search methods, default: 100
opt_accuracy SFC: target ratio from best possible load balance for binary steps estimation, must be <= 1.0, default: 0.99 (the SFC methods are exact if opt_accuracy is 1.0 and opt_binsteps is large enough)
opt_blkmap_update SFC: update the complete blkmap after partitioning, can be disabled when using HSFC2 methods which speeds up partitioning, default: .true.
opt_groupsz SFCD: group size for distributed/hierarchical method, default: 16
opt_wgtxmeth SFCD: method for exchanging block weights (FD4_PART_SFCD_WGTX_COLL or FD4_PART_SFCD_WGTX_MASTER), default: FD4_PART_SFCD_WGTX_COLL
Set the default parameters for the load balancing method used in fd4_balance_readjust. These defaults can be overridden when calling fd4_balance_readjust. One can call this routine multiple times to set one or more parameters.

See also: subroutine fd4_balance_readjust


fd4_balance_readjust

public subroutine fd4_balance_readjust (domain, err, method, opt_ubvec, opt_itr, opt_subset, opt_wfactor, opt_force, opt_lbtol, opt_stats, opt_ech, opt_auto, opt_weight, opt_sfctype)
    type (fd4_domain), intent(inout), target :: domain
    integer (kind=i_k), intent(out) :: err
    integer (kind=i_k), optional, intent(in) :: method
    real (kind=r4k), optional, intent(in) :: opt_ubvec
    real (kind=r4k), optional, intent(in) :: opt_itr
    integer (kind=i_k), optional, intent(in) :: opt_subset
    real (kind=r4k), optional, intent(in) :: opt_wfactor
    logical, optional, intent(in) :: opt_force
    real (kind=r4k), optional, intent(in) :: opt_lbtol
    type (fd4_balance_statistics), optional, intent(out) :: opt_stats
    logical, optional, intent(in) :: opt_ech
    logical, optional, intent(in) :: opt_auto
    logical, optional, intent(in) :: opt_weight
    integer (kind=i_k), optional, intent(in) :: opt_sfctype
end subroutine fd4_balance_readjust
Parameters:
domain the domain
err error status: 0...ok
method load balancing method to use (see module doc above)
opt_ubvec optional ParMetis ubvec parameter
opt_itr optional ParMetis itr parameter
opt_subset use only a subset of processes for ParMetis calculation (<2...off)
opt_wfactor ParMetis: multiply this with block's weight before converting to int
opt_force force to run ghost block allocation and deallocation
opt_lbtol load balance tolerance, 0.0 (max. relaxed) ... 1.0 (strictest)
opt_stats return some statistics about load balancing
opt_ech if false, assume that empty info did not change in any block
opt_auto automatic decision whether load balancing beneficial or not
opt_weight if false ignore block weights (i.e. assume each block has same weight)
opt_sfctype space-filling curve type (FD4_PART_SFC_HILBERT or FD4_PART_SFC_MORTON)
Readjust global work distribution after an iteration over blocks.


fd4_balance_set_partition

public subroutine fd4_balance_set_partition (domain, err)
    type (fd4_domain), intent(inout), target :: domain
    integer (kind=i_k), intent(out) :: err
end subroutine fd4_balance_set_partition
Parameters:
domain the domain
err error status: 0...ok
Set partition explicitly. Change domain%blkmap before calling this subroutine. *For experts only!*

fd4_balance_global_empty_update

private subroutine fd4_balance_global_empty_update (domain, myempties, status, starttime)
    type (fd4_domain), intent(inout), target :: domain
    type (stack), intent(inout) :: myempties
    logical, intent(out) :: status
    integer (kind=i8k), intent(out) :: starttime
end subroutine fd4_balance_global_empty_update
Parameters:
domain the domain
myempties stack for internal use
status return status (true if empty info changed)
starttime time after first synchronizing global communication
Update of global empty info (domain%empties). The myempties stack will be returned allocated and can be re-used after the call. If it is not re-used, myempties should be deallocated right after calling this subroutine.

Internal routine for fd4_balance_readjust.


fd4_balance_block_migration

private subroutine fd4_balance_block_migration (domain, rmblock, myblocks, nnewblocks, err, opt_stats)
    type (fd4_domain), intent(inout), target :: domain
    type (stack), intent(inout) :: rmblock
    type (stack3), intent(in) :: myblocks
    integer (kind=i_k), intent(out) :: nnewblocks
    integer (kind=i_k), intent(out) :: err
    type (fd4_balance_statistics), optional, intent(inout) :: opt_stats
end subroutine fd4_balance_block_migration
Parameters:
domain the domain
rmblock stack for internal use
myblocks coordinates of local blocks (result from partitioning algorithm)
nnewblocks number of new blocks
err error status: 0...ok
opt_stats statistics about migration
Block migration.

The rmblock stack will be returned allocated and can be re-used after the call. If it is not re-used, rmblock should be deallocated right after calling this subroutine.

Internal routine for fd4_balance_readjust.


fd4_balance_update_ghosts

private subroutine fd4_balance_update_ghosts (domain, opt_stats)
    type (fd4_domain), intent(inout), target :: domain
    type (fd4_balance_statistics), optional, intent(inout) :: opt_stats
end subroutine fd4_balance_update_ghosts
Parameters:
domain the domain
opt_stats statistics about ghost (de)allocation
Communication / boundary ghost block update

Notes: These issues are tracked with the domain%glist(FD4_GHOST_CMM)%touched flag (1) and the local variable p_changed (2). If one of them is true after this loop, we call the (local!) partitioning "new" (by counting up the local partition_id). In this case, the ghost communication has to be adapted to the new situation.

Internal routine for fd4_balance_readjust.


fd4_balance_estimate

private subroutine fd4_balance_estimate (domain, auto, lb_benefit, balance, starttime, opt_stats)
    type (fd4_domain), intent(inout) :: domain
    logical, intent(in) :: auto
    logical, intent(out) :: lb_benefit
    real (kind=r4k), intent(out) :: balance
    integer (kind=i8k), optional, intent(out) :: starttime
    type (fd4_balance_statistics), optional, intent(out) :: opt_stats
end subroutine fd4_balance_estimate
Parameters:
domain the domain
auto automatic decision whether load balancing beneficial or not
lb_benefit if auto==.true.: is load balancing beneficial?
balance the load balance estimate: 1 = perfect, 0 = infinitely poor
starttime time after first synchronizing global communication
opt_stats return some statistics about load balancing
Determine the estimated load balance using block weights.

TODO: use only a single MPI_Allreduce

Internal routine for fd4_balance_readjust.


fd4_balance_required_blocks

private subroutine fd4_balance_required_blocks (domain, blocks_changed)
    type (fd4_domain), intent(inout), target :: domain
    logical, intent(out) :: blocks_changed
end subroutine fd4_balance_required_blocks
Parameters:
domain the domain
blocks_changed any blocks created or removed?
Find out which blocks are required.

Creates domain%balance%allblocks, array of blocks (positions) which are needed in the new partitioning using the empties array. This might be costly when using a large number of blocks.