module fd4_globaldef_mod module fd4_mpi_mod
Subroutines and functions:
private function fd4_part_1d_scan (nblocks, blkweights_ps, pstart, sumload, guess) result (pend) public subroutine fd4_part_1d_binsrch (nblocks, nprocs, blkweights_ps, maxweight, eps, maxsteps, partition, maxload, reqsteps) public subroutine fd4_part_1d_parallel (nblocks, nprocs, blkweights_ps, maxweight, eps, maxsteps, mpi, partition, maxload, reqsteps, err) public subroutine fd4_part_1d_simple_h1 (nblocks, nprocs, blkweights_ps, partition, maxload)
Author: Matthias Lieber
private function fd4_part_1d_scan (nblocks, blkweights_ps, pstart, sumload, guess) result (pend) integer (kind=i_k), intent(in) :: nblocks real (kind=r8k), intent(in), dimension (0:nblocks) :: blkweights_ps integer (kind=i_k), intent(in) :: pstart real (kind=r8k), intent(in) :: sumload integer (kind=i_k), intent(inout) :: guess integer (kind=i_k) :: pend end function fd4_part_1d_scan
Parameters:
nblocks | number of blocks |
blkweights_ps | prefix sum of ordered block weights, weights(0) must be 0 |
pstart | start index for search |
sumload | upper bound for search in blkweights_ps values |
guess | guess for result value for clever algorithm |
If compiled with CLEVER_SEARCH, use a more sophisticated algorithm which starts searching at index guess. Returns a new value for guess for subsequent searches.
Internal function, called by fd4_part_1d_binsrch and fd4_part_1d_parallel.
public subroutine fd4_part_1d_binsrch (nblocks, nprocs, blkweights_ps, maxweight, eps, maxsteps, partition, maxload, reqsteps) integer (kind=i_k), intent(in) :: nblocks integer (kind=i_k), intent(in) :: nprocs real (kind=r8k), intent(in), dimension (0:nblocks) :: blkweights_ps real (kind=r8k), intent(in) :: maxweight real (kind=r8k), intent(in) :: eps integer (kind=i_k), intent(in) :: maxsteps integer (kind=i_k), intent(out), dimension (0:nprocs) :: partition real (kind=r8k), intent(out) :: maxload integer (kind=i_k), intent(out) :: reqsteps end subroutine fd4_part_1d_binsrch
Parameters:
nblocks | number of blocks |
nprocs | number of processes |
blkweights_ps | prefix sum of ordered block weights, weights(0) must be 0 |
maxweight | max weight in weight array |
eps | max. difference between the resulting bottleneck value and the minimal bottleneck value |
maxsteps | max. number of binary search steps, may stop earlier depending on eps |
partition | output partition vector, contains start indices of partitions |
maxload | load of max loaded process |
reqsteps | required number of search steps to reach the accuracy specified by eps |
The algorithm is exact for eps=0.0 and large maxsteps.
Algorithm:
Pinar, A. and C. Aykanat: Fast optimal load balancing algorithms for 1D partitioning. Journal of Parallel and Distributed Computing, 64(8):974-996, 2004.
Internal routine.
public subroutine fd4_part_1d_parallel (nblocks, nprocs, blkweights_ps, maxweight, eps, maxsteps, mpi, partition, maxload, reqsteps, err) integer (kind=i_k), intent(in) :: nblocks integer (kind=i_k), intent(in) :: nprocs real (kind=r8k), intent(in), dimension (0:nblocks) :: blkweights_ps real (kind=r8k), intent(in) :: maxweight real (kind=r8k), intent(in) :: eps integer (kind=i_k), intent(in) :: maxsteps type (fd4_mpi), intent(in) :: mpi integer (kind=i_k), intent(out), dimension (0:nprocs) :: partition real (kind=r8k), intent(out) :: maxload integer (kind=i_k), intent(out) :: reqsteps integer (kind=i_k), intent(out) :: err end subroutine fd4_part_1d_parallel
Parameters:
nblocks | number of blocks |
nprocs | number of processes |
blkweights_ps | prefix sum of ordered block weights, weights(0) must be 0 |
maxweight | max weight in weight array |
eps | max. difference between the resulting bottleneck value and the minimal bottleneck value |
maxsteps | max. number of binary search steps, may stop earlier depending on eps |
mpi | domain's MPI context |
partition | output partition vector, contains start indices of partitions |
maxload | estimate of the load of the max loaded process; the real value is a bit smaller |
reqsteps | required number of search steps to reach the accuracy specified by eps |
err | error status: 0...ok |
The algorithm is exact for eps=0.0 and large maxsteps.
Algorithm:
Pinar, A. and C. Aykanat: Fast optimal load balancing algorithms for 1D partitioning. Journal of Parallel and Distributed Computing, 64(8):974-996, 2004.
Internal routine.
public subroutine fd4_part_1d_simple_h1 (nblocks, nprocs, blkweights_ps, partition, maxload) integer (kind=i_k), intent(in) :: nblocks integer (kind=i_k), intent(in) :: nprocs real (kind=r8k), intent(in), dimension (0:nblocks) :: blkweights_ps integer (kind=i_k), intent(out), dimension (0:nprocs) :: partition real (kind=r8k), intent(out) :: maxload end subroutine fd4_part_1d_simple_h1
Parameters:
nblocks | number of blocks |
nprocs | number of processes |
blkweights_ps | prefix sum of ordered block weights, weights(0) must be 0 |
partition | output partition vector, contains start indices of partitions |
maxload | load of max loaded process |
Algorithm:
Miguet, S. and J.-M. Pierson: Heuristics for 1D rectilinear partitioning as a low cost and high quality answer to dynamic load balancing. In: High-Performance Computing and Networking, LNCS, vol. 1225, pages 550-564. Springer, 1997.
Internal routine.