fmm_m2l_ztranslate_work Subroutine

private subroutine fmm_m2l_ztranslate_work(z, pm, pl, vscales, m2l_ztranslate_coef, alpha, src_m, beta, dst_l, work)

Direct M2L translation over OZ axis

Compute the following matrix-vector product: \f[ \mathrm{dst} = \beta \mathrm{dst} + \alpha L_M \mathrm{src}, \f] where \f$ \mathrm{dst} \f$ is a vector of coefficients of output spherical harmonics, \f$ \mathrm{src} \f$ is a vector of coefficients of input spherical harmonics and \f$ L_M \f$ is a matrix of multipole-to-local translation over OZ axis.

@param[in] z: OZ coordinate from new to old centers of harmonics @param[in] src_r: Radius of old (multipole) harmonics @param[in] dst_r: Radius of new (local) harmonics @parma[in] pm: Maximal degree of multipole spherical harmonics @parma[in] pl: Maximal degree of local spherical harmonics @param[in] vscales: Normalization constants for harmonics @param[in] m2l_ztranslate_coef: @param[in] alpha: Scalar multipler for src_m @param[in] src_m: Expansion in old (multipole) harmonics @param[in] beta: Scalar multipler for dst_l @param[inout] dst_l: Expansion in new (local) harmonics @param[out] work: Temporary workspace of a size (pm+2)*(pm+1)

GCC$ unroll 4 GCC$ unroll 4 GCC$ unroll 4 GCC$ unroll 4

Arguments

Type IntentOptional Attributes Name
real(kind=rp), intent(in) :: z
integer, intent(in) :: pm
integer, intent(in) :: pl
real(kind=rp), intent(in) :: vscales((pm+pl+1)*(pm+pl+1))
real(kind=rp), intent(in) :: m2l_ztranslate_coef(pm+1,pl+1,pl+1)
real(kind=rp), intent(in) :: alpha
real(kind=rp), intent(in) :: src_m((pm+1)*(pm+1))
real(kind=rp), intent(in) :: beta
real(kind=rp), intent(inout) :: dst_l((pl+1)*(pl+1))
real(kind=rp), intent(out), target :: work((pm+2)*(pm+1))

Called by

proc~~fmm_m2l_ztranslate_work~~CalledByGraph proc~fmm_m2l_ztranslate_work fmm_m2l_ztranslate_work proc~fmm_m2l_rotation_work fmm_m2l_rotation_work proc~fmm_m2l_rotation_work->proc~fmm_m2l_ztranslate_work proc~fmm_m2l fmm_m2l proc~fmm_m2l->proc~fmm_m2l_rotation_work proc~tree_m2l tree_m2l proc~tree_m2l->proc~fmm_m2l proc~cart_propnear_at_ipart cart_propnear_at_ipart proc~cart_propnear_at_ipart->proc~fmm_m2l proc~fmm_solve_for_multipoles fmm_solve_for_multipoles proc~fmm_solve_for_multipoles->proc~tree_m2l proc~fmm_solve fmm_solve proc~fmm_solve->proc~tree_m2l proc~cart_prop_at_ipart cart_prop_at_ipart proc~cart_prop_at_ipart->proc~cart_propnear_at_ipart proc~prepare_fmm_ext_ipd prepare_fmm_ext_ipd proc~prepare_fmm_ext_ipd->proc~fmm_solve_for_multipoles proc~preapare_fmm_static preapare_fmm_static proc~preapare_fmm_static->proc~fmm_solve_for_multipoles proc~field_extd2d field_extD2D proc~field_extd2d->proc~prepare_fmm_ext_ipd proc~prepare_fmm_ipd prepare_fmm_ipd proc~prepare_fmm_ipd->proc~prepare_fmm_ext_ipd proc~elec_prop_m2m elec_prop_M2M proc~elec_prop_m2m->proc~preapare_fmm_static proc~elec_prop_m2d elec_prop_M2D proc~elec_prop_m2d->proc~preapare_fmm_static proc~tmatvec_otf TMatVec_otf proc~tmatvec_otf->proc~field_extd2d proc~elec_prop_d2m elec_prop_D2M proc~elec_prop_d2m->proc~prepare_fmm_ipd proc~prepare_polelec prepare_polelec proc~prepare_polelec->proc~elec_prop_m2d proc~prepare_polelec->proc~elec_prop_d2m proc~elec_prop_d2d elec_prop_D2D proc~prepare_polelec->proc~elec_prop_d2d proc~prepare_fixedelec prepare_fixedelec proc~prepare_fixedelec->proc~elec_prop_m2m proc~elec_prop_d2d->proc~prepare_fmm_ipd proc~polelec_geomgrad polelec_geomgrad proc~polelec_geomgrad->proc~prepare_polelec proc~ommp_get_polelec_energy ommp_get_polelec_energy proc~ommp_get_polelec_energy->proc~prepare_polelec proc~energy_mm_pol energy_MM_pol proc~ommp_get_polelec_energy->proc~energy_mm_pol proc~ommp_set_external_field ommp_set_external_field proc~ommp_set_external_field->proc~prepare_polelec proc~fixedelec_geomgrad fixedelec_geomgrad proc~fixedelec_geomgrad->proc~prepare_fixedelec proc~energy_mm_pol->proc~prepare_polelec proc~energy_mm_mm energy_MM_MM proc~energy_mm_mm->proc~prepare_fixedelec proc~ommp_polelec_geomgrad ommp_polelec_geomgrad proc~ommp_polelec_geomgrad->proc~polelec_geomgrad proc~c_ommp_get_polelec_energy C_ommp_get_polelec_energy proc~c_ommp_get_polelec_energy->proc~ommp_get_polelec_energy proc~ommp_get_full_ele_energy ommp_get_full_ele_energy proc~ommp_get_full_ele_energy->proc~ommp_get_polelec_energy proc~ommp_get_fixedelec_energy ommp_get_fixedelec_energy proc~ommp_get_full_ele_energy->proc~ommp_get_fixedelec_energy proc~ommp_set_external_field_nomm ommp_set_external_field_nomm proc~ommp_set_external_field_nomm->proc~ommp_set_external_field proc~ommp_full_geomgrad ommp_full_geomgrad proc~ommp_full_geomgrad->proc~polelec_geomgrad proc~ommp_full_geomgrad->proc~fixedelec_geomgrad proc~c_ommp_set_external_field_nomm C_ommp_set_external_field_nomm proc~c_ommp_set_external_field_nomm->proc~ommp_set_external_field proc~c_ommp_set_external_field C_ommp_set_external_field proc~c_ommp_set_external_field->proc~ommp_set_external_field proc~ommp_fixedelec_geomgrad ommp_fixedelec_geomgrad proc~ommp_fixedelec_geomgrad->proc~fixedelec_geomgrad proc~ommp_get_fixedelec_energy->proc~energy_mm_mm proc~c_ommp_polelec_geomgrad C_ommp_polelec_geomgrad proc~c_ommp_polelec_geomgrad->proc~ommp_polelec_geomgrad proc~c_ommp_full_geomgrad C_ommp_full_geomgrad proc~c_ommp_full_geomgrad->proc~ommp_full_geomgrad proc~c_ommp_get_full_ele_energy C_ommp_get_full_ele_energy proc~c_ommp_get_full_ele_energy->proc~ommp_get_full_ele_energy proc~c_ommp_fixedelec_geomgrad C_ommp_fixedelec_geomgrad proc~c_ommp_fixedelec_geomgrad->proc~ommp_fixedelec_geomgrad proc~ommp_get_full_energy ommp_get_full_energy proc~ommp_get_full_energy->proc~ommp_get_full_ele_energy proc~c_ommp_get_fixedelec_energy C_ommp_get_fixedelec_energy proc~c_ommp_get_fixedelec_energy->proc~ommp_get_fixedelec_energy proc~c_ommp_get_full_energy C_ommp_get_full_energy proc~c_ommp_get_full_energy->proc~ommp_get_full_energy

Contents


Source Code

subroutine fmm_m2l_ztranslate_work(z, pm, pl, vscales, &
    & m2l_ztranslate_coef, alpha, src_m, beta, dst_l, work)
! Inputs
integer, intent(in) :: pm, pl
real(rp), intent(in) :: z, vscales((pm+pl+1)*(pm+pl+1)), &
    & m2l_ztranslate_coef(pm+1, pl+1, pl+1), alpha, src_m((pm+1)*(pm+1)), &
    & beta
! Output
real(rp), intent(inout) :: dst_l((pl+1)*(pl+1))
! Temporary workspace
real(rp), intent(out), target :: work((pm+2)*(pm+1))
! Local variables
real(rp) :: tmp1, r1, r2, res1, res2, pow_r2
integer :: j, k, n, indj, indk1, indk2
! Pointers for temporary values of powers
real(rp), pointer :: src_m2(:), pow_r1(:)
! In case alpha is 0.0_rp just do a proper scaling of output
if (abs(alpha) < eps_rp) then
    if (abs(beta) < eps_rp) then
        dst_l = 0.0_rp
    else
        dst_l = beta * dst_l
    end if
    return
end if
! Now alpha is non-0.0_rp
! z cannot be 0.0_rp, as input sphere (multipole) must not intersect with
! output sphere (local)
if (abs(z) < eps_rp) then
    return
end if
! Prepare pointers
n = (pm+1) ** 2
src_m2(1:n) => work(1:n)
pow_r1(1:pm+1) => work(n+1:n+pm+1)
! Get powers of r1 and r2
r1 = 1.0_rp / z
r2 = 1.0_rp / z
! This abs(r1) makes it possible to work with negative z to avoid
! unnecessary rotation to positive z
tmp1 = abs(r1)
do j = 0, pm
    indj = j*j + j + 1
    pow_r1(j+1) = tmp1 / vscales(indj)
    tmp1 = tmp1 * r1
end do
pow_r2 = 1.0_rp
! Reorder source harmonics from (degree, order) to (order, degree)
! 0.0_rp order k=0 at first
do j = 0, pm
    indj = j*j + j + 1
    src_m2(j+1) = pow_r1(j+1) * src_m(indj)
end do
! Non-0.0_rp orders next, a positive k followed by a negative -k
indk1 = pm + 2
do k = 1, pm
    n = pm - k + 1
    indk2 = indk1 + n
    !!GCC$ unroll 4
    do j = k, pm
        indj = j*j + j + 1
        src_m2(indk1+j-k) = pow_r1(j+1) * src_m(indj+k)
        src_m2(indk2+j-k) = pow_r1(j+1) * src_m(indj-k)
    end do
    indk1 = indk2 + n
end do
! Do actual M2L
! Overwrite output if beta is 0.0_rp
if (abs(beta) < eps_rp) then
    do j = 0, pl
        ! Offset for dst_l
        indj = j*j + j + 1
        ! k = 0
        tmp1 = alpha * vscales(indj) * pow_r2
        pow_r2 = pow_r2 * r2
        res1 = 0.0_rp
        !!GCC$ unroll 4
        do n = 0, pm
            res1 = res1 + m2l_ztranslate_coef(n+1, 1, j+1)*src_m2(n+1)
        end do
        dst_l(indj) = tmp1 * res1
        ! k != 0
        !!GCC$ unroll 4
        do k = 1, j
            ! Offsets for src_m2
            indk1 = pm + 2 + (2*pm-k+2)*(k-1)
            indk2 = indk1 + pm - k + 1
            res1 = 0.0_rp
            res2 = 0.0_rp
            !!GCC$ unroll 4
            do n = k, pm
                res1 = res1 + &
                    & m2l_ztranslate_coef(n-k+1, k+1, j-k+1)* &
                    & src_m2(indk1+n-k)
                res2 = res2 + &
                    & m2l_ztranslate_coef(n-k+1, k+1, j-k+1)* &
                    & src_m2(indk2+n-k)
            end do
            dst_l(indj+k) = tmp1 * res1
            dst_l(indj-k) = tmp1 * res2
        end do
    end do
else
    do j = 0, pl
        ! Offset for dst_l
        indj = j*j + j + 1
        ! k = 0
        tmp1 = alpha * vscales(indj) * pow_r2
        pow_r2 = pow_r2 * r2
        res1 = 0.0_rp
        do n = 0, pm
            res1 = res1 + m2l_ztranslate_coef(n+1, 1, j+1)*src_m2(n+1)
        end do
        dst_l(indj) = beta*dst_l(indj) + tmp1*res1
        ! k != 0
        do k = 1, j
            ! Offsets for src_m2
            indk1 = pm + 2 + (2*pm-k+2)*(k-1)
            indk2 = indk1 + pm - k + 1
            res1 = 0.0_rp
            res2 = 0.0_rp
            do n = k, pm
                res1 = res1 + &
                    & m2l_ztranslate_coef(n-k+1, k+1, j-k+1)* &
                    & src_m2(indk1+n-k)
                res2 = res2 + &
                    & m2l_ztranslate_coef(n-k+1, k+1, j-k+1)* &
                    & src_m2(indk2+n-k)
            end do
            dst_l(indj+k) = beta*dst_l(indj+k) + tmp1*res1
            dst_l(indj-k) = beta*dst_l(indj-k) + tmp1*res2
        end do
    end do
end if
end subroutine fmm_m2l_ztranslate_work