fmm_m2l_ztranslate_work

private subroutine fmm_m2l_ztranslate_work(z, pm, pl, vscales, m2l_ztranslate_coef, alpha, src_m, beta, dst_l, work)

Direct M2L translation over OZ axis

Compute the following matrix-vector product: \f[ \mathrm{dst} = \beta \mathrm{dst} + \alpha L_M \mathrm{src}, \f] where \f$ \mathrm{dst} \f$ is a vector of coefficients of output spherical harmonics, \f$ \mathrm{src} \f$ is a vector of coefficients of input spherical harmonics and \f$ L_M \f$ is a matrix of multipole-to-local translation over OZ axis.

@param[in] z: OZ coordinate from new to old centers of harmonics @param[in] src_r: Radius of old (multipole) harmonics @param[in] dst_r: Radius of new (local) harmonics @parma[in] pm: Maximal degree of multipole spherical harmonics @parma[in] pl: Maximal degree of local spherical harmonics @param[in] vscales: Normalization constants for harmonics @param[in] m2l_ztranslate_coef: @param[in] alpha: Scalar multipler for src_m @param[in] src_m: Expansion in old (multipole) harmonics @param[in] beta: Scalar multipler for dst_l @param[inout] dst_l: Expansion in new (local) harmonics @param[out] work: Temporary workspace of a size (pm+2)*(pm+1)

GCC$ unroll 4 GCC$ unroll 4 GCC$ unroll 4 GCC$ unroll 4

Arguments

Type	Intent	Attributes		Name
real(kind=rp),	intent(in)		::	z
integer,	intent(in)		::	pm
integer,	intent(in)		::	pl
real(kind=rp),	intent(in)		::	vscales((pm+pl+1)*(pm+pl+1))
real(kind=rp),	intent(in)		::	m2l_ztranslate_coef(pm+1,pl+1,pl+1)
real(kind=rp),	intent(in)		::	alpha
real(kind=rp),	intent(in)		::	src_m((pm+1)*(pm+1))
real(kind=rp),	intent(in)		::	beta
real(kind=rp),	intent(inout)		::	dst_l((pl+1)*(pl+1))
real(kind=rp),	intent(out),	target	::	work((pm+2)*(pm+1))

Source Code

fmm_m2l_ztranslate_work

Source Code

subroutine fmm_m2l_ztranslate_work(z, pm, pl, vscales, &
    & m2l_ztranslate_coef, alpha, src_m, beta, dst_l, work)
! Inputs
integer, intent(in) :: pm, pl
real(rp), intent(in) :: z, vscales((pm+pl+1)*(pm+pl+1)), &
    & m2l_ztranslate_coef(pm+1, pl+1, pl+1), alpha, src_m((pm+1)*(pm+1)), &
    & beta
! Output
real(rp), intent(inout) :: dst_l((pl+1)*(pl+1))
! Temporary workspace
real(rp), intent(out), target :: work((pm+2)*(pm+1))
! Local variables
real(rp) :: tmp1, r1, r2, res1, res2, pow_r2
integer :: j, k, n, indj, indk1, indk2
! Pointers for temporary values of powers
real(rp), pointer :: src_m2(:), pow_r1(:)
! In case alpha is 0.0_rp just do a proper scaling of output
if (abs(alpha) < eps_rp) then
    if (abs(beta) < eps_rp) then
        dst_l = 0.0_rp
    else
        dst_l = beta * dst_l
    end if
    return
end if
! Now alpha is non-0.0_rp
! z cannot be 0.0_rp, as input sphere (multipole) must not intersect with
! output sphere (local)
if (abs(z) < eps_rp) then
    return
end if
! Prepare pointers
n = (pm+1) ** 2
src_m2(1:n) => work(1:n)
pow_r1(1:pm+1) => work(n+1:n+pm+1)
! Get powers of r1 and r2
r1 = 1.0_rp / z
r2 = 1.0_rp / z
! This abs(r1) makes it possible to work with negative z to avoid
! unnecessary rotation to positive z
tmp1 = abs(r1)
do j = 0, pm
    indj = j*j + j + 1
    pow_r1(j+1) = tmp1 / vscales(indj)
    tmp1 = tmp1 * r1
end do
pow_r2 = 1.0_rp
! Reorder source harmonics from (degree, order) to (order, degree)
! 0.0_rp order k=0 at first
do j = 0, pm
    indj = j*j + j + 1
    src_m2(j+1) = pow_r1(j+1) * src_m(indj)
end do
! Non-0.0_rp orders next, a positive k followed by a negative -k
indk1 = pm + 2
do k = 1, pm
    n = pm - k + 1
    indk2 = indk1 + n
    !!GCC$ unroll 4
    do j = k, pm
        indj = j*j + j + 1
        src_m2(indk1+j-k) = pow_r1(j+1) * src_m(indj+k)
        src_m2(indk2+j-k) = pow_r1(j+1) * src_m(indj-k)
    end do
    indk1 = indk2 + n
end do
! Do actual M2L
! Overwrite output if beta is 0.0_rp
if (abs(beta) < eps_rp) then
    do j = 0, pl
        ! Offset for dst_l
        indj = j*j + j + 1
        ! k = 0
        tmp1 = alpha * vscales(indj) * pow_r2
        pow_r2 = pow_r2 * r2
        res1 = 0.0_rp
        !!GCC$ unroll 4
        do n = 0, pm
            res1 = res1 + m2l_ztranslate_coef(n+1, 1, j+1)*src_m2(n+1)
        end do
        dst_l(indj) = tmp1 * res1
        ! k != 0
        !!GCC$ unroll 4
        do k = 1, j
            ! Offsets for src_m2
            indk1 = pm + 2 + (2*pm-k+2)*(k-1)
            indk2 = indk1 + pm - k + 1
            res1 = 0.0_rp
            res2 = 0.0_rp
            !!GCC$ unroll 4
            do n = k, pm
                res1 = res1 + &
                    & m2l_ztranslate_coef(n-k+1, k+1, j-k+1)* &
                    & src_m2(indk1+n-k)
                res2 = res2 + &
                    & m2l_ztranslate_coef(n-k+1, k+1, j-k+1)* &
                    & src_m2(indk2+n-k)
            end do
            dst_l(indj+k) = tmp1 * res1
            dst_l(indj-k) = tmp1 * res2
        end do
    end do
else
    do j = 0, pl
        ! Offset for dst_l
        indj = j*j + j + 1
        ! k = 0
        tmp1 = alpha * vscales(indj) * pow_r2
        pow_r2 = pow_r2 * r2
        res1 = 0.0_rp
        do n = 0, pm
            res1 = res1 + m2l_ztranslate_coef(n+1, 1, j+1)*src_m2(n+1)
        end do
        dst_l(indj) = beta*dst_l(indj) + tmp1*res1
        ! k != 0
        do k = 1, j
            ! Offsets for src_m2
            indk1 = pm + 2 + (2*pm-k+2)*(k-1)
            indk2 = indk1 + pm - k + 1
            res1 = 0.0_rp
            res2 = 0.0_rp
            do n = k, pm
                res1 = res1 + &
                    & m2l_ztranslate_coef(n-k+1, k+1, j-k+1)* &
                    & src_m2(indk1+n-k)
                res2 = res2 + &
                    & m2l_ztranslate_coef(n-k+1, k+1, j-k+1)* &
                    & src_m2(indk2+n-k)
            end do
            dst_l(indj+k) = beta*dst_l(indj+k) + tmp1*res1
            dst_l(indj-k) = beta*dst_l(indj-k) + tmp1*res2
        end do
    end do
end if
end subroutine fmm_m2l_ztranslate_work

fmm_m2l_ztranslate_work Subroutine

Contents

Source Code

private subroutine fmm_m2l_ztranslate_work(z, pm, pl, vscales, m2l_ztranslate_coef, alpha, src_m, beta, dst_l, work)

Arguments

Called by

Contents

Source Code

Source Code