Direct M2L translation over OZ axis
Compute the following matrix-vector product: \f[ \mathrm{dst} = \beta \mathrm{dst} + \alpha L_M \mathrm{src}, \f] where \f$ \mathrm{dst} \f$ is a vector of coefficients of output spherical harmonics, \f$ \mathrm{src} \f$ is a vector of coefficients of input spherical harmonics and \f$ L_M \f$ is a matrix of multipole-to-local translation over OZ axis.
@param[in] z: OZ coordinate from new to old centers of harmonics
@param[in] src_r: Radius of old (multipole) harmonics
@param[in] dst_r: Radius of new (local) harmonics
@parma[in] pm: Maximal degree of multipole spherical harmonics
@parma[in] pl: Maximal degree of local spherical harmonics
@param[in] vscales: Normalization constants for harmonics
@param[in] m2l_ztranslate_coef:
@param[in] alpha: Scalar multipler for src_m
@param[in] src_m: Expansion in old (multipole) harmonics
@param[in] beta: Scalar multipler for dst_l
@param[inout] dst_l: Expansion in new (local) harmonics
@param[out] work: Temporary workspace of a size (pm+2)*(pm+1)
GCC$ unroll 4 GCC$ unroll 4 GCC$ unroll 4 GCC$ unroll 4
Type | Intent | Optional | Attributes | Name | ||
---|---|---|---|---|---|---|
real(kind=rp), | intent(in) | :: | z | |||
integer, | intent(in) | :: | pm | |||
integer, | intent(in) | :: | pl | |||
real(kind=rp), | intent(in) | :: | vscales((pm+pl+1)*(pm+pl+1)) | |||
real(kind=rp), | intent(in) | :: | m2l_ztranslate_coef(pm+1,pl+1,pl+1) | |||
real(kind=rp), | intent(in) | :: | alpha | |||
real(kind=rp), | intent(in) | :: | src_m((pm+1)*(pm+1)) | |||
real(kind=rp), | intent(in) | :: | beta | |||
real(kind=rp), | intent(inout) | :: | dst_l((pl+1)*(pl+1)) | |||
real(kind=rp), | intent(out), | target | :: | work((pm+2)*(pm+1)) |
subroutine fmm_m2l_ztranslate_work(z, pm, pl, vscales, &
& m2l_ztranslate_coef, alpha, src_m, beta, dst_l, work)
! Inputs
integer, intent(in) :: pm, pl
real(rp), intent(in) :: z, vscales((pm+pl+1)*(pm+pl+1)), &
& m2l_ztranslate_coef(pm+1, pl+1, pl+1), alpha, src_m((pm+1)*(pm+1)), &
& beta
! Output
real(rp), intent(inout) :: dst_l((pl+1)*(pl+1))
! Temporary workspace
real(rp), intent(out), target :: work((pm+2)*(pm+1))
! Local variables
real(rp) :: tmp1, r1, r2, res1, res2, pow_r2
integer :: j, k, n, indj, indk1, indk2
! Pointers for temporary values of powers
real(rp), pointer :: src_m2(:), pow_r1(:)
! In case alpha is 0.0_rp just do a proper scaling of output
if (abs(alpha) < eps_rp) then
if (abs(beta) < eps_rp) then
dst_l = 0.0_rp
else
dst_l = beta * dst_l
end if
return
end if
! Now alpha is non-0.0_rp
! z cannot be 0.0_rp, as input sphere (multipole) must not intersect with
! output sphere (local)
if (abs(z) < eps_rp) then
return
end if
! Prepare pointers
n = (pm+1) ** 2
src_m2(1:n) => work(1:n)
pow_r1(1:pm+1) => work(n+1:n+pm+1)
! Get powers of r1 and r2
r1 = 1.0_rp / z
r2 = 1.0_rp / z
! This abs(r1) makes it possible to work with negative z to avoid
! unnecessary rotation to positive z
tmp1 = abs(r1)
do j = 0, pm
indj = j*j + j + 1
pow_r1(j+1) = tmp1 / vscales(indj)
tmp1 = tmp1 * r1
end do
pow_r2 = 1.0_rp
! Reorder source harmonics from (degree, order) to (order, degree)
! 0.0_rp order k=0 at first
do j = 0, pm
indj = j*j + j + 1
src_m2(j+1) = pow_r1(j+1) * src_m(indj)
end do
! Non-0.0_rp orders next, a positive k followed by a negative -k
indk1 = pm + 2
do k = 1, pm
n = pm - k + 1
indk2 = indk1 + n
!!GCC$ unroll 4
do j = k, pm
indj = j*j + j + 1
src_m2(indk1+j-k) = pow_r1(j+1) * src_m(indj+k)
src_m2(indk2+j-k) = pow_r1(j+1) * src_m(indj-k)
end do
indk1 = indk2 + n
end do
! Do actual M2L
! Overwrite output if beta is 0.0_rp
if (abs(beta) < eps_rp) then
do j = 0, pl
! Offset for dst_l
indj = j*j + j + 1
! k = 0
tmp1 = alpha * vscales(indj) * pow_r2
pow_r2 = pow_r2 * r2
res1 = 0.0_rp
!!GCC$ unroll 4
do n = 0, pm
res1 = res1 + m2l_ztranslate_coef(n+1, 1, j+1)*src_m2(n+1)
end do
dst_l(indj) = tmp1 * res1
! k != 0
!!GCC$ unroll 4
do k = 1, j
! Offsets for src_m2
indk1 = pm + 2 + (2*pm-k+2)*(k-1)
indk2 = indk1 + pm - k + 1
res1 = 0.0_rp
res2 = 0.0_rp
!!GCC$ unroll 4
do n = k, pm
res1 = res1 + &
& m2l_ztranslate_coef(n-k+1, k+1, j-k+1)* &
& src_m2(indk1+n-k)
res2 = res2 + &
& m2l_ztranslate_coef(n-k+1, k+1, j-k+1)* &
& src_m2(indk2+n-k)
end do
dst_l(indj+k) = tmp1 * res1
dst_l(indj-k) = tmp1 * res2
end do
end do
else
do j = 0, pl
! Offset for dst_l
indj = j*j + j + 1
! k = 0
tmp1 = alpha * vscales(indj) * pow_r2
pow_r2 = pow_r2 * r2
res1 = 0.0_rp
do n = 0, pm
res1 = res1 + m2l_ztranslate_coef(n+1, 1, j+1)*src_m2(n+1)
end do
dst_l(indj) = beta*dst_l(indj) + tmp1*res1
! k != 0
do k = 1, j
! Offsets for src_m2
indk1 = pm + 2 + (2*pm-k+2)*(k-1)
indk2 = indk1 + pm - k + 1
res1 = 0.0_rp
res2 = 0.0_rp
do n = k, pm
res1 = res1 + &
& m2l_ztranslate_coef(n-k+1, k+1, j-k+1)* &
& src_m2(indk1+n-k)
res2 = res2 + &
& m2l_ztranslate_coef(n-k+1, k+1, j-k+1)* &
& src_m2(indk2+n-k)
end do
dst_l(indj+k) = beta*dst_l(indj+k) + tmp1*res1
dst_l(indj-k) = beta*dst_l(indj-k) + tmp1*res2
end do
end do
end if
end subroutine fmm_m2l_ztranslate_work