Transform spherical harmonics in the OXZ plane
This function implements @ref fmm_sph_rotate_oxz with predefined values of parameters \p alpha=1.0 and \p beta=0.0.
@param[in] p: maximum order of spherical harmonics
@param[in] r1xz: 2D transformation matrix in the OXZ plane
@param[in] alpha: Scalar multiplier for src
@param[in] src: Coefficients of initial spherical harmonics
@param[in] beta: Scalar multipler for dst
@param[out] dst: coefficients of rotated spherical harmonics
@param[out] work: Temporary workspace of a size (2(2p+1)(2p+3))
GCC$ unroll 4 GCC$ unroll 4 GCC$ unroll 4 GCC$ unroll 4 GCC$ unroll 4 GCC$ unroll 4 GCC$ unroll 4
Type | Intent | Optional | Attributes | Name | ||
---|---|---|---|---|---|---|
integer, | intent(in) | :: | p | |||
real(kind=rp), | intent(in) | :: | ctheta | |||
real(kind=rp), | intent(in) | :: | stheta | |||
real(kind=rp), | intent(in) | :: | alpha | |||
real(kind=rp), | intent(in) | :: | src((p+1)**2) | |||
real(kind=rp), | intent(in) | :: | beta | |||
real(kind=rp), | intent(out) | :: | dst((p+1)*(p+1)) | |||
real(kind=rp), | intent(out), | target | :: | work(4*p*p+13*p+4) |
subroutine fmm_sph_rotate_oxz_work(p, ctheta, stheta, alpha, src, beta, dst, &
& work)
! Inputs
integer, intent(in) :: p
real(rp), intent(in) :: ctheta, stheta, alpha, src((p+1)**2), beta
! Output
real(rp), intent(out) :: dst((p+1)*(p+1))
! Temporary workspace
real(rp), intent(out), target :: work(4*p*p+13*p+4)
! Local variables
real(rp) :: u, v, w, fl, fl2, tmp1, tmp2, vu(2), vv(2), vw(2), &
ctheta2, stheta2, cstheta
integer :: l, m, n, ind
! Pointers for a workspace
real(rp), pointer :: r(:, :, :), r_prev(:, :, :), scal_uvw_m(:), &
& scal_u_n(:), scal_v_n(:), scal_w_n(:), r_swap(:, :, :), vsqr(:)
!! Spherical harmonics Y_l^m with negative m transform into harmonics Y_l^m
!! with the same l and negative m, while harmonics Y_l^m with non-negative
!! m transform into harmonics Y_l^m with the same l and non-negative m.
!! Transformations for non-negative m will be stored in r(1, :, :) and for
!! negative m will be in r(2, :, :)
! In case alpha is 0.0 just scale output
if (abs(alpha) < eps_rp) then
! Set output to 0.0 if beta is also 0.0
if (abs(beta) < eps_rp) then
dst = 0.0
else
dst = beta * dst
end if
! Exit subroutine
return
end if
! Now alpha is non-0.0
! In case beta is 0.0 output is just overwritten without being read
if (abs(beta) < eps_rp) then
! Compute rotations/reflections
! l = 0
dst(1) = alpha * src(1)
if (p .eq. 0) then
return
end if
! l = 1
dst(2) = alpha * src(2)
dst(3) = alpha * (src(3)*ctheta - src(4)*stheta)
dst(4) = alpha * (src(3)*stheta + src(4)*ctheta)
if (p .eq. 1) then
return
end if
! Set pointers
l = 2 * (p+1) * (p+1)
r(1:2, 0:p, 0:p) => work(1:l)
m = 2 * l
r_prev(1:2, 0:p, 0:p) => work(l+1:m)
l = m + p + 1
scal_uvw_m(0:p) => work(m+1:l)
m = l + p
scal_u_n(0:p-1) => work(l+1:m)
l = m + p + 1
scal_v_n(0:p) => work(m+1:l)
m = l + p - 2
scal_w_n(1:p-2) => work(l+1:m)
l = m + p
vsqr(1:p) => work(m+1:l)
! l = 2, m >= 0
ctheta2 = ctheta * ctheta
cstheta = ctheta * stheta
stheta2 = stheta * stheta
r(1, 2, 2) = (ctheta2 + 1.0) / 2.0
r(1, 1, 2) = cstheta
r(1, 0, 2) = sqrt(3.0) / 2.0 * stheta2
dst(9) = alpha * (src(9)*r(1, 2, 2) + src(8)*r(1, 1, 2) + &
& src(7)*r(1, 0, 2))
r(1, 2, 1) = -cstheta
r(1, 1, 1) = ctheta2 - stheta2
r(1, 0, 1) = sqrt(3.0) * cstheta
dst(8) = alpha * (src(9)*r(1, 2, 1) + src(8)*r(1, 1, 1) + &
& src(7)*r(1, 0, 1))
r(1, 2, 0) = sqrt(3.0) / 2.0 * stheta2
r(1, 1, 0) = -sqrt(3.0) * cstheta
r(1, 0, 0) = (3.0*ctheta2-1.0) / 2.0
dst(7) = alpha * (src(9)*r(1, 2, 0) + src(8)*r(1, 1, 0) + &
& src(7)*r(1, 0, 0))
! l = 2, m < 0
r(2, 1, 1) = ctheta
r(2, 2, 1) = -stheta
dst(6) = alpha * (src(6)*r(2, 1, 1) + src(5)*r(2, 2, 1))
r(2, 1, 2) = stheta
r(2, 2, 2) = ctheta
dst(5) = alpha * (src(6)*r(2, 1, 2) + src(5)*r(2, 2, 2))
! l > 2
vsqr(1) = 1.0
vsqr(2) = 4.0
do l = 3, p
! Swap previous and current rotation matrices
r_swap => r_prev
r_prev => r
r => r_swap
! Prepare scalar factors
fl = dble(l)
fl2 = fl * fl
vsqr(l) = fl2
scal_uvw_m(0) = 1.0 / fl
!!GCC$ unroll 4
do m = 1, l-1
u = sqrt(fl2 - vsqr(m))
scal_uvw_m(m) = 1.0 / u
end do
u = 2.0 * dble(l)
u = sqrt(dble(u*(u-1.0)))
scal_uvw_m(l) = 1.0 / u
scal_u_n(0) = dble(l)
scal_v_n(0) = -sqrt(dble(l*(l-1))) / sqrt(2.0)
!!GCC$ unroll 4
do n = 1, l-2
u = sqrt(fl2-vsqr(n))
scal_u_n(n) = u
end do
!!GCC$ unroll 4
do n = 1, l-2
v = dble(l+n)
v = sqrt(v*v-v) / 2.0
scal_v_n(n) = v
w = dble(l-n)
w = -sqrt(w*w-w) / 2.0
scal_w_n(n) = w
end do
u = sqrt(dble(2*l-1))
scal_u_n(l-1) = u
v = sqrt(dble((2*l-1)*(2*l-2))) / 2.0
scal_v_n(l-1) = v
v = sqrt(dble(2*l*(2*l-1))) / 2.0
scal_v_n(l) = v
ind = l*l + l + 1
! m = l, n = l and m = -l, n = - l
vv = ctheta*r_prev(:, l-1, l-1) + r_prev(2:1:-1, l-1, l-1)
r(:, l, l) = vv * scal_v_n(l) * scal_uvw_m(l)
tmp1 = src(ind+l) * r(1, l, l)
tmp2 = src(ind-l) * r(2, l, l)
! m = l, n = l-1 and m = -l, n = 1-l
vu = stheta * r_prev(:, l-1, l-1)
vv = ctheta*r_prev(:, l-2, l-1) + r_prev(2:1:-1, l-2, l-1)
r(:, l-1, l) = (vu*scal_u_n(l-1)+vv*scal_v_n(l-1)) * scal_uvw_m(l)
tmp1 = tmp1 + src(ind+l-1)*r(1, l-1, l)
tmp2 = tmp2 + src(ind-l+1)*r(2, l-1, l)
! m = l, n = 1 and m = -l, n = -1
vu = stheta * r_prev(:, 1, l-1)
vv(1) = ctheta * r_prev(1, 0, l-1)
vv(2) = r_prev(1, 0, l-1)
vw = ctheta*r_prev(:, 2, l-1) - r_prev(2:1:-1, 2, l-1)
r(:, 1, l) = vu*scal_u_n(1) + vw*scal_w_n(1) + sqrt(2.0)*scal_v_n(1)*vv
r(:, 1, l) = r(:, 1, l) * scal_uvw_m(l)
tmp1 = tmp1 + src(ind+1)*r(1, 1, l)
tmp2 = tmp2 + src(ind-1)*r(2, 1, l)
! m = l, n = 0
u = stheta * r_prev(1, 0, l-1)
v = ctheta*r_prev(1, 1, l-1) - r_prev(2, 1, l-1)
r(1, 0, l) = (u*scal_u_n(0) + v*scal_v_n(0)) * scal_uvw_m(l)
tmp1 = tmp1 + src(ind)*r(1, 0, l)
! m = l, n = 2..l-2 and m = -l, n = 2-l..-2
!!GCC$ unroll 4
do n = 2, l-2
vu = stheta * r_prev(:, n, l-1)
vv = ctheta*r_prev(:, n-1, l-1) + r_prev(2:1:-1, n-1, l-1)
vw = ctheta*r_prev(:, n+1, l-1) - r_prev(2:1:-1, n+1, l-1)
vu = vu*scal_u_n(n) + vv*scal_v_n(n) + vw*scal_w_n(n)
r(:, n, l) = vu * scal_uvw_m(l)
tmp1 = tmp1 + src(ind+n)*r(1, n, l)
tmp2 = tmp2 + src(ind-n)*r(2, n, l)
end do
dst(ind+l) = alpha * tmp1
dst(ind-l) = alpha * tmp2
! Now deal with m = 0
! n = l and n = -l
v = -stheta * r_prev(1, l-1, 0)
u = scal_v_n(l) * scal_uvw_m(0)
r(1, l, 0) = v * u
tmp1 = src(ind+l) * r(1, l, 0)
! n = l-1
u = ctheta * r_prev(1, l-1, 0)
v = -stheta * r_prev(1, l-2, 0)
w = u*scal_u_n(l-1) + v*scal_v_n(l-1)
r(1, l-1, 0) = w * scal_uvw_m(0)
tmp1 = tmp1 + src(ind+l-1)*r(1, l-1, 0)
! n = 0
u = ctheta * r_prev(1, 0, 0)
v = -stheta * r_prev(1, 1, 0)
w = u*scal_u_n(0) + v*scal_v_n(0)
r(1, 0, 0) = w * scal_uvw_m(0)
tmp1 = tmp1 + src(ind)*r(1, 0, 0)
! n = 1
v = sqrt(2.0)*scal_v_n(1)*r_prev(1, 0, 0) + &
& scal_w_n(1)*r_prev(1, 2, 0)
u = ctheta * r_prev(1, 1, 0)
w = scal_u_n(1)*u - stheta*v
r(1, 1, 0) = w * scal_uvw_m(0)
tmp1 = tmp1 + src(ind+1)*r(1, 1, 0)
! n = 2..l-2
!!GCC$ unroll 4
do n = 2, l-2
v = scal_v_n(n)*r_prev(1, n-1, 0) + &
& scal_w_n(n)*r_prev(1, n+1, 0)
u = ctheta * r_prev(1, n, 0)
w = scal_u_n(n)*u - stheta*v
r(1, n, 0) = w * scal_uvw_m(0)
tmp1 = tmp1 + src(ind+n)*r(1, n, 0)
end do
dst(ind) = alpha * tmp1
! Now deal with m=1..l-1 and m=1-l..-1
!!GCC$ unroll 4
do m = 1, l-1
! n = l and n = -l
vv = -stheta * r_prev(:, l-1, m)
u = scal_v_n(l) * scal_uvw_m(m)
r(:, l, m) = vv * u
tmp1 = src(ind+l) * r(1, l, m)
tmp2 = src(ind-l) * r(2, l, m)
! n = l-1 and n = 1-l
vu = ctheta * r_prev(:, l-1, m)
vv = -stheta * r_prev(:, l-2, m)
vw = vu*scal_u_n(l-1) + vv*scal_v_n(l-1)
r(:, l-1, m) = vw * scal_uvw_m(m)
tmp1 = tmp1 + src(ind+l-1)*r(1, l-1, m)
tmp2 = tmp2 + src(ind-l+1)*r(2, l-1, m)
! n = 0
u = ctheta * r_prev(1, 0, m)
v = -stheta * r_prev(1, 1, m)
w = u*scal_u_n(0) + v*scal_v_n(0)
r(1, 0, m) = w * scal_uvw_m(m)
tmp1 = tmp1 + src(ind)*r(1, 0, m)
! n = 1
v = sqrt(2.0)*scal_v_n(1)*r_prev(1, 0, m) + &
& scal_w_n(1)*r_prev(1, 2, m)
u = ctheta * r_prev(1, 1, m)
w = scal_u_n(1)*u - stheta*v
r(1, 1, m) = w * scal_uvw_m(m)
tmp1 = tmp1 + src(ind+1)*r(1, 1, m)
! n = -1
u = ctheta * r_prev(2, 1, m)
w = -stheta * r_prev(2, 2, m)
v = u*scal_u_n(1) + w*scal_w_n(1)
r(2, 1, m) = v * scal_uvw_m(m)
tmp2 = tmp2 + src(ind-1)*r(2, 1, m)
! n = 2..l-2 and n = 2-l..-2
!!GCC$ unroll 4
do n = 2, l-2
vv = scal_v_n(n)*r_prev(:, n-1, m) + &
& scal_w_n(n)*r_prev(:, n+1, m)
vu = ctheta * r_prev(:, n, m)
vw = scal_u_n(n)*vu - stheta*vv
r(:, n, m) = vw * scal_uvw_m(m)
tmp1 = tmp1 + src(ind+n)*r(1, n, m)
tmp2 = tmp2 + src(ind-n)*r(2, n, m)
end do
dst(ind+m) = alpha * tmp1
dst(ind-m) = alpha * tmp2
end do
end do
else
stop "Not Implemented"
end if
end subroutine fmm_sph_rotate_oxz_work