fmm_sph_rotate_oxz_work Subroutine

private subroutine fmm_sph_rotate_oxz_work(p, ctheta, stheta, alpha, src, beta, dst, work)

Transform spherical harmonics in the OXZ plane

This function implements @ref fmm_sph_rotate_oxz with predefined values of parameters \p alpha=1.0 and \p beta=0.0.

@param[in] p: maximum order of spherical harmonics @param[in] r1xz: 2D transformation matrix in the OXZ plane @param[in] alpha: Scalar multiplier for src @param[in] src: Coefficients of initial spherical harmonics @param[in] beta: Scalar multipler for dst @param[out] dst: coefficients of rotated spherical harmonics @param[out] work: Temporary workspace of a size (2(2p+1)(2p+3))

GCC$ unroll 4 GCC$ unroll 4 GCC$ unroll 4 GCC$ unroll 4 GCC$ unroll 4 GCC$ unroll 4 GCC$ unroll 4

Arguments

Type IntentOptional Attributes Name
integer, intent(in) :: p
real(kind=rp), intent(in) :: ctheta
real(kind=rp), intent(in) :: stheta
real(kind=rp), intent(in) :: alpha
real(kind=rp), intent(in) :: src((p+1)**2)
real(kind=rp), intent(in) :: beta
real(kind=rp), intent(out) :: dst((p+1)*(p+1))
real(kind=rp), intent(out), target :: work(4*p*p+13*p+4)

Called by

proc~~fmm_sph_rotate_oxz_work~~CalledByGraph proc~fmm_sph_rotate_oxz_work fmm_sph_rotate_oxz_work proc~fmm_m2m_rotation_work fmm_m2m_rotation_work proc~fmm_m2m_rotation_work->proc~fmm_sph_rotate_oxz_work proc~fmm_l2l_rotation_work fmm_l2l_rotation_work proc~fmm_l2l_rotation_work->proc~fmm_sph_rotate_oxz_work proc~fmm_m2l_rotation_work fmm_m2l_rotation_work proc~fmm_m2l_rotation_work->proc~fmm_sph_rotate_oxz_work proc~fmm_m2m fmm_m2m proc~fmm_m2m->proc~fmm_m2m_rotation_work proc~fmm_l2l fmm_l2l proc~fmm_l2l->proc~fmm_l2l_rotation_work proc~fmm_m2l fmm_m2l proc~fmm_m2l->proc~fmm_m2l_rotation_work proc~tree_p2m tree_p2m proc~tree_p2m->proc~fmm_m2m proc~cart_propfar_at_ipart cart_propfar_at_ipart proc~cart_propfar_at_ipart->proc~fmm_l2l proc~tree_m2m tree_m2m proc~tree_m2m->proc~fmm_m2m proc~tree_m2l tree_m2l proc~tree_m2l->proc~fmm_m2l proc~tree_l2l tree_l2l proc~tree_l2l->proc~fmm_l2l proc~cart_propnear_at_ipart cart_propnear_at_ipart proc~cart_propnear_at_ipart->proc~fmm_m2l proc~fmm_solve_for_multipoles fmm_solve_for_multipoles proc~fmm_solve_for_multipoles->proc~tree_p2m proc~fmm_solve_for_multipoles->proc~tree_m2m proc~fmm_solve_for_multipoles->proc~tree_m2l proc~fmm_solve_for_multipoles->proc~tree_l2l proc~elec_prop_d2m elec_prop_D2M proc~elec_prop_d2m->proc~cart_propfar_at_ipart proc~prepare_fmm_ipd prepare_fmm_ipd proc~elec_prop_d2m->proc~prepare_fmm_ipd proc~field_extd2d field_extD2D proc~field_extd2d->proc~cart_propfar_at_ipart proc~prepare_fmm_ext_ipd prepare_fmm_ext_ipd proc~field_extd2d->proc~prepare_fmm_ext_ipd proc~elec_prop_d2d elec_prop_D2D proc~elec_prop_d2d->proc~cart_propfar_at_ipart proc~elec_prop_d2d->proc~prepare_fmm_ipd proc~elec_prop_m2m elec_prop_M2M proc~elec_prop_m2m->proc~cart_propfar_at_ipart proc~preapare_fmm_static preapare_fmm_static proc~elec_prop_m2m->proc~preapare_fmm_static proc~elec_prop_m2d elec_prop_M2D proc~elec_prop_m2d->proc~cart_propfar_at_ipart proc~elec_prop_m2d->proc~preapare_fmm_static proc~cart_prop_at_ipart cart_prop_at_ipart proc~cart_prop_at_ipart->proc~cart_propfar_at_ipart proc~cart_prop_at_ipart->proc~cart_propnear_at_ipart proc~fmm_solve fmm_solve proc~fmm_solve->proc~tree_m2m proc~fmm_solve->proc~tree_m2l proc~fmm_solve->proc~tree_l2l proc~prepare_fmm_ext_ipd->proc~fmm_solve_for_multipoles proc~preapare_fmm_static->proc~fmm_solve_for_multipoles proc~prepare_polelec prepare_polelec proc~prepare_polelec->proc~elec_prop_d2m proc~prepare_polelec->proc~elec_prop_d2d proc~prepare_polelec->proc~elec_prop_m2d proc~prepare_fixedelec prepare_fixedelec proc~prepare_fixedelec->proc~elec_prop_m2m proc~tmatvec_otf TMatVec_otf proc~tmatvec_otf->proc~field_extd2d proc~prepare_fmm_ipd->proc~prepare_fmm_ext_ipd proc~ommp_get_polelec_energy ommp_get_polelec_energy proc~ommp_get_polelec_energy->proc~prepare_polelec proc~energy_mm_pol energy_MM_pol proc~ommp_get_polelec_energy->proc~energy_mm_pol proc~ommp_set_external_field ommp_set_external_field proc~ommp_set_external_field->proc~prepare_polelec proc~polelec_geomgrad polelec_geomgrad proc~polelec_geomgrad->proc~prepare_polelec proc~energy_mm_pol->proc~prepare_polelec proc~fixedelec_geomgrad fixedelec_geomgrad proc~fixedelec_geomgrad->proc~prepare_fixedelec proc~energy_mm_mm energy_MM_MM proc~energy_mm_mm->proc~prepare_fixedelec proc~ommp_get_full_ele_energy ommp_get_full_ele_energy proc~ommp_get_full_ele_energy->proc~ommp_get_polelec_energy proc~ommp_get_fixedelec_energy ommp_get_fixedelec_energy proc~ommp_get_full_ele_energy->proc~ommp_get_fixedelec_energy proc~c_ommp_get_polelec_energy C_ommp_get_polelec_energy proc~c_ommp_get_polelec_energy->proc~ommp_get_polelec_energy proc~ommp_set_external_field_nomm ommp_set_external_field_nomm proc~ommp_set_external_field_nomm->proc~ommp_set_external_field proc~c_ommp_set_external_field C_ommp_set_external_field proc~c_ommp_set_external_field->proc~ommp_set_external_field proc~c_ommp_set_external_field_nomm C_ommp_set_external_field_nomm proc~c_ommp_set_external_field_nomm->proc~ommp_set_external_field proc~ommp_polelec_geomgrad ommp_polelec_geomgrad proc~ommp_polelec_geomgrad->proc~polelec_geomgrad proc~ommp_full_geomgrad ommp_full_geomgrad proc~ommp_full_geomgrad->proc~polelec_geomgrad proc~ommp_full_geomgrad->proc~fixedelec_geomgrad proc~ommp_fixedelec_geomgrad ommp_fixedelec_geomgrad proc~ommp_fixedelec_geomgrad->proc~fixedelec_geomgrad proc~ommp_get_fixedelec_energy->proc~energy_mm_mm proc~ommp_get_full_energy ommp_get_full_energy proc~ommp_get_full_energy->proc~ommp_get_full_ele_energy proc~c_ommp_polelec_geomgrad C_ommp_polelec_geomgrad proc~c_ommp_polelec_geomgrad->proc~ommp_polelec_geomgrad proc~c_ommp_full_geomgrad C_ommp_full_geomgrad proc~c_ommp_full_geomgrad->proc~ommp_full_geomgrad proc~c_ommp_fixedelec_geomgrad C_ommp_fixedelec_geomgrad proc~c_ommp_fixedelec_geomgrad->proc~ommp_fixedelec_geomgrad proc~c_ommp_get_full_ele_energy C_ommp_get_full_ele_energy proc~c_ommp_get_full_ele_energy->proc~ommp_get_full_ele_energy proc~c_ommp_get_fixedelec_energy C_ommp_get_fixedelec_energy proc~c_ommp_get_fixedelec_energy->proc~ommp_get_fixedelec_energy proc~c_ommp_get_full_energy C_ommp_get_full_energy proc~c_ommp_get_full_energy->proc~ommp_get_full_energy

Contents


Source Code

    subroutine fmm_sph_rotate_oxz_work(p, ctheta, stheta, alpha, src, beta, dst, &
        & work)
    ! Inputs
    integer, intent(in) :: p
    real(rp), intent(in) :: ctheta, stheta, alpha, src((p+1)**2), beta
    ! Output
    real(rp), intent(out) :: dst((p+1)*(p+1))
    ! Temporary workspace
    real(rp), intent(out), target :: work(4*p*p+13*p+4)
    ! Local variables
    real(rp) :: u, v, w, fl, fl2, tmp1, tmp2, vu(2), vv(2), vw(2), &
        ctheta2, stheta2, cstheta
    integer :: l, m, n, ind
    ! Pointers for a workspace
    real(rp), pointer :: r(:, :, :), r_prev(:, :, :), scal_uvw_m(:), &
        & scal_u_n(:), scal_v_n(:), scal_w_n(:), r_swap(:, :, :), vsqr(:)
    !! Spherical harmonics Y_l^m with negative m transform into harmonics Y_l^m
    !! with the same l and negative m, while harmonics Y_l^m with non-negative
    !! m transform into harmonics Y_l^m with the same l and non-negative m.
    !! Transformations for non-negative m will be stored in r(1, :, :) and for
    !! negative m will be in r(2, :, :)
    ! In case alpha is 0.0 just scale output
    if (abs(alpha) < eps_rp) then
        ! Set output to 0.0 if beta is also 0.0
        if (abs(beta) < eps_rp) then
            dst = 0.0
        else
            dst = beta * dst
        end if
        ! Exit subroutine
        return
    end if
    ! Now alpha is non-0.0
    ! In case beta is 0.0 output is just overwritten without being read
    if (abs(beta) < eps_rp) then
        ! Compute rotations/reflections
        ! l = 0
        dst(1) = alpha * src(1)
        if (p .eq. 0) then
            return
        end if
        ! l = 1
        dst(2) = alpha * src(2)
        dst(3) = alpha * (src(3)*ctheta - src(4)*stheta)
        dst(4) = alpha * (src(3)*stheta + src(4)*ctheta)
        if (p .eq. 1) then
            return
        end if
        ! Set pointers
        l = 2 * (p+1) * (p+1)
        r(1:2, 0:p, 0:p) => work(1:l)
        m = 2 * l
        r_prev(1:2, 0:p, 0:p) => work(l+1:m)
        l = m + p + 1
        scal_uvw_m(0:p) => work(m+1:l)
        m = l + p
        scal_u_n(0:p-1) => work(l+1:m)
        l = m + p + 1
        scal_v_n(0:p) => work(m+1:l)
        m = l + p - 2
        scal_w_n(1:p-2) => work(l+1:m)
        l = m + p
        vsqr(1:p) => work(m+1:l)
        ! l = 2, m >= 0
        ctheta2 = ctheta * ctheta
        cstheta = ctheta * stheta
        stheta2 = stheta * stheta
        r(1, 2, 2) = (ctheta2 + 1.0) / 2.0
        r(1, 1, 2) = cstheta
        r(1, 0, 2) = sqrt(3.0) / 2.0 * stheta2
        dst(9) = alpha * (src(9)*r(1, 2, 2) + src(8)*r(1, 1, 2) + &
            & src(7)*r(1, 0, 2))
        r(1, 2, 1) = -cstheta
        r(1, 1, 1) = ctheta2 - stheta2
        r(1, 0, 1) = sqrt(3.0) * cstheta
        dst(8) = alpha * (src(9)*r(1, 2, 1) + src(8)*r(1, 1, 1) + &
            & src(7)*r(1, 0, 1))
        r(1, 2, 0) = sqrt(3.0) / 2.0 * stheta2
        r(1, 1, 0) = -sqrt(3.0) * cstheta
        r(1, 0, 0) = (3.0*ctheta2-1.0) / 2.0
        dst(7) = alpha * (src(9)*r(1, 2, 0) + src(8)*r(1, 1, 0) + &
            & src(7)*r(1, 0, 0))
        ! l = 2,  m < 0
        r(2, 1, 1) = ctheta
        r(2, 2, 1) = -stheta
        dst(6) = alpha * (src(6)*r(2, 1, 1) + src(5)*r(2, 2, 1))
        r(2, 1, 2) = stheta
        r(2, 2, 2) = ctheta
        dst(5) = alpha * (src(6)*r(2, 1, 2) + src(5)*r(2, 2, 2))
        ! l > 2
        vsqr(1) = 1.0
        vsqr(2) = 4.0
        do l = 3, p
            ! Swap previous and current rotation matrices
            r_swap => r_prev
            r_prev => r
            r => r_swap
            ! Prepare scalar factors
            fl = dble(l)
            fl2 = fl * fl
            vsqr(l) = fl2
            scal_uvw_m(0) = 1.0 / fl
            !!GCC$ unroll 4
            do m = 1, l-1
                u = sqrt(fl2 - vsqr(m))
                scal_uvw_m(m) = 1.0 / u
            end do
            u = 2.0 * dble(l)
            u = sqrt(dble(u*(u-1.0)))
            scal_uvw_m(l) = 1.0 / u
            scal_u_n(0) = dble(l)
            scal_v_n(0) = -sqrt(dble(l*(l-1))) / sqrt(2.0)
            !!GCC$ unroll 4
            do n = 1, l-2
                u = sqrt(fl2-vsqr(n))
                scal_u_n(n) = u
            end do
            !!GCC$ unroll 4
            do n = 1, l-2
                v = dble(l+n)
                v = sqrt(v*v-v) / 2.0
                scal_v_n(n) = v
                w = dble(l-n)
                w = -sqrt(w*w-w) / 2.0
                scal_w_n(n) = w
            end do
            u = sqrt(dble(2*l-1))
            scal_u_n(l-1) = u
            v = sqrt(dble((2*l-1)*(2*l-2))) / 2.0
            scal_v_n(l-1) = v
            v = sqrt(dble(2*l*(2*l-1))) / 2.0
            scal_v_n(l) = v
            ind = l*l + l + 1
            ! m = l, n = l and m = -l, n = - l
            vv = ctheta*r_prev(:, l-1, l-1) + r_prev(2:1:-1, l-1, l-1)
            r(:, l, l) = vv * scal_v_n(l) * scal_uvw_m(l)
            tmp1 = src(ind+l) * r(1, l, l)
            tmp2 = src(ind-l) * r(2, l, l)
            ! m = l, n = l-1 and m = -l, n = 1-l
            vu = stheta * r_prev(:, l-1, l-1)
            vv = ctheta*r_prev(:, l-2, l-1) + r_prev(2:1:-1, l-2, l-1)
            r(:, l-1, l) = (vu*scal_u_n(l-1)+vv*scal_v_n(l-1)) * scal_uvw_m(l)
            tmp1 = tmp1 + src(ind+l-1)*r(1, l-1, l)
            tmp2 = tmp2 + src(ind-l+1)*r(2, l-1, l)
            ! m = l, n = 1 and m = -l, n = -1
            vu = stheta * r_prev(:, 1, l-1)
            vv(1) = ctheta * r_prev(1, 0, l-1)
            vv(2) = r_prev(1, 0, l-1)
            vw = ctheta*r_prev(:, 2, l-1) - r_prev(2:1:-1, 2, l-1)
            r(:, 1, l) = vu*scal_u_n(1) + vw*scal_w_n(1) + sqrt(2.0)*scal_v_n(1)*vv
            r(:, 1, l) = r(:, 1, l) * scal_uvw_m(l)
            tmp1 = tmp1 + src(ind+1)*r(1, 1, l)
            tmp2 = tmp2 + src(ind-1)*r(2, 1, l)
            ! m = l, n = 0
            u = stheta * r_prev(1, 0, l-1)
            v = ctheta*r_prev(1, 1, l-1) - r_prev(2, 1, l-1)
            r(1, 0, l) = (u*scal_u_n(0) + v*scal_v_n(0)) * scal_uvw_m(l)
            tmp1 = tmp1 + src(ind)*r(1, 0, l)
            ! m = l, n = 2..l-2 and m = -l, n = 2-l..-2
            !!GCC$ unroll 4
            do n = 2, l-2
                vu = stheta * r_prev(:, n, l-1)
                vv = ctheta*r_prev(:, n-1, l-1) + r_prev(2:1:-1, n-1, l-1)
                vw = ctheta*r_prev(:, n+1, l-1) - r_prev(2:1:-1, n+1, l-1)
                vu = vu*scal_u_n(n) + vv*scal_v_n(n) + vw*scal_w_n(n)
                r(:, n, l) = vu * scal_uvw_m(l)
                tmp1 = tmp1 + src(ind+n)*r(1, n, l)
                tmp2 = tmp2 + src(ind-n)*r(2, n, l)
            end do
            dst(ind+l) = alpha * tmp1
            dst(ind-l) = alpha * tmp2
            ! Now deal with m = 0
            ! n = l and n = -l
            v = -stheta * r_prev(1, l-1, 0)
            u = scal_v_n(l) * scal_uvw_m(0)
            r(1, l, 0) = v * u
            tmp1 = src(ind+l) * r(1, l, 0)
            ! n = l-1
            u = ctheta * r_prev(1, l-1, 0)
            v = -stheta * r_prev(1, l-2, 0)
            w = u*scal_u_n(l-1) + v*scal_v_n(l-1)
            r(1, l-1, 0) = w * scal_uvw_m(0)
            tmp1 = tmp1 + src(ind+l-1)*r(1, l-1, 0)
            ! n = 0
            u = ctheta * r_prev(1, 0, 0)
            v = -stheta * r_prev(1, 1, 0)
            w = u*scal_u_n(0) + v*scal_v_n(0)
            r(1, 0, 0) = w * scal_uvw_m(0)
            tmp1 = tmp1 + src(ind)*r(1, 0, 0)
            ! n = 1
            v = sqrt(2.0)*scal_v_n(1)*r_prev(1, 0, 0) + &
                & scal_w_n(1)*r_prev(1, 2, 0)
            u = ctheta * r_prev(1, 1, 0)
            w = scal_u_n(1)*u - stheta*v
            r(1, 1, 0) = w * scal_uvw_m(0)
            tmp1 = tmp1 + src(ind+1)*r(1, 1, 0)
            ! n = 2..l-2
            !!GCC$ unroll 4
            do n = 2, l-2
                v = scal_v_n(n)*r_prev(1, n-1, 0) + &
                    & scal_w_n(n)*r_prev(1, n+1, 0)
                u = ctheta * r_prev(1, n, 0)
                w = scal_u_n(n)*u - stheta*v
                r(1, n, 0) = w * scal_uvw_m(0)
                tmp1 = tmp1 + src(ind+n)*r(1, n, 0)
            end do
            dst(ind) = alpha * tmp1
            ! Now deal with m=1..l-1 and m=1-l..-1
            !!GCC$ unroll 4
            do m = 1, l-1
                ! n = l and n = -l
                vv = -stheta * r_prev(:, l-1, m)
                u = scal_v_n(l) * scal_uvw_m(m)
                r(:, l, m) = vv * u
                tmp1 = src(ind+l) * r(1, l, m)
                tmp2 = src(ind-l) * r(2, l, m)
                ! n = l-1 and n = 1-l
                vu = ctheta * r_prev(:, l-1, m)
                vv = -stheta * r_prev(:, l-2, m)
                vw = vu*scal_u_n(l-1) + vv*scal_v_n(l-1)
                r(:, l-1, m) = vw * scal_uvw_m(m)
                tmp1 = tmp1 + src(ind+l-1)*r(1, l-1, m)
                tmp2 = tmp2 + src(ind-l+1)*r(2, l-1, m)
                ! n = 0
                u = ctheta * r_prev(1, 0, m)
                v = -stheta * r_prev(1, 1, m)
                w = u*scal_u_n(0) + v*scal_v_n(0)
                r(1, 0, m) = w * scal_uvw_m(m)
                tmp1 = tmp1 + src(ind)*r(1, 0, m)
                ! n = 1
                v = sqrt(2.0)*scal_v_n(1)*r_prev(1, 0, m) + &
                    & scal_w_n(1)*r_prev(1, 2, m)
                u = ctheta * r_prev(1, 1, m)
                w = scal_u_n(1)*u - stheta*v
                r(1, 1, m) = w * scal_uvw_m(m)
                tmp1 = tmp1 + src(ind+1)*r(1, 1, m)
                ! n = -1
                u = ctheta * r_prev(2, 1, m)
                w = -stheta * r_prev(2, 2, m)
                v = u*scal_u_n(1) + w*scal_w_n(1)
                r(2, 1, m) = v * scal_uvw_m(m)
                tmp2 = tmp2 + src(ind-1)*r(2, 1, m)
                ! n = 2..l-2 and n = 2-l..-2
                !!GCC$ unroll 4
                do n = 2, l-2
                    vv = scal_v_n(n)*r_prev(:, n-1, m) + &
                        & scal_w_n(n)*r_prev(:, n+1, m)
                    vu = ctheta * r_prev(:, n, m)
                    vw = scal_u_n(n)*vu - stheta*vv
                    r(:, n, m) = vw * scal_uvw_m(m)
                    tmp1 = tmp1 + src(ind+n)*r(1, n, m)
                    tmp2 = tmp2 + src(ind-n)*r(2, n, m)
                end do
                dst(ind+m) = alpha * tmp1
                dst(ind-m) = alpha * tmp2
            end do
        end do
    else
        stop "Not Implemented"
    end if
end subroutine fmm_sph_rotate_oxz_work