import importlib
from copy import copy
from scipy.special import gammaln
import numpy as np
from numpy.polynomial import chebyshev as n_cheb
from mpi4py import MPI
from mpi4py_fft import fftw
from mpi4py_fft.fftw.utilities import FFTW_MEASURE, FFTW_PRESERVE_INPUT
from shenfun.optimization import runtimeoptimizer
from shenfun.optimization import cython
from shenfun.spectralbase import islicedict, slicedict
# Public API of this module.
__all__ = ['DLT', 'leg2cheb', 'cheb2leg', 'Leg2chebHaleTownsend',
           'Leg2Cheb', 'Cheb2Leg', 'FMMLeg2Cheb', 'FMMCheb2Leg']

# Optional Cython-accelerated implementations. These resolve to None when the
# compiled extension does not provide them, so callers must check before use.
Leg2Cheb = getattr(cython, 'Leg2Cheb', None)
Cheb2Leg = getattr(cython, 'Cheb2Leg', None)
# Lambda(z) = Gamma(z+1/2)/Gamma(z+1); a pure numpy fallback exists as a
# commented-out lambda further down in this file.
Lambda = getattr(cython, 'Lambda', None)
# [docs]
class DLT:
    r"""Discrete Legendre Transform

    A class for performing fast FFT-based discrete Legendre transforms, both
    forwards and backwards. Based on::

        Nicholas Hale and Alex Townsend "A fast FFT-based discrete Legendre
        transform", IMA Journal of Numerical Analysis (2015)
        (https://arxiv.org/abs/1505.00354)

    Parameters
    ----------
    input_array : real or complex array
    s : sequence of ints, optional
        Not used - included for compatibility with Numpy
    axes : integer or 1-tuple of int, optional
        Axis over which to compute the DLT. Named axes for compatibility.
    threads : int, optional
        Number of threads used in computing DLT.
    kind : str, optional
        Either one of

        - 'forward'
        - 'backward'
        - 'scalar product'

        The scalar product is exactly like the forward transform, except that
        the Legendre mass matrix is not applied to the output.
    flags : sequence of ints, optional
        Flags from

        - FFTW_MEASURE
        - FFTW_EXHAUSTIVE
        - FFTW_PATIENT
        - FFTW_DESTROY_INPUT
        - FFTW_PRESERVE_INPUT
        - FFTW_UNALIGNED
        - FFTW_CONSERVE_MEMORY
        - FFTW_ESTIMATE

    output_array : complex array, optional
        Array to be used as output array. Must be of correct shape, type,
        strides and alignment

    Note
    ----
    The Legendre series expansion of :math:`f(x)` is

    .. math::

        f(x) = \sum_{k=0}^{N-1} \hat{f}_k L_k(x)

    The series evaluated at quadrature points :math:`\{x_j\}_{j=0}^{N-1}` gives the
    vector :math:`\{f(x_j)\}_{j=0}^{N-1}`. We define a forward transform as
    computing :math:`\{\hat{f}_k\}_{k=0}^{N-1}` from :math:`\{f(x_j)\}_{j=0}^{N-1}`.
    The other way around, a backward transform, computes :math:`\{f(x_j)\}_{j=0}^{N-1}`
    given :math:`\{\hat{f}_k\}_{k=0}^{N-1}`. This is in agreement with shenfun's
    definition of forward/backward directions, but it disagrees with the definitions
    used by Hale and Townsend.

    Also note that this `fast` transform is actually slower than the default recursive
    version for approximately :math:`N<1000`.

    Example
    -------
    >>> import numpy as np
    >>> from shenfun import legendre, FunctionSpace, Function, Array
    >>> N = 4
    >>> L = FunctionSpace(N, 'L')
    >>> u = Function(L)
    >>> u[1] = 1
    >>> c = Array(L)
    >>> c = L.backward(u, c, kind='fast')
    >>> print(c)
    [-0.86113631 -0.33998104  0.33998104  0.86113631]
    >>> u[:] = 1
    >>> np.alltrue(np.abs(u - u.backward().forward()) < 1e-12)
    True
    """
    def __init__(self, input_array, s=None, axes=(-1,), threads=1, kind='backward',
                 flags=(FFTW_MEASURE, FFTW_PRESERVE_INPUT), output_array=None):
        from . import fastgl
        if isinstance(axes, tuple):
            assert len(axes) == 1
            axis = axes[-1]
        elif isinstance(axes, int):
            axis = axes
        self.axis = axis
        assert kind in ('forward', 'scalar product', 'backward')
        self.kind = kind
        # Slice helpers for addressing entries along `axis` of a possibly
        # multidimensional array.
        self.sl = slicedict(axis=axis, dimensions=input_array.ndim)
        self.si = islicedict(axis=axis, dimensions=input_array.ndim)
        N = self.N = input_array.shape[axis]
        # Gauss-Legendre nodes/weights and Gauss-Chebyshev nodes.
        xl, wl = fastgl.leggauss(N)
        xc = n_cheb.chebgauss(N)[0]
        thetal = np.arccos(xl)[::-1]
        thetac = np.arccos(xc)
        s = [None]*input_array.ndim
        s[axis] = slice(None)
        s = tuple(s) # broadcast to ndims
        # dtheta is the small angular offset between Legendre and Chebyshev
        # points; __call__ expands in powers of dtheta (Hale & Townsend).
        self.dtheta = (thetal - thetac)[s]
        self.n = np.arange(N, dtype=float)[s]
        if kind in ('forward', 'scalar product'):
            self.wl = wl[s]  # Gauss-Legendre quadrature weights
            # Alternating sign (-1)^k applied to the result of the forward
            # transform; for 'forward' the inverse Legendre mass matrix
            # factor (k+1/2) is folded in as well.
            self.nsign = np.ones(N)
            self.nsign[1::2] = -1
            if kind == 'forward': # apply inverse mass matrix as well
                self.nsign = self.nsign[s]*(self.n+0.5)
            else:
                self.nsign = self.nsign[s]
        # Chebyshev quadrature normalization c_k (pi for k=0, pi/2 otherwise).
        ck = np.full(N, np.pi/2); ck[0] = np.pi
        self.ck = ck[s]
        U = input_array
        V = output_array if output_array is not None else U.copy()
        self.plan(U, V, kind, threads, flags)
        ##self.leg2chebclass = Leg2Cheb(U, axis=axis, maxs=100, use_direct=500)
        # Cython Legendre<->Chebyshev connection; used to pre/post-process the
        # coefficients around the dct/dst evaluations.
        self.leg2chebclass = Leg2Cheb(U, domains=2, diagonals=16, axis=axis, maxs=100, use_direct=1000)

    def plan(self, U, V, kind, threads, flags):
        """Create the internal DCT/DST plans for input U and output V.

        Type-2 transforms are used for forward/scalar product, type-3 for
        backward, matching the direction of the Chebyshev evaluation.
        """
        Uc = U.copy()
        Vc = V.copy()
        # dct and dst use the same input/output arrays
        if kind in ('forward', 'scalar product'):
            self.dct = DCT(Uc, axis=self.axis, type=2, threads=threads, flags=flags, output_array=Vc)
            self.dst = DST(Uc, axis=self.axis, type=2, threads=threads, flags=flags, output_array=Vc)
        else:
            self.dct = DCT(Uc, axis=self.axis, type=3, threads=threads, flags=flags, output_array=Vc)
            self.dst = DST(Uc, axis=self.axis, type=3, threads=threads, flags=flags, output_array=Vc)
        self._input_array = U
        self._output_array = V

    @property
    def input_array(self):
        return self._input_array

    @property
    def output_array(self):
        return self._output_array

    def __call__(self, input_array=None, output_array=None, **kw):
        """Execute the transform; returns the output array.

        The result is assembled as a Taylor series in dtheta, where each term
        is evaluated with a DCT (even terms) or DST (odd terms). The series
        is truncated when the norm of the latest term falls below 1e-16.
        """
        if input_array is not None:
            self._input_array[:] = input_array
        # Set up x, which is input array to dct/dst
        # Some dcts apparently destroys the input_array, so need copy
        x = np.zeros_like(self.input_array)
        x[:] = self._input_array
        if self.kind in ('forward', 'scalar product'):
            x *= self.wl
        else:
            # Backward: first convert Legendre to Chebyshev coefficients.
            x = self.leg2chebclass(x.copy(), x)
        fk = self.dct(x).copy()  # zeroth-order term
        nfac = 1       # n! accumulator
        y = 1          # dtheta^n or n^n factor, depending on direction
        n = 1
        converged = False
        while not converged:
            even = n % 2 == 0
            deven = (n+1)//2 % 2 == 0  # sign pattern of the Taylor term
            nfac *= n
            if self.kind == 'backward':
                x *= self.n
                y *= self.dtheta
            else:
                x *= self.dtheta
                y *= self.n
            sign = 1 if deven else -1
            fft = self.dct if even else self.dst
            h = fft(x)
            df = sign/nfac*y*h
            fk += df
            error = np.linalg.norm(df)
            #print(f"{n:4d} {error:2.4e}")
            converged = error < 1e-16
            n += 1
        if self.kind in ('forward', 'scalar product'):
            # Apply the transpose Chebyshev->Legendre map and sign/mass
            # normalization.
            fk = self.leg2chebclass(fk.copy(), fk, transpose=True)
            fk *= self.nsign
        else:
            fk[:] = fk[self.sl[slice(-1, None, -1)]] # reverse
        if output_array is not None:
            output_array[:] = fk
            return output_array
        self._output_array[:] = fk
        return self._output_array
class DCT:
    """Discrete cosine transform with appropriate scaling

    Parameters
    ----------
    input_array : array
    s : sequence of ints, optional
        Not used - included for compatibility with Numpy
    axes : sequence of ints, optional
        Axes over which to compute the real-to-real dct.
    type : int, optional
        Type of `dct <http://www.fftw.org/fftw3_doc/Real_002dto_002dReal-Transform-Kinds.html>`_

        - 1 - FFTW_REDFT00
        - 2 - FFTW_REDFT10,
        - 3 - FFTW_REDFT01,
        - 4 - FFTW_REDFT11

    threads : int, optional
        Number of threads used in computing dct.
    flags : sequence of ints, optional
        Flags from

        - FFTW_MEASURE
        - FFTW_EXHAUSTIVE
        - FFTW_PATIENT
        - FFTW_DESTROY_INPUT
        - FFTW_PRESERVE_INPUT
        - FFTW_UNALIGNED
        - FFTW_CONSERVE_MEMORY
        - FFTW_ESTIMATE

    output_array : array, optional
        Array to be used as output array. Must be of correct shape, type,
        strides and alignment
    """
    def __init__(self, input_array, type=3, s=None, axis=-1, threads=1,
                 flags=(FFTW_MEASURE,), output_array=None):
        self.axis = axis
        self.type = type
        self.dct = fftw.dctn(input_array, axes=(axis,), type=type, threads=threads,
                             flags=flags, output_array=output_array)
        # Slice that picks index 0 along the transform axis; used to correct
        # the type-3 result in __call__.
        s = [slice(None)]*input_array.ndim
        s[self.axis] = slice(0, 1)
        self.s = tuple(s)

    @property
    def input_array(self):
        return self.dct.input_array

    @property
    def output_array(self):
        return self.dct.output_array

    def destroy(self):
        # Release the wrapped FFTW plan. Added for consistency with
        # DST.destroy, so both transforms can be cleaned up explicitly.
        self.dct.destroy()

    def __call__(self, input_array=None, output_array=None):
        """Execute the transform and return the (scaled) output array."""
        if input_array is not None:
            self.input_array[:] = input_array
        out = self.dct()
        if self.type == 3:
            # FFTW's REDFT01 computes x_0 + 2*sum_{n>=1} x_n cos(n*theta_j);
            # adding x_0 and halving (below) yields sum_{n>=0} x_n cos(n*theta_j).
            out += self.dct.input_array[self.s]
        # Remove FFTW's factor-of-two convention (same scaling as DST).
        out /= 2
        if output_array is not None:
            output_array[:] = out
            return output_array
        return out
class DST:
    """Discrete sine transform with appropriate scaling

    Parameters
    ----------
    input_array : array
    s : sequence of ints, optional
        Not used - included for compatibility with Numpy
    axes : sequence of ints, optional
        Axes over which to compute the real-to-real dst.
    type : int, optional
        Type of `dst <http://www.fftw.org/fftw3_doc/Real_002dto_002dReal-Transform-Kinds.html>`_

        - 1 - FFTW_RODFT00
        - 2 - FFTW_RODFT10
        - 3 - FFTW_RODFT01
        - 4 - FFTW_RODFT11

    threads : int, optional
        Number of threads used in computing dst.
    flags : sequence of ints, optional
        Flags from

        - FFTW_MEASURE
        - FFTW_EXHAUSTIVE
        - FFTW_PATIENT
        - FFTW_DESTROY_INPUT
        - FFTW_PRESERVE_INPUT
        - FFTW_UNALIGNED
        - FFTW_CONSERVE_MEMORY
        - FFTW_ESTIMATE

    output_array : array, optional
        Array to be used as output array. Must be of correct shape, type,
        strides and alignment
    """
    def __init__(self, input_array, type=3, s=None, axis=-1, threads=1,
                 flags=None, output_array=None):
        self.axis = axis
        self.type = type
        self.dst = fftw.dstn(input_array, axes=(axis,), type=type, threads=threads,
                             flags=flags, output_array=output_array)
        # Slice helpers for the index shifts performed in __call__.
        self.sl = slicedict(axis=axis, dimensions=input_array.ndim)
        self.si = islicedict(axis=axis, dimensions=input_array.ndim)

    @property
    def input_array(self):
        return self.dst.input_array

    @property
    def output_array(self):
        return self.dst.output_array

    def destroy(self):
        self.dst.destroy()

    def __call__(self, input_array=None, output_array=None):
        """Execute the transform and return the (shifted and scaled) output."""
        if input_array is not None:
            self.input_array[:] = input_array
        x = self.input_array
        if self.type == 3:
            # FFTW's sine convention sums sin((n+1)*theta): shift the input
            # coefficients one step down so index n multiplies sin(n*theta);
            # the last entry is dropped (zeroed).
            x[self.sl[slice(0, -1)]] = x[self.sl[slice(1, None)]]
            x[self.si[-1]] = 0
            out = self.dst()
        elif self.type == 2:
            # Shift the output one step up so index n corresponds to
            # sin(n*theta); the n=0 entry is identically zero.
            out = self.dst()
            out[self.sl[slice(1, None)]] = out[self.sl[slice(0, -1)]]
            out[self.si[0]] = 0
        # Remove FFTW's factor-of-two convention.
        out /= 2
        if output_array is not None:
            output_array[:] = out
            return output_array
        return out
# Pure numpy fallback for the Cython Lambda (Lambda(z) = Gamma(z+1/2)/Gamma(z+1)):
#Lambda = lambda z: np.exp(gammaln(z+0.5) - gammaln(z+1))
# Entry functions for the Legendre<->Chebyshev connection matrices that are
# sampled by the hierarchical (FMM-type) decomposition below. Mxy is used for
# leg2cheb and Lxy for cheb2leg (see `fun = Mxy if l2c ... else Lxy` in
# getChebyshev). LxyE/MxyE/Hxy are variants not used in the visible part of
# this module — presumably alternative even-index/scaled forms; confirm
# against callers elsewhere.
LxyE = lambda x, y: -0.5/(2*x+2*y+1)/(y-x)*Lambda(y-x-1)*Lambda(y+x-0.5)
MxyE = lambda x, y: Lambda(y-x)*Lambda(y+x)
Mxy = lambda x, y: Lambda((y-x)/2)*Lambda((y+x)/2)
Lxy = lambda x, y: -1/(x+y+1)/(y-x)*Lambda((y-x-2)/2)*Lambda((y+x-1)/2)
Hxy = lambda x, y: Lambda((y-x)/2)*Lambda((y+x)/2)/np.sqrt((x+y)*(y-x))
@runtimeoptimizer
def leg2cheb(cl, cc=None, axis=0, transpose=False):
    r"""Compute Chebyshev coefficients from Legendre coefficients

    .. math::

        \hat{c}^{cheb} = M \hat{c}^{leg}

    where :math:`\hat{c}^{cheb} \in \mathbb{R}^N` are the Chebyshev
    coefficients, :math:`\hat{c}^{leg} \in \mathbb{R}^N` the Legendre
    coefficients and :math:`M\in\mathbb{R}^{N \times N}` the matrix for the
    conversion. Note that if keyword transpose is true, then we compute

    .. math::

        \hat{a} = M^T \hat{b}

    for some vectors :math:`\hat{a}` and :math:`\hat{b}`.

    Parameters
    ----------
    cl : array
        The Legendre coefficients (if transpose is False)
    cc : array (return array), optional
        The Chebyshev coefficients (if transpose is False)
    axis : int, optional
        The axis over which to take the transform
    transpose : bool
        Whether to perform the transpose operation

    Note
    ----
    This is a low-memory implementation of the direct matrix vector
    product. The matrix :math:`M` is given by Alpert and Rokhlin 'A Fast Algorithm
    for the Evaluation of Legendre Expansions', SIAM Journal on Scientific
    and Statistical Computing, 12, 1, 158-179, (1991), 10.1137/0912009.
    The matrix is not explicitly created.
    """
    if cc is None:
        cc = np.zeros_like(cl)
    else:
        cc.fill(0)
    # Work with the transform axis first; moveaxis returns views, so the
    # in-place accumulation below still writes into the caller's array.
    if axis > 0:
        cl = np.moveaxis(cl, axis, 0)
        cc = np.moveaxis(cc, axis, 0)
    N = cl.shape[0]
    k = np.arange(N)
    a = Lambda(k)  # Lambda(k) = Gamma(k+1/2)/Gamma(k+1)
    sl = [None]*cc.ndim
    # Chebyshev normalization c_k (pi for k=0, pi/2 otherwise), broadcast
    # along the transform axis.
    ck = np.full(N, np.pi/2); ck[0] = np.pi
    sl[0] = slice(None)
    ck = ck[tuple(sl)]
    if transpose is False:
        # Accumulate one (upper) diagonal of M per iteration; only even
        # offsets n contribute.
        for n in range(0, N, 2):
            sl[0] = slice(n//2, N-n//2)
            cc[:(N-n)] += a[n//2]*a[tuple(sl)]*cl[n:]
        cc /= ck
    else:
        # Transpose operation: apply the normalization first, then the
        # diagonals of M^T.
        cx = cl/ck
        for n in range(0, N, 2):
            sl[0] = slice(n//2, N-n//2)
            cc[n:] += a[n//2]*a[tuple(sl)]*cx[:(N-n)]
    if axis > 0:
        cl = np.moveaxis(cl, 0, axis)
        cc = np.moveaxis(cc, 0, axis)
    return cc
@runtimeoptimizer
def cheb2leg(cc, cl=None, axis=0):
    r"""Compute Legendre coefficients from Chebyshev coefficients

    .. math::

        \hat{c}^{leg} = L \hat{c}^{cheb}

    where :math:`\hat{c}^{cheb} \in \mathbb{R}^N` are the Chebyshev
    coefficients, :math:`\hat{c}^{leg} \in \mathbb{R}^N` the Legendre
    coefficients and :math:`L\in\mathbb{R}^{N \times N}` the matrix for the
    conversion.

    Parameters
    ----------
    cc : array
        The Chebyshev coefficients
    cl : array
        The Legendre coefficients (return array)
    axis : int, optional
        The axis over which to take the transform

    Note
    ----
    This is a low-memory implementation of the direct matrix vector
    product. The matrix :math:`L` is given by Alpert and Rokhlin 'A Fast Algorithm
    for the Evaluation of Legendre Expansions', SIAM Journal on Scientific
    and Statistical Computing, 12, 1, 158-179, (1991), 10.1137/0912009.
    The matrix is not explicitly created.
    """
    if cl is None:
        cl = np.zeros_like(cc)
    else:
        cl.fill(0)
    # moveaxis returns views, so in-place updates reach the caller's arrays.
    if axis > 0:
        cl = np.moveaxis(cl, axis, 0)
        cc = np.moveaxis(cc, axis, 0)
    N = cc.shape[0]
    k = np.arange(N, dtype=float)
    k[0] = 1  # avoid division by zero for the k=0 term below
    vn = cc*k
    a = 1/(2*Lambda(k)*k*(k+0.5))
    k[0] = 0  # restore; k is reused for the final (k+1/2) scaling
    a[0] = 2/np.sqrt(np.pi)  # correct k=0 entry explicitly
    # Main diagonal contribution, then the even off-diagonals of L.
    cl[:] = np.sqrt(np.pi)*a*vn
    for n in range(2, N, 2):
        dn = Lambda(np.array([(n-2)/2]))/n
        cl[:(N-n)] -= dn*a[n//2:(N-n//2)]*vn[n:]
    cl *= (k+0.5)
    if axis > 0:
        cl = np.moveaxis(cl, 0, axis)
        cc = np.moveaxis(cc, 0, axis)
    return cl
# [docs]
class Leg2chebHaleTownsend:
    """Class for computing Chebyshev coefficients from Legendre coefficients

    Algorithm from::

        Nicholas Hale and Alex Townsend "A fast, simple and stable Chebyshev-
        Legendre transform using an asymptotic formula", SIAM J Sci Comput (2014)
        (https://epubs.siam.org/doi/pdf/10.1137/130932223)

    Parameters
    ----------
    input_array : array
        Legendre coefficients
    output_array : array
        The returned array
    axis : int
        The axis over which to perform the computation in case the input_array
        is multidimensional.
    nM : int
        Parameter, see Hale and Townsend (2014). Note that one must have N >> nM.
    Nmin : int
        Parameter. Choose direct matvec approach for N < Nmin
    """
    def __init__(self, input_array, output_array=None, axis=0, nM=50, Nmin=40000):
        self.axis = axis
        self.N = input_array.shape[axis]
        # Function spaces are created lazily in plan().
        self.L = None
        self.T = None
        self.U = None
        self.a = None
        self.Nmin = Nmin
        self._input_array = input_array
        self._output_array = output_array if output_array is not None else input_array.copy()
        self.sl = slicedict(axis=axis, dimensions=input_array.ndim)
        if self.N > Nmin:
            # Optimized backend (cython/numba) used for the recursive
            # evaluation of the near-boundary parts.
            from shenfun import config
            mod = config['optimization']['mode']
            self.lib = importlib.import_module('.'.join(('shenfun.optimization', mod, 'transforms')))
        N = self.N
        # Chebyshev angles theta_k = (k+1/2)pi/N and partition parameters of
        # Hale & Townsend: alpha controls where the asymptotic formula is
        # accurate, K is the number of partitions and ix[k] the partition
        # boundary indices.
        self.thetak = (np.arange(N)+0.5)*np.pi/N
        self.sintk = np.sin(self.thetak)
        alpha = self.alpha = min(1/np.log(N/nM), 0.5)
        if N < 1000:
            K = 1
        elif N < 10000:
            K = 2
        elif N < 1000000:
            K = 3
        else:
            K = 4
        self.K = K
        self.ix = {0: 0}
        for k in range(1, K+1):
            self.ix[k] = int((N + 1) / np.pi * np.arcsin(nM / N / alpha**k))

    @property
    def input_array(self):
        return self._input_array

    @property
    def output_array(self):
        return self._output_array

    def _Um(self, m):
        # u_m(theta) factor of the asymptotic expansion (Hale & Townsend).
        return np.sin((m+0.5)*(np.pi/2-self.thetak))/(2**(m+0.5)*self.sintk**(m-0.5))

    def _Vm(self, m):
        # v_m(theta) factor of the asymptotic expansion.
        return np.cos((m+0.5)*(np.pi/2-self.thetak))/(2*self.sintk)**(m+0.5)

    @staticmethod
    def _Cn(n):
        # C_n = sqrt(4/pi) * Gamma(n+1)/Gamma(n+3/2), computed in log space
        # for stability at large n.
        return np.sqrt(4/np.pi)*np.exp(gammaln(n+1) - gammaln(n+3/2))

    def plan(self, input_array):
        """Create the Legendre/Chebyshev function spaces used internally.

        Idempotent: if a Chebyshev space of the correct size already exists,
        nothing is rebuilt.
        """
        from shenfun import FunctionSpace
        assert input_array.shape[self.axis] == self.N
        if self.T is not None:
            assert self.T.N == self.N
            return
        self.L = FunctionSpace(self.N, 'L')
        self.U = FunctionSpace(self.N, 'U', quad='GC')
        self.T = FunctionSpace(self.N, 'C', quad='GC')
        # Three-term recurrence coefficients for Legendre polynomials, used
        # by the recursive boundary evaluations.
        self.a = self.L.get_recursion_matrix(self.N+3, self.N+3).diags('dia').data
        if input_array.ndim > 1:
            self.T.plan(input_array.shape, self.axis, input_array.dtype, {})
            self.U.plan(input_array.shape, self.axis, input_array.dtype, {})

    def __call__(self, input_array=None, output_array=None, transpose=False):
        r"""Compute Chebyshev coefficients from Legendre. That is, compute

        .. math::

            \hat{c}^{cheb} = M \hat{c}^{leg}

        where :math:`\hat{c}^{cheb} \in \mathbb{R}^N` are the Chebyshev
        coefficients, :math:`\hat{c}^{leg} \in \mathbb{R}^N` the Legendre
        coefficients and :math:`M\in\mathbb{R}^{N \times N}` the matrix for the
        conversion. Note that if keyword 'transpose' is true, then we compute

        .. math::

            \hat{a} = M^T \hat{b}

        for some vectors :math:`\hat{a}` and :math:`\hat{b}`.

        The Chebyshev and Legendre coefficients are the regular coefficients to
        the series

        .. math::

            p_l(x) = \sum_{k=0}^{N} \hat{c}_k^{leg}L_k(x) \\
            p_c(x) = \sum_{k=0}^{N} \hat{c}_k^{cheb}T_k(x)

        and we get :math:`\{\hat{c}_k^{cheb}\}_{k=0}^N` by setting :math:`p_l(x)=p_c(x)`
        for :math:`x=\{x_i\}_{i=0}^N`, where :math:`x_i=\cos(i+0.5)\pi/N`.

        Parameters
        ----------
        input_array : array
        output_array : array
        transpose : bool
            Whether to compute the transpose of the regular transform

        Note
        ----
        For small N we use a direct method that costs approximately :math:`0.25 N^2`
        operations. For larger N (see 'Nmin' parameter) we use the fast routine of
        Hale and Townsend 'A fast, simple and stable Chebyshev-Legendre
        transform using an asymptotic formula', SIAM J Sci Comput (2014)
        """
        if input_array is not None:
            self._input_array[:] = input_array
        axis = self.axis
        N = self.N
        if N <= self.Nmin:
            # Small N: direct low-memory matvec is faster and simpler.
            out = leg2cheb(self.input_array, self.output_array, axis=axis, transpose=transpose)
            if output_array is not None:
                output_array[:] = out
                return output_array
            return out
        self.plan(self.input_array)
        sn = [None]*self.input_array.ndim
        sn[axis] = slice(None); sn = tuple(sn)
        hmn = np.ones(self.N)  # h_{m,n} recurrence factors of the expansion
        Tc = np.zeros_like(self.input_array)
        Uc = np.zeros_like(self.input_array)
        z = np.zeros_like(self.input_array)  # function values at Chebyshev points
        cn = self._Cn(np.arange(N))[sn]
        xi, wi = self.T.points_and_weights()
        if transpose is False:
            cn = self.input_array*cn
            na = np.arange(N)
            # Asymptotic part: 10 terms of the Hale-Townsend expansion, each
            # evaluated with fast Chebyshev (T) and Chebyshev-2nd-kind (U)
            # backward transforms, restricted to the partitions where the
            # expansion is accurate.
            for m in range(10):
                if m > 0:
                    hmn *= (m-0.5)**2/(m*(na+m+0.5))
                cm = cn*hmn[sn]
                um = self._Um(m)[sn]
                vm = self._Vm(m)[sn]
                for k in range(1, self.K+1):
                    Tc[:] = 0
                    Uc[:] = 0
                    si = self.sl[slice(self.ix[k], N-self.ix[k])]
                    sk = self.sl[slice(int(self.alpha**k*N), int(self.alpha**(k-1)*N))]
                    Tc[sk] = cm[sk]
                    Uc[self.sl[slice(0, -1)]] = Tc[self.sl[slice(1, None)]]
                    z[si] += (vm*self.T.backward(Tc) + um*self.U.backward(Uc))[si]
            # Boundary layers (theta near 0 and pi): recursive (exact)
            # evaluation of the Legendre series.
            si = self.sl[slice(0, self.ix[1])]
            zx = np.zeros_like(z[si])
            self.lib.evaluate_expansion_all(self.input_array, zx, xi[si[axis]], self.axis, self.a) # recursive eval
            z[si] += zx
            si = self.sl[slice(N-self.ix[1], None)]
            self.lib.evaluate_expansion_all(self.input_array, zx, xi[si[axis]], self.axis, self.a)
            z[si] += zx
            # Remaining near-boundary partitions: recursive evaluation of the
            # low-order coefficients not covered by the asymptotic part.
            for k in range(1, self.K):
                si = self.sl[slice(self.ix[k], self.ix[k+1])]
                sk = self.sl[slice(0, int(self.alpha**k*self.N))]
                zx = np.zeros_like(z[si])
                self.lib.evaluate_expansion_all(self.input_array[sk], zx, xi[si[axis]], axis, self.a)
                z[si] += zx
                si = self.sl[slice(self.N-self.ix[k+1], self.N-self.ix[k])]
                self.lib.evaluate_expansion_all(self.input_array[sk], zx, xi[si[axis]], axis, self.a)
                z[si] += zx
            # Interior: lowest coefficients evaluated recursively.
            si = self.sl[slice(self.ix[self.K], self.N-self.ix[self.K])]
            sk = self.sl[slice(0, int(self.alpha**(self.K)*self.N))]
            zx = np.zeros_like(z[si])
            self.lib.evaluate_expansion_all(self.input_array[sk], zx, xi[si[axis]], axis, self.a)
            z[si] += zx
            # Forward Chebyshev transform of the collocation values gives the
            # Chebyshev coefficients.
            self._output_array = self.T.forward(z, self._output_array)
        else: # transpose
            # Transpose algorithm: run the same pieces with scalar products
            # instead of backward transforms.
            ck = np.full(N, np.pi/2); ck[0] = np.pi
            ctilde = self.T.backward(self.input_array/ck[sn]).copy() # TN^{-T} * cl
            wu = self.U.points_and_weights()[1]
            U1 = np.zeros_like(self.input_array)
            for m in range(10):
                if m > 0:
                    hmn *= (m-0.5)**2/(m*(np.arange(N)+m+0.5))
                um = self._Um(m)[sn]
                vm = self._Vm(m)[sn]
                for k in range(1, self.K+1):
                    Tc[:] = 0
                    Uc[:] = 0
                    si = self.sl[slice(self.ix[k], self.N-self.ix[k])]
                    sk = self.sl[slice(int(self.alpha**k*self.N), int(self.alpha**(k-1)*self.N))]
                    Tc[si] = ctilde[si]
                    T0 = self.T.scalar_product(Tc*vm)
                    U0 = self.U.scalar_product(Tc/wu[sn]*wi[sn]*um)
                    U1[self.sl[slice(1, None)]] = U0[self.sl[slice(0, -1)]]
                    z[sk] += (cn*hmn[sn]*(T0+U1))[sk]
            # Direct (restricted) products for boundary/interior parts.
            sk = self.sl[slice(0, int(self.alpha**self.K*self.N))]
            zx = np.zeros_like(z[sk])
            z[sk] += restricted_product(self.L, wi[sn]*ctilde, zx, xi, 0, self.N, 0, axis, self.a)
            for k in range(self.K):
                sk = self.sl[slice(int(self.alpha**(k+1)*self.N), int(self.alpha**k*self.N))]
                zx = np.zeros_like(z[sk])
                z[sk] += restricted_product(self.L, wi[sn]*ctilde, zx, xi, 0, self.ix[k+1], sk[axis].start, axis, self.a)
                z[sk] += restricted_product(self.L, wi[sn]*ctilde, zx, xi, N-self.ix[k+1], N, sk[axis].start, axis, self.a)
            self._output_array[:] = z
        if output_array is not None:
            output_array[:] = self._output_array
            return output_array
        return self._output_array
@runtimeoptimizer
def restricted_product(L, input_array, output_array, xi, i0, i1, a0, axis, a):
    r"""Returns the restricted matrix vector product

    .. math::

        \sum_{i=i_0}^{i_1} \sum_{k=k_0}^{k_1} \hat{u}_k P_k(x_i)

    where the matrix :math:`P_k(x_i)` is the :math:`k`'th basis function of a
    family of orthogonal polynomials, at points :math:`\{x_i\}_{i_0}^{i_1}`.
    The array :math:`\boldsymbol{\hat{u}}` can be multidimensional, in which
    case the product is applied along the indicated axis.

    Parameters
    ----------
    L : instance of :class:`.SpectralBase`
    input_array : array
    output_array : array
    xi : Quadrature points for space L
    i0, i1 : integers
        Start and stop indices for xi
    a0 : int
        Start index for basis functions :math:`P_k`
    axis : int
        The axis of :math:`\hat{u}_k` to take the restricted product over in
        case :math:`\boldsymbol{\hat{u}}` is multidimensional
    a : array
        Recurrence coefficients for the orthogonal polynomials
    """
    N = xi.shape[0]
    # Seed the three-term recurrence with P_{a0} and P_{a0+1} at the
    # restricted points.
    Lnm = L.evaluate_basis(xi[i0:i1], i=a0)
    Ln = L.evaluate_basis(xi[i0:i1], i=a0+1)
    if a.shape[0] == 3:
        anm = a[0] # a_{n-1, n}
        ann = a[1] # a_{n, n}
        anp = a[2] # a_{n+1, n}
    else:
        # Symmetric recurrence without a diagonal term (e.g. Legendre).
        anm = a[0]
        anp = a[1]
        ann = np.zeros(N+2)
    # P_{a0+2} via the recurrence x*P_n = anm*P_{n+1} + ann*P_n + anp*P_{n-1}.
    Lnp = (xi[i0:i1]-ann[1+a0])/anm[1+a0]*Ln - anp[1+a0]/anm[1+a0]*Lnm
    for k in range(len(output_array)):
        kp = k+a0
        s1 = 1/anm[kp+2]
        s2 = anp[kp+2]/anm[kp+2]
        a00 = ann[kp+2]
        s = 0.0
        # Accumulate the inner product for row k while simultaneously
        # rotating the recurrence buffers (Lnm <- Ln <- Lnp <- next); the
        # statement order within this loop is essential.
        for i in range(len(Ln)):
            s += Lnm[i]*input_array[i0+i]
            Lnm[i] = Ln[i]
            Ln[i] = Lnp[i]
            Lnp[i] = s1*(xi[i0+i]-a00)*Ln[i] - s2*Lnm[i]
        output_array[k] = s
    return output_array
def getChebyshev(level, D, s, diags, A, N, l2c=True):
    """Low-rank computation of Chebyshev coefficients

    Computes Chebyshev coefficients for all submatrices on a given
    level of a hierarchical decomposition. Results are appended in place to
    the lists `A` (coefficients) and `N` (ranks); the maximum rank on this
    level is returned.

    Parameters
    ----------
    level : int
        The level in the hierarchical decomposition
    D : int
        Domains size on level
    s : int
        Size of the smallest submatrices
    diags : int
        The number of neglected diagonals, that are treated using a
        direct approach
    A : list
        Each item is a flattened low-rank matrix of coefficients
    N : list
        Each item is the shape of the corresponding matrix item in A
    l2c : bool
        If True, the transform goes from Legendre to Chebyshev, and
        vice versa if False
    """
    from shenfun import FunctionSpace, TensorProductSpace
    h = s*get_h(level, D)
    i0, j0 = get_ij(level, 0, s, D, diags)
    Nb = get_number_of_blocks(level, D)
    # Estimate the required 1D Chebyshev resolution Nx by expanding the kernel
    # on two sample domains and locating where the coefficients stop decaying.
    T0 = FunctionSpace(100, 'C', domain=[j0+2*h, j0+4*h])
    fun = Mxy if l2c == True else Lxy
    w0 = fun(2*h-1, T0.mesh())
    m0 = T0.forward(w0)
    z = np.where(np.diff(abs(m0)) > 0)[0]
    Nx = 100 if len(z) < 2 else z[1]
    T0.domain = [j0+4*h, j0+6*h]
    w0 = fun(2*h-1, T0.mesh())
    m0 = T0.forward(w0)
    z = np.where(np.diff(abs(m0)) > 0)[0]
    Nx2 = 100 if len(z) < 2 else z[1]
    Nx2 = min(Nx2, Nx)  # blocks further from the diagonal need fewer modes
    #Nx, Nx2 = 50, 50
    T1 = FunctionSpace(Nx, 'C')
    S = TensorProductSpace(MPI.COMM_SELF, (T1, T1))
    f = np.zeros((Nx, Nx))
    dctn = fftw.dctn(np.zeros((Nx, Nx)), axes=(0, 1))
    # Chebyshev (Gauss) points for the two resolutions.
    xj = np.cos((np.arange(Nx)+0.5)*np.pi/Nx)
    xj2 = np.cos((np.arange(Nx2)+0.5)*np.pi/(Nx2))
    dctn2 = fftw.dctn(np.zeros((Nx2, Nx2)), axes=(0, 1))
    X = np.zeros((Nx, 1))
    Y = np.zeros((1, Nx))
    X2 = np.zeros((Nx2, 1))
    Y2 = np.zeros((1, Nx2))
    Mmax = 0
    for k in range(Nb):
        i0, j0 = get_ij(level, k, s, D, diags)
        for j in range(D[level]):
            S.bases[1].domain = 2*(j0+(j+1)*h), 2*(j0+(j+2)*h)
            for i in range(j+1):
                S.bases[0].domain = 2*(i0+i*h), 2*(i0+(i+1)*h)
                f[:] = 0
                if j > i:
                    # Off-diagonal block: coarser sampling suffices.
                    Y2[0, :] = S.bases[1].map_true_domain(xj2)
                    X2[:, 0] = S.bases[0].map_true_domain(xj2)
                    f[:Nx2, :Nx2] = dctn2(fun(X2, Y2))/Nx2**2
                else:
                    Y[0, :] = S.bases[1].map_true_domain(xj)
                    X[:, 0] = S.bases[0].map_true_domain(xj)
                    f[:] = dctn(fun(X, Y))/Nx**2
                # DCT normalization of the zeroth row/column.
                f[0] /= 2
                f[:, 0] /= 2
                # Truncate at the point where coefficients stop decaying.
                z = np.where(np.diff(abs(f[0, 2:])) >= 0)[0]
                #z = np.where(abs(f[0]) < 1e-15)[0]
                Mmin = z[0]+2 if len(z) > 1 else Nx
                #Mmin = Nx
                A.append(f[:Mmin, :Mmin].ravel().copy())
                N.append(Mmin)
                Mmax = max(Mmax, Mmin)
    S.destroy()
    return Mmax
class FMMLevel:
    """Abstract base class for hierarchical matrix

    Holds the level structure (number of levels, domain sizes per level,
    smallest block size) and the precomputed low-rank Chebyshev coefficients
    for all submatrices. Subclasses (FMMLeg2Cheb, FMMCheb2Leg) supply the
    transform-specific diagonal data and execution.
    """
    def __init__(self, N, domains=None, levels=None, l2c=True, maxs=100, use_direct=-1):
        self.N = N
        self.use_direct = use_direct
        self._output_array = np.array([0])  # placeholder until plan() is called
        if N <= use_direct:
            # Small problem: no hierarchy needed, the direct kernel is used.
            self.Nn = N
            return
        # Determine the number of levels and the domain size per level.
        if domains is not None:
            if isinstance(domains, int):
                if levels is None:
                    # Choose the smallest number of levels such that the
                    # smallest block size does not exceed maxs.
                    doms = np.cumsum(domains**np.arange(16))
                    levels = np.where(N/(2*(1+doms)) <= maxs)[0][0]
                    levels = max(1, levels)
                self.D = np.full(levels, domains, dtype=int)
            else:
                # Explicit sequence of domain sizes; its length defines levels.
                domains = np.atleast_1d(domains)
                levels = len(domains)
                self.D = domains
        else:
            if levels is None:
                # Default: binary subdivision, levels from maxs.
                domains = 2
                doms = np.cumsum(domains**np.arange(20))
                levels = np.where(N/(2*(1+doms)) <= maxs)[0][0]
                levels = max(1, levels)
            else:
                # Levels given: find the smallest domain size that keeps the
                # block size below maxs.
                for domains in range(2, 100):
                    Nb = np.cumsum(domains**np.arange(levels+1))[levels]
                    s = N//(2*(1+Nb))
                    if s <= maxs:
                        break
            self.D = np.full(levels, domains, dtype=int)
        self.L = max(1, levels)
        Nb = get_number_of_blocks(self.L, self.D)
        #self.diags = diagonals
        self.axis = 0
        # Create new (even) N with minimal size according to diags and N
        # Pad input array with zeros
        #s, rest = divmod(N//2-diagonals, Nb)
        s = np.ceil(N/(2*(1+Nb))).astype(int)
        Nn = 2*s*(1+Nb)
        #Nn = N
        #if rest > 0 or Nn%2 == 1:
        #    s += 1
        #    Nn = 2*(diagonals+s*Nb)
        self.Nn = Nn      # padded (even) transform size
        self.s = s        # smallest block size
        self.diags = s    # width of the directly-treated diagonal band
        # Precompute low-rank Chebyshev coefficients for all submatrices,
        # from the finest level to the coarsest.
        fk = []
        Nk = []
        self.Mmin = np.zeros(self.L, dtype=int)
        for level in range(self.L-1, -1, -1):
            Nx = getChebyshev(level, self.D, s, s, fk, Nk, l2c)
            self.Mmin[level] = Nx
        self.fk = np.hstack(fk)
        self.Nk = np.array(Nk, dtype=int)
        Mmax = self.Mmin.max()
        # Conversion matrices between parent/child Chebyshev expansions; one
        # per distinct domain size.
        TT = {}
        for level in range(self.L-1, -1, -1):
            if self.D[level] not in TT:
                TT[self.D[level]] = conversionmatrix(self.D[level], Mmax)
        self.Th = np.concatenate([TT[d] for d in np.unique(self.D)])
        self.ThT = np.array([copy(self.Th[i].transpose()) for i in range(self.Th.shape[0])])
        # Chebyshev Vandermonde matrices for even/odd interleaved evaluation
        # on the finest level.
        self.T = np.zeros((2, s, self.Mmin[-1]))
        Ti = n_cheb.chebvander(np.linspace(-1, 1, 2*s+1)[:-1], self.Mmin[-1]-1)
        self.T[0] = Ti[::2]
        self.T[1] = Ti[1::2]

    def get_M_shapes(self):
        # Diagnostics: per level, total coefficient storage and number of
        # submatrices.
        M = []
        for level in range(self.L):
            Nb = get_number_of_blocks(level, self.D)
            M.append([np.sum((self.Nk[level][1:])**2), Nb*self.D[level]*(self.D[level]+1)//2])
        return M

    def plan(self, shape, dtype, axis, output_array=None, use_direct=-1):
        """Allocate work/output arrays for the given input shape and axis.

        Idempotent: returns immediately if already planned for this
        shape/axis/dtype.
        """
        if self._output_array.shape == shape and axis == self.axis and self._output_array.dtype == dtype:
            return
        self.axis = axis
        self.sl = slicedict(axis=axis, dimensions=len(shape))
        self.si = islicedict(axis=axis, dimensions=len(shape))
        if shape[axis] > use_direct:
            # Work arrays holding the even/odd halves of the padded data.
            oddevenshape = list(shape)
            oddevenshape[axis] = self.Nn//2
            self.cont_input_array = np.zeros(oddevenshape, dtype=dtype)
            self.cont_output_array = np.zeros(oddevenshape, dtype=dtype)
        self._output_array = np.zeros(shape, dtype=dtype) if output_array is None else output_array

    def plotrank(self):
        # Visualize the rank of every low-rank submatrix in the hierarchy.
        import matplotlib.pyplot as plt
        z = np.zeros((self.Nn, self.Nn))
        ik = 0
        # Iterate in the same (fine-to-coarse) order as the Nk list was built.
        for level in range(self.L-1, -1, -1):
            for block in range(get_number_of_blocks(level, self.D)):
                h = 2 * self.s * get_h(level, self.D)
                i0, j0 = get_ij(level, block, self.s, self.D, self.diags)
                for j in range(self.D[level]):
                    for i in range(j + 1):
                        z[2*i0+i*h:2*i0+(i+1)*h, 2*j0+(j+1)*h:2*j0+(j+2)*h] = self.Nk[ik]
                        ik += 1
        plt.figure()
        plt.imshow(z, cmap='gray')
        plt.title('Submatrix rank')
        plt.colorbar()
        plt.show()
def get_ij(level, block, s, D, diagonals):
    """Return (i0, j0), the row and column offsets of `block` on `level`.

    The column offset is shifted right of the row offset by the diagonal
    band width plus the widths of all finer levels.
    """
    row = block*D[level]*s*get_h(level, D)
    col = diagonals + row
    for lvl in range(level+1, len(D)):
        col += s*get_h(lvl, D)
    return (row, col)

def get_h(level, D):
    """Relative block width on `level`: product of all finer domain sizes."""
    #return (self.domains-1)**(self.L-1-level) # const D
    return np.prod(D[level+1:])

def get_number_of_blocks(level, D):
    """Number of blocks on `level` of the hierarchy."""
    #return np.sum(D[0]**np.arange(level+1)) # const D
    return 1+np.sum(np.cumprod(np.flip(D[:level])))

def get_number_of_submatrices(D):
    """Total number of low-rank submatrices over all levels."""
    return sum(get_number_of_blocks(lvl, D)*D[lvl]*(D[lvl]+1)//2
               for lvl in range(len(D)))
# [docs]
class FMMLeg2Cheb(FMMLevel):
    """Transform Legendre coefficients to Chebyshev coefficients

    Parameters
    ----------
    input : int or input array
        The length of the array to transform or the array itself
    domains : None, int or sequence of ints
        The domain sizes for all levels

        If domains=None then an appropriate domain size is computed
        according to the given number of levels and maxs. If the
        number of levels is also None, then domains is set to 2
        and the number of levels is computed from maxs.

        If domains is an integer, then this integer is used for each
        level, and the number of levels is either given or computed
        according to maxs

        If domains is a sequence of integers, then these are the
        domain sizes for all the levels and the length of this
        sequence is the number of levels.
    levels : None or int
        The number of levels in the hierarchical matrix

        If levels is None, then it is computed according to domains
        and maxs
    l2c : bool
        If True, the transform goes from Legendre to Chebyshev, and
        vice versa if False
    maxs : int
        The maximum size of the smallest submatrices (on the highest
        level). This number is used if the number of levels or domain
        sizes need to be computed.
    axis : int
        The axis over which to apply the transform if the input array
        is a multidimensional array.
    use_direct : int
        Use direct method if N is smaller than this number
    """
    def __init__(self, input, output_array=None, domains=None, levels=None, maxs=100, axis=0, use_direct=-1):
        if isinstance(input, int):
            N = input
            shape = (N,)
            dtype = float
        elif isinstance(input, np.ndarray):
            N = input.shape[axis]
            shape = input.shape
            dtype = input.dtype
        FMMLevel.__init__(self, N, domains=domains, levels=levels, maxs=maxs, use_direct=use_direct)
        # Lambda(k) = Gamma(k+1/2)/Gamma(k+1): entries of the Alpert-Rokhlin
        # connection coefficients used by the direct kernels.
        self.a = Lambda(np.arange(self.Nn, dtype=float))
        self.plan(shape, dtype, axis, output_array, use_direct)

    def __call__(self, input_array, output_array=None, transpose=False):
        """Execute transform

        Parameters
        ----------
        input_array : array
            The array to be transformed
        output_array : None or array
            The return array. Will be created if None.
        transpose : bool
            If True, then apply the transpose operation :math:`A^Tu`
            instead of the default :math:`Au`, where :math:`A` is the
            matrix and :math:`u` is the array of Legendre coefficients.
        """
        self.plan(input_array.shape, input_array.dtype, self.axis, output_array, self.use_direct)
        if input_array.shape[self.axis] <= self.use_direct and input_array.ndim == 1:
            # Small 1D problem: pure direct kernel. NOTE(review): _leg2cheb
            # is not defined in the visible part of this module — presumably
            # provided by the optimization backend; confirm.
            self._output_array[...] = 0
            _leg2cheb(input_array, self._output_array, self.a, transpose)
            if output_array is not None:
                output_array[...] = self._output_array
                return output_array
            return self._output_array
        if transpose is True:
            # The 2/pi (and halved k=0) scaling commutes to the input side
            # for the transpose operation.
            input_array = input_array*2/np.pi # makes copy (preserve input)
            input_array[self.si[0]] *= 0.5
        if input_array.shape[self.axis] <= self.use_direct:
            self._output_array.fill(0)
            FMMdirect1(input_array, self._output_array, self.axis, self.a, self.N//2, transpose)
        else:
            self.apply(input_array, self._output_array, transpose)
        if transpose is False:
            self._output_array *= (2/np.pi)
            self._output_array[self.si[0]] *= 0.5
        return self._output_array

    def apply(self, input_array, output_array, transpose):
        # Low-rank (hierarchical) contribution of all off-diagonal blocks,
        # followed by direct treatment of the diagonal band and the smallest
        # near-diagonal blocks. FMMcheb/FMMdirect1/FMMdirect2 are not defined
        # in the visible part of this module — presumably supplied by the
        # optimization backend; confirm.
        FMMcheb(input_array, output_array, self.axis, self.Nn, self.fk, self.Nk, self.T, self.Th, self.ThT, self.D, self.Mmin, self.s, self.diags, transpose)
        FMMdirect1(input_array, output_array, self.axis, self.a, self.diags, transpose)
        FMMdirect2(input_array, output_array, self.axis, self.a, 2*self.s, get_number_of_blocks(self.L, self.D), 2*self.diags, transpose)
# [docs]
class FMMCheb2Leg(FMMLevel):
"""Transform Chebyshev coefficients to Legendre coefficients
Parameters
----------
input : int or input array
The length of the array to transform or the array itself
diagonals : int
The number of neglected diagonals, that are treated using a
direct approach
domains : None, int or sequence of ints
The domain sizes for all levels
If domains=None then an appropriate domain size is computed
according to the given number of levels and maxs. If the
number of levels is also None, then domains is set to 2
and the number of levels is computed from maxs.
If domains is an integer, then this integer is used for each
level, and the number of levels is either given or computed
according to maxs
If domains is a sequence of integers, then these are the
domain sizes for all the levels and the length of this
sequence is the number of levels.
levels : None or int
The number of levels in the hierarchical matrix
If levels is None, then it is computed according to domains
and maxs
l2c : bool
If True, the transform goes from Legendre to Chebyshev, and
vice versa if False
maxs : int
The maximum size of the smallest submatrices (on the highest
level). This number is used if the number of levels or domain
sizes need to be computed.
axis : int
The axis over which to apply the transform if the input array
is a multidimensional array.
use_direct : int
Use direct method if N is smaller than this number
"""
def __init__(self, input, output_array=None, diagonals=16, domains=None, levels=None, maxs=100, axis=0, use_direct=-1):
if isinstance(input, int):
N = input
shape = (N,)
dtype = float
elif isinstance(input, np.ndarray):
N = input.shape[axis]
shape = input.shape
dtype = input.dtype
FMMLevel.__init__(self, N, domains=domains,
diagonals=diagonals, levels=levels, maxs=maxs, l2c=False, use_direct=use_direct)
k = np.arange(self.Nn, dtype='d')
k[0] = 1
self.dn = Lambda((k[::2]-2)/2)/k[::2]
self.a = 1/(2*Lambda(k)*k*(k+0.5))
self.a[0] = 2/np.sqrt(np.pi)
self.plan(shape, dtype, axis, output_array, use_direct)
[docs]
def __call__(self, input_array, output_array=None):
"""Execute transform
Parameters
----------
input_array : array
The array to be transformed
output_array : None or array
The return array. Will be created if None.
"""
assert isinstance(input_array, np.ndarray)
self.plan(input_array.shape, input_array.dtype, self.axis, output_array, self.use_direct)
if input_array.shape[self.axis] <= self.use_direct and input_array.ndim == 1:
self._output_array[...] = 0
_cheb2leg(input_array, self._output_array, self.dn, self.a)
if output_array is not None:
output_array[...] = self._output_array
return output_array
return self._output_array
self.sl = sl = slicedict(axis=self.axis, dimensions=input_array.ndim)
si = [None]*input_array.ndim
si[self.axis] = slice(None)
w0 = input_array.copy()
w0[sl[slice(1, self.N)]] *= np.arange(1, self.N)[tuple(si)]
if input_array.shape[self.axis] <= self.use_direct:
self._output_array.fill(0)
FMMdirect4(w0, self._output_array, self.axis, self.dn[:self.N//2], self.a[:self.N], self.N//2)
else:
self.apply(w0, self._output_array)
self._output_array *= (np.arange(self.N)+0.5)[tuple(si)]
if output_array is not None:
output_array[...] = self._output_array
return output_array
return self._output_array
[docs]
def apply(self, input_array, output_array):
FMMcheb(input_array, output_array, self.axis, self.Nn, self.fk, self.Nk, self.T, self.Th, self.ThT, self.D, self.Mmin, self.s, self.diags, False)
FMMdirect4(input_array, output_array, self.axis, self.dn[:self.N//2], self.a[:self.N], self.diags)
FMMdirect3(input_array, output_array, self.axis, self.dn, self.a, 2*self.s, get_number_of_blocks(self.L, self.D), 2*self.diags)
@runtimeoptimizer
def _cheb2leg(u, v, dn, a):
    """Direct (dense) Chebyshev-to-Legendre kernel.

    Placeholder body: the ``@runtimeoptimizer`` decorator presumably swaps in
    an optimized (Cython/Numba) implementation; this function is not meant to
    execute as pure Python.
    """
    pass
@runtimeoptimizer
def _leg2cheb(u, v, a, trans):
    """Direct (dense) Legendre-to-Chebyshev kernel (or its transpose if ``trans``).

    Placeholder body: the ``@runtimeoptimizer`` decorator presumably swaps in
    an optimized (Cython/Numba) implementation; this function is not meant to
    execute as pure Python.
    """
    pass
@runtimeoptimizer
def FMMdirect1(u, v, axis, a, n0, trans):
    """Accumulate the first ``n0`` even diagonals of the Legendre-to-Chebyshev
    matrix directly into ``v`` (pure-Python fallback; ``axis`` is unused here
    and the arrays are treated as one-dimensional).

    If ``trans`` is True, the transposed (adjoint) update is applied instead.
    """
    m = u.shape[0]
    for d in range(n0):
        # Entries of even diagonal 2*d: a[d]*a[d+j] for j = 0, ..., m-2*d-1.
        w = a[d]*a[d:(m-d)]
        if trans is False:
            v[:(m-2*d)] += w*u[2*d:]
        else:
            v[2*d:] += w*u[:(m-2*d)]
@runtimeoptimizer
def FMMdirect2(u, v, axis, a, h, Nb, n0, trans):
    """Accumulate the near-diagonal off-blocks of the Legendre-to-Chebyshev
    matrix directly into ``v`` (pure-Python fallback; ``axis`` is unused here
    and the arrays are treated as one-dimensional).

    Parameters
    ----------
    u, v : arrays
        Input and (accumulated) output coefficients
    axis : int
        Unused in this fallback (handled by the optimized versions)
    a : array
        Precomputed connection coefficients
    h : int
        Block width
    Nb : int
        Number of blocks
    n0 : int
        Offset of the first treated diagonal
    trans : bool
        If True, apply the transposed (adjoint) update
    """
    N = v.shape[0]
    for k in range(Nb):
        i0 = k*h
        j0 = n0+i0
        for n in range(0, h, 2):
            a0 = i0+(n0+n)//2
            Nm = min(N-(j0+n), h-n)
            if trans is True:
                # Adjoint of the else-branch update: the source and destination
                # slices swap roles. The destination must span j0+n .. j0+n+Nm
                # (Nm entries) to match the Nm-entry operands; the previous
                # upper bound j0+Nm was short by n and broke broadcasting for
                # every inner iteration with n >= 2.
                v[j0+n:(j0+n+Nm)] += a[(n+n0)//2]*a[a0:a0+Nm]*u[i0:i0+Nm]
            else:
                v[i0:(i0+Nm)] += a[(n+n0)//2]*a[a0:a0+Nm]*u[j0+n:j0+n+Nm]
# NOTE: alternative Numba implementations of the direct kernels, kept for reference:
#from shenfun import optimization
#FMMdirect2 = optimization.numba.transforms.FMMdirect2
#FMMdirect3 = optimization.numba.transforms.FMMdirect3
@runtimeoptimizer
def FMMdirect3(u, v, axis, dn, a, h, Nb, n0):
    """Subtract the near-diagonal off-block contributions of the
    Chebyshev-to-Legendre matrix from ``v`` (pure-Python fallback; ``axis``
    is unused here and the arrays are treated as one-dimensional).
    """
    nv = v.shape[0]
    for blk in range(Nb):
        row = blk*h              # first row of this block
        col = n0 + row           # first column of this block
        for d in range(0, h, 2):
            half = (n0 + d)//2
            # Clip the update length at the end of the arrays.
            m = min(nv - (col + d), h - d)
            v[row:(row + m)] -= dn[half]*a[row + half:row + half + m]*u[col + d:col + d + m]
@runtimeoptimizer
def FMMdirect4(u, v, axis, dn, a, n0):
    """Accumulate the first ``n0`` even diagonals of the Chebyshev-to-Legendre
    matrix directly into ``v`` (pure-Python fallback; ``axis`` is unused here
    and the arrays are treated as one-dimensional).
    """
    m = u.shape[0]
    # Main diagonal carries a positive sqrt(pi) factor ...
    v[:] += np.sqrt(np.pi)*a*u
    # ... while the remaining even diagonals are subtracted.
    for d in range(1, n0):
        v[:(m - 2*d)] -= dn[d]*a[d:(m - d)]*u[2*d:]
@runtimeoptimizer
def FMMcheb(input_array, output_array, axis, Nn, fk, Nk, T, Th, ThT, D, Mmin, s, diags, transpose):
    """Far-field part of the hierarchical (FMM) transform, pure-Python 1-D fallback.

    Evaluates the compressed, multi-level block part of the connection matrix
    using precomputed Chebyshev expansions ``fk`` of the submatrices (sizes in
    ``Nk``) and level-transfer matrices ``Th``/``ThT``. The result is written
    into ``output_array``; optimized (Cython) versions handle the
    multidimensional/axis cases.
    """
    assert input_array.ndim == 1, 'Use Cython for multidimensional'
    L = len(D)
    Mmax = max(Mmin)
    N = input_array.shape[0]
    # Work arrays for the even/odd coefficient subsets.
    cia = np.zeros(Nn//2, dtype=input_array.dtype)
    coa = np.zeros(Nn//2, dtype=input_array.dtype)
    uD = np.hstack((0, np.unique(D)))
    output_array[:] = 0
    # wk: outgoing (source) expansions per level; ck: incoming (target) expansions.
    wk = [None]*L
    ck = [None]*L
    for level in range(L):
        wk[level] = np.zeros((get_number_of_blocks(level, D), D[level], Mmax))
        ck[level] = np.zeros((get_number_of_blocks(level, D), D[level], Mmax))
    # The matrix couples only coefficients of equal parity; handle even
    # (sigma=0) and odd (sigma=1) subsets independently.
    for sigma in (0, 1):
        cia[:] = 0
        coa[:] = 0
        # Number of coefficients of this parity: N//2 (+1 for even if N is odd).
        cia[:N//2+(N%2)*(1-sigma)] = input_array[sigma::2]
        if sigma == 1:
            # Reset accumulators between the two parity passes.
            for level in range(L):
                wk[level][:] = 0
                ck[level][:] = 0
        ik = 0  # running index into Nk (per-submatrix expansion sizes)
        Nc = 0  # running offset into the flattened expansion coefficients fk
        if transpose is False:
            # Upward pass: form source expansions on the finest level, translate
            # them to coarser levels, and apply the compressed submatrices.
            for level in range(L-1, -1, -1):
                h = s*get_h(level, D)
                M0 = Mmin[level]
                s0 = np.searchsorted(uD, D[level])-1
                s1 = np.cumsum(uD)[s0]
                TT = Th[s1:s1+D[level]]
                #if level == L-1:
                #    wk[level][...] = np.dot(cia[diags+s:].reshape((D[L-1]*get_number_of_blocks(L-1, D), s)), T[sigma]).reshape((get_number_of_blocks(L-1, D), D[L-1], Mmax))
                for block in range(get_number_of_blocks(level, D)):
                    i0, j0 = get_ij(level, block, s, D, diags)
                    b0, q0 = divmod(block-1, D[level-1])
                    for q in range(D[level]):
                        if level == L-1:
                            # Finest level: project input samples onto the expansion basis.
                            wk[level][block, q, :M0] = np.dot(cia[j0+(q+1)*h:j0+(q+2)*h], T[sigma, :, :M0])
                        if level > 0 and block > 0:
                            # Translate this expansion up to the parent level.
                            wk[level-1][b0, q0, :M0] += np.dot(TT[q, :M0, :M0], wk[level][block, q, :M0])
                        for p in range(q+1):
                            # Apply the compressed M x M submatrix (stored flat in fk).
                            M = Nk[ik]
                            ck[level][block, p, :M] += np.dot(fk[Nc:(Nc+M*M)].reshape(M, M), wk[level][block, q, :M])
                            Nc += M*M
                            ik += 1
            # Downward pass: push target expansions to finer levels, then
            # evaluate on the finest level into coa.
            for level in range(L):
                M0 = Mmin[level]
                if level < L-1:
                    s0 = np.searchsorted(uD, D[level+1])-1
                    s1 = np.cumsum(uD)[s0]
                    TT = ThT[s1:s1+D[level+1]]
                    for block in range(get_number_of_blocks(level+1, D)-1):
                        b0, p0 = divmod(block, D[level])
                        for p in range(D[level+1]):
                            ck[level+1][block, p, :M0] += np.dot(TT[p, :M0, :M0], ck[level][b0, p0, :M0])
                else:
                    for block in range(get_number_of_blocks(level, D)):
                        i0, j0 = get_ij(level, block, s, D, diags)
                        for p in range(D[level]):
                            coa[i0+p*s:i0+(p+1)*s] = np.dot(T[sigma], ck[level][block, p])
        else:
            # Transposed operator: same structure with source/target roles,
            # index pairs (i0, j0) and multiplication sides swapped.
            for level in range(L-1, -1, -1):
                h = s*get_h(level, D)
                M0 = Mmin[level]
                s0 = np.searchsorted(uD, D[level])-1
                s1 = np.cumsum(uD)[s0]
                for block in range(get_number_of_blocks(level, D)):
                    j0, i0 = get_ij(level, block, s, D, diags)
                    b0, q0 = divmod(block, D[level-1])
                    TT = Th[s1:s1+D[level]]
                    for p in range(D[level]):
                        for q in range(p+1):
                            if q == p:
                                if level == L-1:
                                    wk[level][block, q, :M0] = np.dot(cia[j0+q*h:j0+(q+1)*h], T[sigma, :, :M0])
                                if level > 0 and block < get_number_of_blocks(level, D)-1:
                                    wk[level-1][b0, q0, :M0] += np.dot(TT[q, :M0, :M0], wk[level][block, q, :M0])
                            M = Nk[ik]
                            # Right-multiplication = applying the transposed submatrix.
                            ck[level][block, p, :M] += np.dot(wk[level][block, q, :M], fk[Nc:(Nc+M*M)].reshape(M, M))
                            Nc += M*M
                            ik += 1
            for level in range(L):
                M0 = Mmin[level]
                if level < L-1:
                    s0 = np.searchsorted(uD, D[level+1])-1
                    s1 = np.cumsum(uD)[s0]
                    TT = Th[s1:s1+D[level+1]]
                    for block in range(1, get_number_of_blocks(level+1, D)):
                        for p in range(D[level+1]):
                            b0, p0 = divmod(block-1, D[level])
                            ck[level+1][block, p, :M0] += np.dot(ck[level][b0, p0, :M0], TT[p, :M0, :M0])
                else:
                    for block in range(get_number_of_blocks(level, D)):
                        j0, i0 = get_ij(level, block, s, D, diags)
                        for p in range(D[level]):
                            coa[i0+(p+1)*s:i0+(p+2)*s] = np.dot(T[sigma], ck[level][block, p])
        # Scatter this parity's result back into the interleaved output.
        output_array[sigma::2] = coa[:N//2+(N%2)*(1-sigma)]
def conversionmatrix(D: int, M: int) -> np.ndarray:
    """Return ``D`` stacked ``(M, M)`` Chebyshev conversion matrices.

    For each of the ``D`` equal subdivisions of the reference interval, the
    corresponding matrix holds the first ``M`` Chebyshev coefficients
    (obtained via a DCT along the last axis) of ``T_k`` sampled at the
    subinterval's mapped Chebyshev-Gauss points ``cos((j+1/2)*pi/M)``.
    """
    from mpi4py_fft.fftw import dctn
    modes = np.arange(M)
    # Chebyshev-Gauss points of the full interval, as a row vector.
    xj = np.cos((modes+0.5)*np.pi/M)[None, :]
    dct = dctn(np.zeros((M, M)), axes=(1,))
    mats = np.zeros((D, M, M))
    for sub in range(D):
        # Map the points into subdomain `sub` of [-1, 1] and sample T_k there.
        samples = np.cos(modes[:, None]*np.arccos((xj+1+2*sub-D)/D))
        mats[sub] = dct(samples)/M
        mats[sub, :, 0] /= 2  # DCT normalization: halve the zeroth mode
    return mats