Skip to content

Filters

Filters for data analysis, e.g. foreground filtering.

LSQfitting

Source code in fastbox/filters.py
class LSQfitting(object):

    def __init__(self, box):
        """Perform a least-squares model fit on a data cube.

        Uses a synchrotron-like power law model for the component to be removed.

        Parameters:
            box (CosmoBox):
                Object containing a simulation box.
        """
        self.box = box


    def resid_synch(self, params, freqs, data, **kwargs):
        """Synchrotron model residuals."""
        freqS = kwargs['freqS']
        noise = kwargs['noise']
        betaS = params['betaS']
        ampS = params['ampS']

        x_ghz = np.array(freqs)
        tot = ampS * (x_ghz / freqS) ** (betaS)
        weights = 1./noise**2
        return weights * (tot - data)


    def do_loop(self, ii, bits, data, noise, freqs, bsval, syamp, ffamp, mod, 
                bidea, freeind, queue):
        """TODO"""
        nfreqs = freqs.size
        star = bits[ii]
        enl = bits[ii+1]

        for xno in range(star, enl):

            tval = data[xno, :]
            noval = noise[xno, :]
            bgu = bidea[xno]

            kwsdict = {'noise':noval, 'freqS':freqs[0]}
            params = Parameters()
            params.add('betaS', value=bgu, min=bgu*1.1, max=bgu*0.9)
            params.add('ampS', value=tval[0]*0.9, min=tval[0]*0.5, max=tval[0]*1.5)

            resultpre = Minimizer(self.resid_synch, params, fcn_args=(freqs, tval), fcn_kws=kwsdict)
            result = resultpre.minimize('least_sqaures')
            val_dic = result.params

            bsval[xno] = np.array(val_dic["betaS"])

            #getting amps from fitted specs
            specs = np.zeros((nfreqs, 2))
            specs[:, 0] = (freqs / freqs[0]) ** np.array(val_dic["betaS"])
            specs[:, 1] = (freqs / freqs[0]) ** np.array(freeind)

            num = np.dot(specs.T, tval)
            denom = np.linalg.inv(np.dot(specs.T, specs))
            amps = np.dot(num, denom)

            syamp[xno] = amps[0]
            ffamp[xno] = amps[1]

            mod[xno,:] = np.dot(amps, specs.T)

        queue.put([bsval[star:enl], syamp[star:enl], ffamp[star:enl], 
                   mod[star:enl, :], star, enl])


    def run_fit(self, psm, maps, freqs, numpix, tpsmean, freeind):
        """Perform a fit to the data.

        Parameters:
            psm (PlanckSkyModel instance):
                Instance of the PlanckSkyModel from the ``fastbox.foregrounds`` 
                module.

            maps (array_like):
                Data cube.

            freqs (array_like):
                Frequencies.
        """
        nfreqs = freqs.size

        # Noise maps; assumes noise is at the level of free-free emission
        _, free_amp, _ = psm.synch_freefree_maps(ref_freq=900., free_idx=freeind)
        sigma = np.std(free_amp)
        sigmas = sigma * (freqs/900.)**(freeind)
        noise = np.array([np.random.normal(loc=0.0, scale=sigmas[i], size=numpix) 
                          for i in range(nfreqs)])

        # Subtract mean point source temp. from data
        data = maps.reshape(numpix, nfreqs)- tpsmean.reshape(nfreqs, 1).T

        # Set initial parameter values in each angular pixel
        bsval = np.zeros((numpix))
        syamp = np.zeros((numpix))
        ffamp = np.zeros((numpix))
        mod = np.zeros(( numpix, nfreqs))

        bput = np.log(data[:,3] / data[:,0]) / np.log(freqs[3] / freqs[0])

        # Create parallel jobs
        queue = Queue()
        bits = np.linspace(0, numpix, 8).astype('int')
        processes = [Process(target=self.do_loop, 
                             args=(intv, bits, data, noise.T, freqs, bsval, \
                                   syamp, ffamp, mod, bput, freeind, queue)) 
                     for intv in range(8-1)]

        # Start processes
        for p in processes:
            p.start()
        for p in processes:
            result = queue.get()
            syamp[result[4]:result[5]] = result[1]
            bsval[result[4]:result[5]] = result[0]
            ffamp[result[4]:result[5]] = result[2]
            mod[result[4]:result[5], :] = result[3]
        for p in processes:
            p.join()

        # Clean up and return residual
        del queue, p, result
        return data - mod, bsval


    def give_hest(self, T_obs, freeind, psaveind, flux_cutoff, indspread, redshift=None):
        """
        Fit point source model to data cube.
        """
        # Frequencies and angular coordinates
        freqs = self.box.freq_array(redshift=redshift) # MHz
        ang_x, ang_y = self.box.pixel_array(redshift=redshift)
        xside = ang_x.size
        yside = ang_y.size

        # Build model of the mean point source temperature vs frequency
        psmodel = PointSourceModel(self.box)
        _, tpsmean = psmodel.construct_cube(flux_cutoff=flux_cutoff, 
                                            beta=psaveind, 
                                            delta_beta=freeind)

        # Run fit
        res, spec = self.run_fit(T_obs, freqs, xside*yside, tpsmean, freeind)
        residual = res.reshape(freqs.size, xside, yside)
        bspec = spec.reshape(xside, yside)

        return residual, bspec

__init__(self, box) special

Perform a least-squares model fit on a data cube.

Uses a synchrotron-like power law model for the component to be removed.

Parameters:

Name Type Description Default
box CosmoBox

Object containing a simulation box.

required
Source code in fastbox/filters.py
def __init__(self, box):
    """Perform a least-squares model fit on a data cube.

    Uses a synchrotron-like power law model for the component to be removed.

    Parameters:
        box (CosmoBox):
            Object containing a simulation box.
    """
    self.box = box

do_loop(self, ii, bits, data, noise, freqs, bsval, syamp, ffamp, mod, bidea, freeind, queue)

TODO

Source code in fastbox/filters.py
def do_loop(self, ii, bits, data, noise, freqs, bsval, syamp, ffamp, mod, 
            bidea, freeind, queue):
    """TODO"""
    nfreqs = freqs.size
    star = bits[ii]
    enl = bits[ii+1]

    for xno in range(star, enl):

        tval = data[xno, :]
        noval = noise[xno, :]
        bgu = bidea[xno]

        kwsdict = {'noise':noval, 'freqS':freqs[0]}
        params = Parameters()
        params.add('betaS', value=bgu, min=bgu*1.1, max=bgu*0.9)
        params.add('ampS', value=tval[0]*0.9, min=tval[0]*0.5, max=tval[0]*1.5)

        resultpre = Minimizer(self.resid_synch, params, fcn_args=(freqs, tval), fcn_kws=kwsdict)
        result = resultpre.minimize('least_sqaures')
        val_dic = result.params

        bsval[xno] = np.array(val_dic["betaS"])

        #getting amps from fitted specs
        specs = np.zeros((nfreqs, 2))
        specs[:, 0] = (freqs / freqs[0]) ** np.array(val_dic["betaS"])
        specs[:, 1] = (freqs / freqs[0]) ** np.array(freeind)

        num = np.dot(specs.T, tval)
        denom = np.linalg.inv(np.dot(specs.T, specs))
        amps = np.dot(num, denom)

        syamp[xno] = amps[0]
        ffamp[xno] = amps[1]

        mod[xno,:] = np.dot(amps, specs.T)

    queue.put([bsval[star:enl], syamp[star:enl], ffamp[star:enl], 
               mod[star:enl, :], star, enl])

give_hest(self, T_obs, freeind, psaveind, flux_cutoff, indspread, redshift=None)

Fit point source model to data cube.

Source code in fastbox/filters.py
def give_hest(self, T_obs, freeind, psaveind, flux_cutoff, indspread, redshift=None):
    """
    Fit point source model to data cube.
    """
    # Frequencies and angular coordinates
    freqs = self.box.freq_array(redshift=redshift) # MHz
    ang_x, ang_y = self.box.pixel_array(redshift=redshift)
    xside = ang_x.size
    yside = ang_y.size

    # Build model of the mean point source temperature vs frequency
    psmodel = PointSourceModel(self.box)
    _, tpsmean = psmodel.construct_cube(flux_cutoff=flux_cutoff, 
                                        beta=psaveind, 
                                        delta_beta=freeind)

    # Run fit
    res, spec = self.run_fit(T_obs, freqs, xside*yside, tpsmean, freeind)
    residual = res.reshape(freqs.size, xside, yside)
    bspec = spec.reshape(xside, yside)

    return residual, bspec

resid_synch(self, params, freqs, data, **kwargs)

Synchrotron model residuals.

Source code in fastbox/filters.py
def resid_synch(self, params, freqs, data, **kwargs):
    """Synchrotron model residuals."""
    freqS = kwargs['freqS']
    noise = kwargs['noise']
    betaS = params['betaS']
    ampS = params['ampS']

    x_ghz = np.array(freqs)
    tot = ampS * (x_ghz / freqS) ** (betaS)
    weights = 1./noise**2
    return weights * (tot - data)

run_fit(self, psm, maps, freqs, numpix, tpsmean, freeind)

Perform a fit to the data.

Parameters:

Name Type Description Default
psm PlanckSkyModel instance

Instance of the PlanckSkyModel from the fastbox.foregrounds module.

required
maps array_like

Data cube.

required
freqs array_like

Frequencies.

required
Source code in fastbox/filters.py
def run_fit(self, psm, maps, freqs, numpix, tpsmean, freeind):
    """Perform a fit to the data.

    Parameters:
        psm (PlanckSkyModel instance):
            Instance of the PlanckSkyModel from the ``fastbox.foregrounds`` 
            module.

        maps (array_like):
            Data cube.

        freqs (array_like):
            Frequencies.
    """
    nfreqs = freqs.size

    # Noise maps; assumes noise is at the level of free-free emission
    _, free_amp, _ = psm.synch_freefree_maps(ref_freq=900., free_idx=freeind)
    sigma = np.std(free_amp)
    sigmas = sigma * (freqs/900.)**(freeind)
    noise = np.array([np.random.normal(loc=0.0, scale=sigmas[i], size=numpix) 
                      for i in range(nfreqs)])

    # Subtract mean point source temp. from data
    data = maps.reshape(numpix, nfreqs)- tpsmean.reshape(nfreqs, 1).T

    # Set initial parameter values in each angular pixel
    bsval = np.zeros((numpix))
    syamp = np.zeros((numpix))
    ffamp = np.zeros((numpix))
    mod = np.zeros(( numpix, nfreqs))

    bput = np.log(data[:,3] / data[:,0]) / np.log(freqs[3] / freqs[0])

    # Create parallel jobs
    queue = Queue()
    bits = np.linspace(0, numpix, 8).astype('int')
    processes = [Process(target=self.do_loop, 
                         args=(intv, bits, data, noise.T, freqs, bsval, \
                               syamp, ffamp, mod, bput, freeind, queue)) 
                 for intv in range(8-1)]

    # Start processes
    for p in processes:
        p.start()
    for p in processes:
        result = queue.get()
        syamp[result[4]:result[5]] = result[1]
        bsval[result[4]:result[5]] = result[0]
        ffamp[result[4]:result[5]] = result[2]
        mod[result[4]:result[5], :] = result[3]
    for p in processes:
        p.join()

    # Clean up and return residual
    del queue, p, result
    return data - mod, bsval

angular_bandpass_filter(field, kmin, kmax, d=1.0)

Apply a top-hat bandpass filter to the field in each frequency channel (i.e. each 2D slice) of a datacube.

Parameters:

Name Type Description Default
field array_like

3D array containing the field that the filter will be applied to. NOTE: This assumes that the 3rd axis of the array is frequency.

required
kmin, kmax (array_like

The Fourier wavenumbers defining the edges of the bandpass filter. The units are defined by fft.fftfreq. The bandpass filter is applied to the magnitude of the 2D wavevector, k_perp = sqrt(k_x^2 + k_y^2).

required
d float

The pixel width parameter used by fft.fftfreq.

1.0

Returns:

Type Description
filtered_field (array_like)

3D array containing the field with the angular bandpass filter applied.

Source code in fastbox/filters.py
def angular_bandpass_filter(field, kmin, kmax, d=1.):
    """
    Apply a top-hat bandpass filter to the field in each frequency channel 
    (i.e. each 2D slice) of a datacube.

    Parameters:
        field (array_like):
            3D array containing the field that the filter will be applied to. 
            NOTE: This assumes that the 3rd axis of the array is frequency.

        kmin, kmax (array_like):
            The Fourier wavenumbers defining the edges of the bandpass filter. The 
            units are defined by ``fft.fftfreq``. The bandpass filter is applied to 
            the magnitude of the 2D wavevector, k_perp = sqrt(k_x^2 + k_y^2).

        d (float, optional):
            The pixel width parameter used by ``fft.fftfreq``.

    Returns:
        filtered_field (array_like):
            3D array containing the field with the angular bandpass filter applied.
    """
    # 2D FFT of field in transverse direction only
    field_k = np.fft.fftn(field, axes=[0,1])

    # Get frequencies
    kx = fft.fftfreq(field.shape[0], d=d)
    kx, ky = np.meshgrid(kx, kx)
    k = np.sqrt(kx**2. + ky**2.)

    # Filter frequencies that are out of range
    field_k[~np.logical_and(k >= kmin, k < kmax)] *= 0.
    return np.fft.ifftn(field_k, axes=[0,1])

bandpower_pca_filter(field, nbands, modes)

Generate a series of bandpass-filtered datacubes and then PCA filter each of them. The number of modes subtracted can be chosen separately for each sub-band.

N.B. The bandpass filters are contiguous top-hat filters of equal width in Fourier space.

Parameters:

Name Type Description Default
field array_like

3D array containing the field that the filter will be applied to. NOTE: This assumes that the 3rd axis of the array is frequency.

required
nbands int

How many sub-bands to divide the band into.

required
nmodes array_like or int

Number of eigenmodes to filter out in each sub-band. This can be an array with a value per sub-band, or a single integer for all bands.

required

Returns:

Type Description
cleaned_field (array_like)

Foreground-cleaned field.

Source code in fastbox/filters.py
def bandpower_pca_filter(field, nbands, modes):
    """
    Generate a series of bandpass-filtered datacubes and then PCA filter each 
    of them. The number of modes subtracted can be chosen separately for each 
    sub-band.

    N.B. The bandpass filters are contiguous top-hat filters of equal width in 
    Fourier space.

    Parameters:
        field (array_like):
            3D array containing the field that the filter will be applied to. 
            NOTE: This assumes that the 3rd axis of the array is frequency.

        nbands (int):
            How many sub-bands to divide the band into.

        nmodes (array_like or int):
            Number of eigenmodes to filter out in each sub-band.
            This can be an array with a value per sub-band, 
            or a single integer for all bands.

    Returns:
        cleaned_field (array_like):
            Foreground-cleaned field.
    """
    # Expand modes array if needed
    if isinstance(modes, (int, np.integer)):
        modes = modes * np.ones(nbands, dtype=int)

    # Check for correct number of modes/bin edges
    assert nbands == len(modes), \
        "len(modes) must equal nbands"

    # Get min/max frequencies and use to define the sub-bands
    kx = fft.fftfreq(field.shape[0], d=1.)
    kx, ky = np.meshgrid(kx, kx)
    k = np.sqrt(kx**2. + ky**2.)
    band_edges = np.linspace(np.min(k), np.max(k), nbands+1)

    # Mean-subtracted field
    x = mean_spectrum_filter(field)

    # Loop over bands
    bpf_cleaned = 0
    for i in range(len(band_edges)-1):
        # Apply bandpass filter
        bpf_cube = angular_bandpass_filter(x, 
                                           kmin=band_edges[i], 
                                           kmax=band_edges[i+1])

        # Apply PCA cleaning
        _bpf_cleaned = fastbox.filters.pca_filter(bpf_cube, 
                                                  nmodes=modes[i], 
                                                  return_filter=False)
        bpf_cleaned += _bpf_cleaned
    return bpf_cleaned

gpr_filter(field, kernels=None, return_filter=False, opt_messages=True, opt_num_restarts=10)

Fit the data with a Gaussian Process Regression model in the frequency direction. The hyperparameters of the kernel(s) are optimised automatically.

See https://github.com/paulassoares/gpr4im and arXiv:2105.12665 for a more sophisticated implementation.

Parameters:

Name Type Description Default
field array_like

3D array containing the field that the filter will be applied to. NOTE: This assumes that the 3rd axis of the array is frequency.

required
kernel list of ``GPy.kern``

List of kernel objects. If not specified, the sum of an RBF and Exponential kernel will be used. Note that the foreground component is assumed to be the first in the list.

NOTE: For each kernel k passed-in manually, make sure to call the k.variance.constrain_bounded and k.lengthscale.constrain_bounded methods to set the prior ranges of the hyperparameters. By default these are set to reasonable ranges for the input signal, assuming spectrally-smooth foregrounds dominate the variance, and a non-smooth, low-variance signal.

NOTE: The length scales of the kernel are normalised so that the full frequency range is on the unit interval, [0, 1].

required
return_filter bool

Whether to also return the GPR filter object.

False
opt_messages bool

Whether to print status messages as the optimiser runs.

True
opt_num_restarts int

Number of optimiser restarts to allow.

10

Returns:

Type Description
cleaned_field (array_like), gpr (GPy.models.GPRegression object)

Foreground-cleaned field and GPR handler object.

  • cleaned_field (array_like): Foreground-cleaned field.

  • gpr (GPy.models.GPRegression object): GPy GPR handler object.

Source code in fastbox/filters.py
def gpr_filter(field, kernels=None, return_filter=False, opt_messages=True, 
               opt_num_restarts=10):
    """
    Fit the data with a Gaussian Process Regression model in the frequency 
    direction. The hyperparameters of the kernel(s) are optimised automatically.

    See ``https://github.com/paulassoares/gpr4im`` and arXiv:2105.12665 for a 
    more sophisticated implementation.

    Parameters:
        field (array_like):
            3D array containing the field that the filter will be applied to. 
            NOTE: This assumes that the 3rd axis of the array is frequency.

        kernel (list of ``GPy.kern``):
            List of kernel objects. If not specified, the sum of an RBF and 
            Exponential kernel will be used. Note that the foreground component is 
            assumed to be the first in the list.

            NOTE: For each kernel ``k`` passed-in manually, make sure to call the 
            ``k.variance.constrain_bounded`` and ``k.lengthscale.constrain_bounded`` 
            methods to set the prior ranges of the hyperparameters. By default 
            these are set to reasonable ranges for the input signal, assuming 
            spectrally-smooth foregrounds dominate the variance, and a non-smooth, 
            low-variance signal.

            NOTE: The length scales of the kernel are normalised so that the full 
            frequency range is on the unit interval, [0, 1].

        return_filter (bool, optional):
            Whether to also return the GPR filter object.

        opt_messages (bool, optional):
            Whether to print status messages as the optimiser runs.

        opt_num_restarts (int, optional):
            Number of optimiser restarts to allow.

    Returns:
        cleaned_field (array_like), gpr (GPy.models.GPRegression object): 
            Foreground-cleaned field and GPR handler object.

        - ``cleaned_field (array_like)``:
            Foreground-cleaned field.

        - ``gpr (GPy.models.GPRegression object)``:
            GPy GPR handler object.
    """
    # Check that GPy is available
    try:
        GPy.__version__
    except:
        raise ImportError("The GPy module must be installed to use this function.")

    # Reshape and mean-centre the input data
    x = mean_spectrum_filter(field).reshape((-1, field.shape[-1])).T

    # Frequency on the unit interval
    Nfreq, Npix = x.shape
    nu = np.linspace(0., 1., Nfreq)

    # Construct total kernel
    if kernels is None:

        # Foreground kernel
        kernel_fg = GPy.kern.RBF(input_dim=1, variance=1., lengthscale=0.1)
        var = np.var(x)
        kernel_fg.variance.constrain_bounded(1e-4*var, 1e2*var) # most of variance is FGs
        kernel_fg.lengthscale.constrain_bounded(1e-3, 1e2) # O(1) lengthscale

        # 21cm signal kernel
        kernel_signal = GPy.kern.Exponential(1)
        kernel_signal.variance.constrain_bounded(1e-14*var, 1e-4*var) # small variance
        kernel_signal.lengthscale.constrain_bounded(1e-6, 1e-3) # small lengthscale

        # Combine kernels
        kernel = kernel_fg + kernel_signal
    else:
        kernel = 0
        for k in kernels:
            kernel += k

    # Construct GPR object
    model = GPy.models.GPRegression(X=nu[:,np.newaxis], Y=x, kernel=kernel)

    # Optimise the hyperparameters
    model.optimize(messages=opt_messages)
    model.optimize_restarts(num_restarts=opt_num_restarts)

    # Get foreground component and predict values at each datapoint
    kernel_fg = model.kern.parts[0]
    x_fg, cov_fg = model.predict(nu[:,np.newaxis], full_cov=True, 
                                   kern=kernel_fg, include_likelihood=False)

    # Construct residual
    x_clean = (x - x_fg).T.reshape(field.shape)

    # Return residual (and optionally GPR object)
    if return_filter:
        return x_clean, model
    else:
        return x_clean

ica_filter(field, nmodes, return_filter=False, **kwargs_ica)

Apply an Independent Component Analysis (ICA) filter to a field. This subtracts off functions in the frequency direction that correspond to the highest SNR statistically independent modes of the empirical frequency-frequency covariance.

Uses sklearn.decomposition.FastICA. For more details, see: https://scikit-learn.org/stable/modules/generated/sklearn.decomposition.FastICA.html

Parameters:

Name Type Description Default
field array_like

3D array containing the field that the filter will be applied to. NOTE: This assumes that the 3rd axis of the array is frequency.

required
nmodes int

Number of eigenmodes to filter out.

required
return_filter bool

Whether to also return the linear FG filter operator and coefficients.

False
**kwargs_ica dict

Keyword arguments for the sklearn.decomposition.FastICA

{}

Returns:

Type Description
cleaned_field (array_like), transformer (sklearn.decomposition.FastICA instance, optional)

Foreground-filtered field and ICA filter object.

  • cleaned_field (array_like): Foreground-cleaned field.

  • transformer (sklearn.decomposition.FastICA instance, optional): Contains the ICA filter. Only returned if return_operator = True. To get the foreground model, you can do the following: x = field - mean_field # shape (Npix, Nfreq) x_trans = transformer.fit_transform(x.T) # mode amplitudes per pixel x_fg = transformer.inverse_transform(x_trans).T # foreground model

Source code in fastbox/filters.py
def ica_filter(field, nmodes, return_filter=False, **kwargs_ica):
    """
    Apply an Independent Component Analysis (ICA) filter to a field. This 
    subtracts off functions in the frequency direction that correspond to the 
    highest SNR *statistically independent* modes of the empirical 
    frequency-frequency covariance.

    Uses `sklearn.decomposition.FastICA`. For more details, see:
    https://scikit-learn.org/stable/modules/generated/sklearn.decomposition.FastICA.html

    Parameters:
        field (array_like):
            3D array containing the field that the filter will be applied to. 
            NOTE: This assumes that the 3rd axis of the array is frequency.

        nmodes (int):
            Number of eigenmodes to filter out.

        return_filter (bool, optional):
            Whether to also return the linear FG filter operator and coefficients. 

        **kwargs_ica (dict, optional):
            Keyword arguments for the `sklearn.decomposition.FastICA`

    Returns:
        cleaned_field (array_like), transformer (sklearn.decomposition.FastICA instance, optional): Foreground-filtered field and ICA filter object.

        - ``cleaned_field (array_like)``:
            Foreground-cleaned field.

        - ``transformer (sklearn.decomposition.FastICA instance, optional)``:
            Contains the ICA filter. Only returned if `return_operator = True`. 
            To get the foreground model, you can do the following: 
                ```
                x = field - mean_field # shape (Npix, Nfreq)
                x_trans = transformer.fit_transform(x.T) # mode amplitudes per pixel
                x_fg = transformer.inverse_transform(x_trans).T # foreground model
                ```
    """
    # Subtract mean vs. frequency
    x = mean_spectrum_filter(field).reshape((-1, field.shape[-1])).T

    # Build ICA model and get amplitudes for each mode per pixel
    transformer = FastICA(n_components=nmodes, **kwargs_ica)
    x_trans = transformer.fit_transform(x.T)

    # Construct foreground operator
    x_fg = transformer.inverse_transform(x_trans).T

    # Subtract foreground operator
    x_clean = (x - x_fg).T.reshape(field.shape)

    # Return FG-subtracted data (and, optionally, the ICA filter instance)
    if return_filter:
        return x_clean, transformer
    else:
        return x_clean

kernel_pca_filter(field, nmodes, return_filter=False, **kwargs_pca)

Apply a Kernel Principal Component Analysis (KPCA) filter to a field. This subtracts off functions in the frequency direction that correspond to the highest SNR modes of the empirical frequency-frequency covariance, with some non-linear weighting by a specified kernel.

(WARNING: Can use a lot of memory)

Uses sklearn.decomposition.KernelPCA. For more details, see: https://scikit-learn.org/stable/modules/generated/sklearn.decomposition.KernelPCA.html

Parameters:

Name Type Description Default
field array_like

3D array containing the field that the filter will be applied to. NOTE: This assumes that the 3rd axis of the array is frequency.

required
nmodes int

Number of eigenmodes to filter out (modes are ordered by SNR).

required
return_filter bool

Whether to also return the linear FG filter operator and coefficients.

False
**kwargs_pca dict

Keyword arguments for the sklearn.decomposition.KernelPCA

{}

Returns:

Type Description
cleaned_field (array_like), transformer (sklearn.decomposition.KernelPCA instance, optional)

Foreground-filtered field and KPCA filter object.

  • cleaned_field (array_like): Foreground-cleaned field.

  • transformer sklearn.decomposition.KernelPCA instance, optional): Contains the KPCA filter. Only returned if return_operator = True. To get the foreground model, you can do the following: x = field - mean_field # shape (Npix, Nfreq) x_trans = transformer.fit_transform(x.T) # mode amplitudes per pixel x_fg = transformer.inverse_transform(x_trans).T # foreground model

Source code in fastbox/filters.py
def kernel_pca_filter(field, nmodes, return_filter=False, **kwargs_pca):
    """
    Apply a Kernel Principal Component Analysis (KPCA) filter to a field. This 
    subtracts off functions in the frequency direction that correspond to the 
    highest SNR modes of the empirical frequency-frequency covariance, with 
    some non-linear weighting by a specified kernel.

    (WARNING: Can use a lot of memory)

    Uses `sklearn.decomposition.KernelPCA`. For more details, see:
    https://scikit-learn.org/stable/modules/generated/sklearn.decomposition.KernelPCA.html

    Parameters:
        field (array_like):
            3D array containing the field that the filter will be applied to. 
            NOTE: This assumes that the 3rd axis of the array is frequency.

        nmodes (int):
            Number of eigenmodes to filter out (modes are ordered by SNR).

        return_filter (bool, optional):
            Whether to also return the linear FG filter operator and coefficients. 

        **kwargs_pca (dict, optional):
            Keyword arguments for the `sklearn.decomposition.KernelPCA`

    Returns:
        cleaned_field (array_like), transformer (sklearn.decomposition.KernelPCA instance, optional): Foreground-filtered field and KPCA filter object.

        - ``cleaned_field (array_like)``:
            Foreground-cleaned field.

        - ``transformer sklearn.decomposition.KernelPCA instance, optional)``:
            Contains the KPCA filter. Only returned if `return_operator = True`. 
            To get the foreground model, you can do the following: 
                ```
                x = field - mean_field # shape (Npix, Nfreq)
                x_trans = transformer.fit_transform(x.T) # mode amplitudes per pixel
                x_fg = transformer.inverse_transform(x_trans).T # foreground model
                ```
    """
    # Subtract mean vs. frequency
    x = mean_spectrum_filter(field).reshape((-1, field.shape[-1])).T

    # Build PCA model and get amplitudes for each mode per pixel
    transformer = KernelPCA(n_components=nmodes, fit_inverse_transform=True, **kwargs_pca)
    x_trans = transformer.fit_transform(x.T)

    # Manually perform inverse transform, using the remaining eigenmode with 
    # the smallest eigenvalue
    X = transformer.alphas_[:,-1:] * np.sqrt(transformer.lambdas_[-1:]) # = x_trans
    K = transformer._get_kernel(X, transformer.X_transformed_fit_[:,-1:])
    n_samples = transformer.X_transformed_fit_.shape[0]
    K.flat[::n_samples + 1] += transformer.alpha
    x_clean = np.dot(K, transformer.dual_coef_).reshape(field.shape)

    # Return FG-subtracted data (and, optionally, the PCA filter instance)
    if return_filter:
        return x_clean, transformer
    else:
        return x_clean

kernel_pca_filter_legacy(field, nmodes, return_filter=False, **kwargs_pca)

Apply a Kernel Principal Component Analysis (KPCA) filter to a field. This subtracts off functions in the frequency direction that correspond to the highest SNR modes of the empirical frequency-frequency covariance, with some non-linear weighting by a specified kernel.

NOTE: The sklearn KernelPCA function changed behaviour sometime after v.0.22, so this function doesn't seem to work very well any more.

(WARNING: Can use a lot of memory)

Uses sklearn.decomposition.KernelPCA. For more details, see: https://scikit-learn.org/stable/modules/generated/sklearn.decomposition.KernelPCA.html

Parameters:

Name Type Description Default
field array_like

3D array containing the field that the filter will be applied to. NOTE: This assumes that the 3rd axis of the array is frequency.

required
nmodes int

Number of eigenmodes to filter out (modes are ordered by SNR).

required
return_filter bool

Whether to also return the linear FG filter operator and coefficients.

False
**kwargs_pca dict

Keyword arguments for the sklearn.decomposition.KernelPCA

{}

Returns:

Type Description
cleaned_field (array_like), transformer (sklearn.decomposition.KernelPCA instance, optional)

Foreground-filtered field and KPCA filter object.

  • cleaned_field (array_like): Foreground-cleaned field.

  • transformer (sklearn.decomposition.KernelPCA instance, optional): Contains the KPCA filter. Only returned if return_operator = True. To get the foreground model, you can do the following: x = field - mean_field # shape (Npix, Nfreq) x_trans = transformer.fit_transform(x.T) # mode amplitudes per pixel x_fg = transformer.inverse_transform(x_trans).T # foreground model

Source code in fastbox/filters.py
def kernel_pca_filter_legacy(field, nmodes, return_filter=False, **kwargs_pca):
    """
    Apply a Kernel Principal Component Analysis (KPCA) filter to a field. This 
    subtracts off functions in the frequency direction that correspond to the 
    highest SNR modes of the empirical frequency-frequency covariance, with 
    some non-linear weighting by a specified kernel.

    NOTE: The sklearn KernelPCA function changed behaviour sometime after 
    v.0.22, so this function doesn't seem to work very well any more.

    (WARNING: Can use a lot of memory)

    Uses `sklearn.decomposition.KernelPCA`. For more details, see:
    https://scikit-learn.org/stable/modules/generated/sklearn.decomposition.KernelPCA.html

    Parameters:
        field (array_like):
            3D array containing the field that the filter will be applied to. 
            NOTE: This assumes that the 3rd axis of the array is frequency.

        nmodes (int):
            Number of eigenmodes to filter out (modes are ordered by SNR).

        return_filter (bool, optional):
            Whether to also return the linear FG filter operator and coefficients. 

        **kwargs_pca (dict, optional):
            Keyword arguments for the `sklearn.decomposition.KernelPCA`

    Returns:
        cleaned_field (array_like), transformer (sklearn.decomposition.KernelPCA instance, optional): Foreground-filtered field and KPCA filter object.

        - ``cleaned_field (array_like)``:
            Foreground-cleaned field.

        - ``transformer (sklearn.decomposition.KernelPCA instance, optional)``:
            Contains the KPCA filter. Only returned if `return_operator = True`. 
            To get the foreground model, you can do the following: 
                ```
                x = field - mean_field # shape (Npix, Nfreq)
                x_trans = transformer.fit_transform(x.T) # mode amplitudes per pixel
                x_fg = transformer.inverse_transform(x_trans).T # foreground model
                ```
    """
    # Subtract mean vs. frequency
    x = mean_spectrum_filter(field).reshape((-1, field.shape[-1])).T

    # Build PCA model and get amplitudes for each mode per pixel
    transformer = KernelPCA(n_components=nmodes, fit_inverse_transform=True, **kwargs_pca)
    x_trans = transformer.fit_transform(x.T)

    # Construct foreground operator
    x_fg = transformer.inverse_transform(x_trans).T

    # Subtract foreground operator
    x_clean = (x - x_fg).T.reshape(field.shape)

    # Return FG-subtracted data (and, optionally, the PCA filter instance)
    if return_filter:
        return x_clean, transformer
    else:
        return x_clean

mean_spectrum_filter(field)

Subtract the mean from each frequency slice.

Parameters:

Name Type Description Default
field array_like

3D array containing the field that the filter will be applied to. NOTE: This assumes that the 3rd axis of the array is frequency.

required

Returns:

Type Description
sub_field (array_like)

3D array containing the field with the mean at each frequency subtracted.

Source code in fastbox/filters.py
def mean_spectrum_filter(field):
    """
    Subtract the mean from each frequency slice.

    Parameters:
        field (array_like):
            3D array containing the field that the filter will be applied to. 
            NOTE: This assumes that the 3rd axis of the array is frequency.

    Returns:
        sub_field (array_like):
            3D array containing the field with the mean at each frequency 
            subtracted.
    """
    # Calculate freq-freq covariance matrix
    d = field.reshape((-1, field.shape[-1])) # (Nxpix * Nypix, Nfreqs)

    # Calculate average spectrum (avg. over pixels, as a function of frequency)
    d_mean = np.mean(d, axis=0)[np.newaxis,:]
    x = d - d_mean # mean-subtracted data
    return x.reshape(field.shape)

nmf_filter(field, nmodes, return_filter=False, **kwargs_nmf)

Apply a Non-Negative Matrix Factorisation (NMF) filter to a field. This finds two non-negative matrices whose product approximates the (strictly non-negative) input signal.

Uses sklearn.decomposition.NMF. For more details, see: https://scikit-learn.org/stable/modules/generated/sklearn.decomposition.NMF.html

Parameters:

Name Type Description Default
field array_like

3D array containing the field that the filter will be applied to. NOTE: This assumes that the 3rd axis of the array is frequency.

required
nmodes int

Number of eigenmodes to filter out.

required
return_filter bool

Whether to also return the linear FG filter operator and coefficients.

False
**kwargs_nmf dict

Keyword arguments for the sklearn.decomposition.NMF

{}

Returns:

Type Description
cleaned_field (array_like), transformer (sklearn.decomposition.NMF instance, optional)

Foreground-filtered field and NMF filter object.

  • cleaned_field (array_like): Foreground-cleaned field.

  • transformer (sklearn.decomposition.NMF instance, optional): Contains the NMF filter. Only returned if return_operator = True. To get the foreground model, you can do the following: x = field - mean_field # shape (Npix, Nfreq) x_trans = transformer.fit_transform(x.T) # mode amplitudes per pixel x_fg = transformer.inverse_transform(x_trans).T # foreground model

Source code in fastbox/filters.py
def nmf_filter(field, nmodes, return_filter=False, **kwargs_nmf):
    """
    Apply a Non-Negative Matrix Factorisation (NMF) filter to a field. This 
    finds two non-negative matrices whose product approximates the (strictly 
    non-negative) input signal. 

    Uses `sklearn.decomposition.NMF`. For more details, see:
    https://scikit-learn.org/stable/modules/generated/sklearn.decomposition.NMF.html

    Parameters:
        field (array_like):
            3D array containing the field that the filter will be applied to. 
            NOTE: This assumes that the 3rd axis of the array is frequency.

        nmodes (int):
            Number of eigenmodes to filter out.

        return_filter (bool, optional):
            Whether to also return the linear FG filter operator and coefficients. 

        **kwargs_nmf (dict, optional):
            Keyword arguments for the `sklearn.decomposition.NMF`

    Returns:
        cleaned_field (array_like), transformer (sklearn.decomposition.NMF instance, optional): Foreground-filtered field and NMF filter object.

        - ``cleaned_field (array_like)``:
            Foreground-cleaned field.

        - ``transformer (sklearn.decomposition.NMF instance, optional)``:
            Contains the NMF filter. Only returned if `return_operator = True`. 
            To get the foreground model, you can do the following: 
                ```
                x = field - mean_field # shape (Npix, Nfreq)
                x_trans = transformer.fit_transform(x.T) # mode amplitudes per pixel
                x_fg = transformer.inverse_transform(x_trans).T # foreground model
                ```
    """
    # Calculate freq-freq covariance matrix
    d = field.reshape((-1, field.shape[-1])).T # (Nfreqs, Nxpix * Nypix)

    # Calculate average spectrum (avg. over pixels, as a function of frequency)
    d_mean = np.mean(d, axis=-1)[:,np.newaxis]
    x = d

    # Build NMF model and get amplitudes for each mode per pixel
    transformer = NMF(n_components=nmodes, **kwargs_nmf)
    x_trans = transformer.fit_transform(x.T)

    # Construct foreground operator
    x_fg = transformer.inverse_transform(x_trans).T

    # Subtract foreground operator
    x_clean = (x - x_fg).T.reshape(field.shape)

    # Return FG-subtracted data (and, optionally, the NMF filter instance)
    if return_filter:
        return x_clean, transformer
    else:
        return x_clean

pca_filter(field, nmodes, fit_powerlaw=False, return_filter=False)

Apply a Principal Component Analysis (PCA) filter to a field. This subtracts off functions in the frequency direction that correspond to the highest SNR modes of the empirical frequency-frequency covariance.

Note that the mean as a function of frequency (i.e. average over x and y pixels for each frequency channel) is subtracted from the data before calculating the PCA modes. The mean is then added back into the foreground model at the end.

See Sect. 3.2 of Alonso et al. [arXiv:1409.8667] for details. N.B. Proper inverse-noise weighting is not currently used.

Parameters:

Name Type Description Default
field array_like

3D array containing the field that the filter will be applied to. NOTE: This assumes that the 3rd axis of the array is frequency.

required
nmodes int

Number of eigenmodes to filter out (modes are ordered by SNR).

required
fit_powerlaw bool

If True, fit a power-law to the mean as a function of frequency. This may help prevent over-fitting of the mean relation. If False, the simple mean as a function of frequency will be used.

False
return_filter bool

Whether to also return the linear FG filter operator and coefficients.

False

Returns:

Type Description
cleaned_field (array_like), U_fg (array_like, optional), fg_amps (array_like, optional)

Cleaned field, foreground filter operator, and foreground mode amplitudes.

  • cleaned_field (array_like): Foreground-cleaned field.

  • U_fg (array_like, optional): Foreground operator, shape (Nfreq, Nmodes). Only returned if return_operator = True.

  • fg_amps (array_like, optional): Foreground mode amplitudes per pixel, shape (Nmodes, Npix). Only returned if return_operator = True.

Source code in fastbox/filters.py
def pca_filter(field, nmodes, fit_powerlaw=False, return_filter=False):
    """
    Apply a Principal Component Analysis (PCA) filter to a field. This 
    subtracts off functions in the frequency direction that correspond to the 
    highest SNR modes of the empirical frequency-frequency covariance.

    Note that the mean as a function of frequency (i.e. average over x and y 
    pixels for each frequency channel) is subtracted from the data before 
    calculating the PCA modes. The mean is then added back into the foreground 
    model at the end.

    See Sect. 3.2 of Alonso et al. [arXiv:1409.8667] for details.
    N.B. Proper inverse-noise weighting is not currently used.

    Parameters:
        field (array_like):
            3D array containing the field that the filter will be applied to. 
            NOTE: This assumes that the 3rd axis of the array is frequency.

        nmodes (int):
            Number of eigenmodes to filter out (modes are ordered by SNR).

        fit_powerlaw (bool, optional):
            If True, fit a power-law to the mean as a function of frequency. This 
            may help prevent over-fitting of the mean relation. If False, the 
            simple mean as a function of frequency will be used.

        return_filter (bool, optional):
            Whether to also return the linear FG filter operator and coefficients. 

    Returns:
        cleaned_field (array_like), U_fg (array_like, optional), fg_amps (array_like, optional): 
            Cleaned field, foreground filter operator, and foreground mode amplitudes.

        - ``cleaned_field (array_like)``:
            Foreground-cleaned field.

        - ``U_fg (array_like, optional)``:
            Foreground operator, shape (Nfreq, Nmodes). Only returned if 
            `return_operator = True`.    

        - ``fg_amps (array_like, optional)``:
            Foreground mode amplitudes per pixel, shape (Nmodes, Npix). Only 
            returned if `return_operator = True`.
    """
    # Reshape field
    d = field.reshape((-1, field.shape[-1])).T # (Nfreqs, Nxpix * Nypix)

    # Calculate average spectrum (avg. over pixels, as a function of frequency)
    d_mean = np.mean(d, axis=-1)[:,np.newaxis]

    # Fit power law to the mean and subtract that instead
    # (FIXME: Needs to be tested more thoroughly)
    if fit_powerlaw:
        freqs = np.linspace(1., 10., d.shape[0])

        def fn(nu, amp, beta):
            return amp * (nu / nu[0])**beta

        p0 = [d_mean[0][0], -2.7]
        pfit, _ = curve_fit(fn, freqs, d_mean.flatten(), p0=p0)
        d_mean = fn(freqs, pfit[0], pfit[1])[:,np.newaxis] # use power-law fit instead

    # Calculate freq-freq covariance matrix
    x = d - d_mean
    cov = np.cov(x) # (Nfreqs x Nfreqs)

    # Do eigendecomposition of covariance matrix
    eigvals, eigvecs = np.linalg.eig(cov)

    # Sort by eigenvalue
    idxs = np.argsort(eigvals)[::-1] # reverse order (biggest eigenvalue first)
    eigvals = eigvals[idxs]
    eigvecs = eigvecs[:,idxs]

    # Construct foreground filter operator by keeping only nmodes eigenmodes
    U_fg = eigvecs[:,:nmodes] # (Nfreqs, Nmodes)

    # Calculate foreground amplitudes for each line of sight
    fg_amps = np.dot(U_fg.T, x) # (Nmodes, Npix)

    # Construct FG field and subtract from input data
    fg_field = np.dot(U_fg, fg_amps) + d_mean # Operator times amplitudes + mean
    fg_field = fg_field.T.reshape(field.shape)
    cleaned_field = field - fg_field

    # Return filtered field (and optionally the filter operator + amplitudes)
    if return_filter:
        return cleaned_field, U_fg, fg_amps
    else:
        return cleaned_field