from numbers import Number
from typing import List, Tuple
import numpy as np
import pybaselines
from . import PreprocessingStep
"""
List of the available methods for spectral baseline correction offered by the package.
"""
class PybaselinesCorrector(PreprocessingStep):
"""
A class that wraps pybaselines baseline correction function and converts them to an executable :class:`.Preprocessing` object.
"""
@staticmethod
def _pybaselines_baseline_extractor(func):
# Wrapper that extracts the baseline result and discard the params result from the given pybaselines method.
def wrap(*args, **kwargs):
baseline, params = func(*args, **kwargs)
return baseline
return wrap
@staticmethod
def _pybaselines_broadcaster(func):
# Wrapper that applies the given pybaselines function over the n-dimensional data provided.
def wrap(intensity_data, spectral_axis, **kwargs):
extractor_func = PybaselinesCorrector._pybaselines_baseline_extractor(func)
baseline_data = np.apply_along_axis(extractor_func, axis=-1, arr=intensity_data, x_data=spectral_axis, **kwargs)
preprocessed_spectral_data = intensity_data - baseline_data
return preprocessed_spectral_data, spectral_axis
return wrap
def __init__(self, pybaselines_function, **kwargs):
# Using the wrappers in the class, create a PreprocessingStep object that applies the pybaselines function provided.
broadcastable_pybaselines_function = PybaselinesCorrector._pybaselines_broadcaster(pybaselines_function)
super().__init__(broadcastable_pybaselines_function, **kwargs)
[docs]
class ASLS(PybaselinesCorrector):
"""
Baseline correction based on Asymmetric Least Squares (AsLS).
Parameters
----------
**kwargs :
Check original `implementation <https://pybaselines.readthedocs.io/en/latest/api/pybaselines/api/index.html#pybaselines.api.Baseline.asls>`_ for information about parameters.
.. note :: Implementation based on `pybaselines <https://pybaselines.readthedocs.io>`_.
References
----------
Eilers, P. A Perfect Smoother. Analytical Chemistry, 2003, 75(14), 3631-3636.
Eilers, P., et al. Baseline correction with asymmetric least squares smoothing. Leiden University Medical Centre Report, 2005, 1(1).
"""
[docs]
def __init__(self, *, lam=1e6, p=1e-2, diff_order=2, max_iter=50, tol=1e-3, weights=None):
super().__init__(pybaselines.whittaker.asls, lam=lam, p=p, diff_order=diff_order, max_iter=max_iter,
tol=tol, weights=weights)
[docs]
class IASLS(PybaselinesCorrector):
"""
Baseline correction based on Improved Asymmetric Least Squares (IAsLS).
Parameters
----------
**kwargs :
Check original `implementation <https://pybaselines.readthedocs.io/en/latest/api/pybaselines/api/index.html#pybaselines.api.Baseline.iasls>`_ for information about parameters.
.. note :: Implementation based on `pybaselines <https://pybaselines.readthedocs.io>`_.
References
----------
He, S., et al. Baseline correction for raman spectra using an improved asymmetric least squares method, Analytical Methods, 2014, 6(12), 4402-4407.
"""
[docs]
def __init__(self, *, lam=1e6, p=1e-2, lam_1=1e-4, max_iter=50, tol=1e-3, weights=None, diff_order=2):
super().__init__(pybaselines.whittaker.iasls, lam=lam, p=p, lam_1=lam_1, max_iter=max_iter, tol=tol, weights=weights, diff_order=diff_order)
[docs]
class AIRPLS(PybaselinesCorrector):
"""
Baseline correction based on Adaptive Iteratively Reweighted Penalized Least Squares (airPLS).
Parameters
----------
**kwargs :
Check original `implementation <https://pybaselines.readthedocs.io/en/latest/api/pybaselines/api/index.html#pybaselines.api.Baseline.airpls>`_ for information about parameters.
.. note :: Implementation based on `pybaselines <https://pybaselines.readthedocs.io>`_.
References
----------
Zhang, Z.M., et al. Baseline correction using adaptive iteratively reweighted penalized least squares. Analyst, 2010, 135(5), 1138-1146.
"""
[docs]
def __init__(self, *, lam=1e6, diff_order=2, max_iter=50, tol=1e-3, weights=None):
super().__init__(pybaselines.whittaker.airpls, lam=lam, diff_order=diff_order, max_iter=max_iter, tol=tol, weights=weights)
[docs]
class ARPLS(PybaselinesCorrector):
"""
Baseline correction based on Asymmetrically Reweighted Penalized Least Squares (arPLS).
Parameters
----------
**kwargs :
Check original `implementation <https://pybaselines.readthedocs.io/en/latest/api/pybaselines/api/index.html#pybaselines.api.Baseline.arpls>`_ for information about parameters.
.. note :: Implementation based on `pybaselines <https://pybaselines.readthedocs.io>`_.
References
----------
Baek, S.J., et al. Baseline correction using asymmetrically reweighted penalized least squares smoothing. Analyst, 2015, 140, 250-257.
"""
[docs]
def __init__(self, *, lam=1e5, diff_order=2, max_iter=50, tol=1e-3, weights=None):
super().__init__(pybaselines.whittaker.arpls, lam=lam, diff_order=diff_order, max_iter=max_iter, tol=tol, weights=weights)
[docs]
class DRPLS(PybaselinesCorrector):
"""
Baseline correction based on Doubly Reweighted Penalized Least Squares (drPLS).
Parameters
----------
**kwargs :
Check original `implementation <https://pybaselines.readthedocs.io/en/latest/api/pybaselines/api/index.html#pybaselines.api.Baseline.drpls>`_ for information about parameters.
.. note :: Implementation based on `pybaselines <https://pybaselines.readthedocs.io>`_.
References
----------
Xu, D. et al. Baseline correction method based on doubly reweighted penalized least squares, Applied Optics, 2019, 58, 3913-3920.
"""
[docs]
def __init__(self, *, lam=1e5, eta=0.5, max_iter=50, tol=1e-3, weights=None, diff_order=2):
super().__init__(pybaselines.whittaker.drpls, lam=lam, eta=eta, max_iter=max_iter, tol=tol, weights=weights, diff_order=diff_order)
[docs]
class IARPLS(PybaselinesCorrector):
"""
Baseline correction based on Improved Asymmetrically Reweighted Penalized Least Squares (IarPLS).
Parameters
----------
**kwargs :
Check original `implementation <https://pybaselines.readthedocs.io/en/latest/api/pybaselines/api/index.html#pybaselines.api.Baseline.iarpls>`_ for information about parameters.
.. note :: Implementation based on `pybaselines <https://pybaselines.readthedocs.io>`_.
References
----------
Ye, J., et al. Baseline correction method based on improved asymmetrically reweighted penalized least squares for Raman spectrum. Applied Optics, 2020, 59, 10933-10943.
"""
[docs]
def __init__(self, *, lam=1e5, diff_order=2, max_iter=50, tol=1e-3, weights=None):
super().__init__(pybaselines.whittaker.iarpls, lam=lam, diff_order=diff_order, max_iter=max_iter, tol=tol, weights=weights)
[docs]
class ASPLS(PybaselinesCorrector):
"""
Baseline correction based on Adaptive Smoothness Penalized Least Squares (asPLS).
Parameters
----------
**kwargs :
Check original `implementation <https://pybaselines.readthedocs.io/en/latest/api/pybaselines/api/index.html#pybaselines.api.Baseline.aspls>`_ for information about parameters.
.. note :: Implementation based on `pybaselines <https://pybaselines.readthedocs.io>`_.
References
----------
Zhang, F., et al. Baseline correction for infrared spectra using adaptive smoothness parameter penalized least squares method. Spectroscopy Letters, 2020, 53(3), 222-233.
"""
[docs]
def __init__(self, *, lam=1e5, diff_order=2, max_iter=100, tol=1e-3, weights=None, alpha=None):
super().__init__(pybaselines.whittaker.aspls, lam=lam, diff_order=diff_order, max_iter=max_iter, tol=tol, weights=weights, alpha=alpha)
[docs]
class Poly(PybaselinesCorrector):
"""
Baseline correction based on polynomial fitting.
Parameters
----------
poly_order : int, optional
Order of polynomial to fit. Default is 2.
regions : list of tuples, optional
Describes the regions (in cm^{-1}) used for selective masking. If ``None`` (default), all points are used.
Examples:
[(None, 300)] - only uses the bands < 300cm-1
[(3000, None)] - only uses the bands > 3000cm-1
[(700, 1800)] - only uses the bands between 700 and 1800 (i.e. the "fingerprint" region)
[(300, 400), (500, 600)] - only uses the bands between 300 and 400, and 500 and 600
.. note :: Implementation based on `pybaselines <https://pybaselines.readthedocs.io>`_.
"""
[docs]
def __init__(self, *, poly_order=2, regions: List[Tuple[Number or None, Number or None]] = None):
if regions is not None:
for region in regions:
if len(region) != 2:
raise ValueError("Region must be a list of tuples of two elements")
super().__init__(pybaselines.polynomial.poly, poly_order=poly_order, weights=regions)
def _get_mask(self, spectral_axis):
mask = np.zeros_like(spectral_axis)
for region in self.kwargs['weights']:
start = spectral_axis[0] if region[0] is None else region[0]
end = spectral_axis[-1] if region[1] is None else region[1]
if start > end:
# swap
start, end = end, start
ones = np.logical_and(start <= spectral_axis, spectral_axis <= end)
mask[ones] = 1
return mask
def __call__(self, spectral_data, spectral_axis, **kwargs):
# use selective masking if regions are provided
weights = self._get_mask(spectral_axis) if self.kwargs['weights'] is not None else None
# exchange weights in kwargs with the mask
kwargs = {k: v for k, v in self.kwargs.items() if k != 'weights'}
kwargs['weights'] = weights
return self.method(spectral_data, spectral_axis, **kwargs)
[docs]
class ModPoly(PybaselinesCorrector):
"""
Baseline correction based on modified polynomial fitting.
Parameters
----------
**kwargs :
Check original `implementation <https://pybaselines.readthedocs.io/en/latest/api/pybaselines/api/index.html#pybaselines.polynomial.modpoly>`_ for information about parameters.
.. note :: Implementation based on `pybaselines <https://pybaselines.readthedocs.io>`_.
References
----------
Lieber, C., et al. Automated method for subtraction of fluorescence from biological raman spectra. Applied Spectroscopy, 2003, 57(11), 1363-1367.
Gan, F., et al. Baseline correction by improved iterative polynomial fitting with automatic threshold. Chemometrics and Intelligent Laboratory Systems, 2006, 82, 59-65.
"""
[docs]
def __init__(self, *, poly_order=2, tol=0.001, max_iter=250, weights=None, use_original=False, mask_initial_peaks=False):
super().__init__(pybaselines.polynomial.modpoly, poly_order=poly_order, tol=tol, max_iter=max_iter, weights=weights, use_original=use_original, mask_initial_peaks=mask_initial_peaks)
[docs]
class PenalisedPoly(PybaselinesCorrector):
"""
Baseline correction based on penalised polynomial fitting.
Parameters
----------
**kwargs :
Check original `implementation <https://pybaselines.readthedocs.io/en/latest/api/pybaselines/api/index.html#pybaselines.polynomial.penalized_poly>`_ for information about parameters.
.. note :: Implementation based on `pybaselines <https://pybaselines.readthedocs.io>`_.
References
----------
Mazet, V., et al. Background removal from spectra by designing and minimising a non-quadratic cost function. Chemometrics and Intelligent Laboratory Systems, 2005, 76(2), 121-133.
"""
[docs]
def __init__(self, *, poly_order=2, tol=0.001, max_iter=250, weights=None, cost_function='asymmetric_truncated_quadratic', threshold=None, alpha_factor=0.99):
super().__init__(pybaselines.polynomial.penalized_poly, poly_order=poly_order, tol=tol, max_iter=max_iter, weights=weights, cost_function=cost_function, threshold=threshold, alpha_factor=alpha_factor)
[docs]
class IModPoly(PybaselinesCorrector):
"""
Baseline correction based on improved modified polynomial fitting.
Parameters
----------
**kwargs :
Check original `implementation <https://pybaselines.readthedocs.io/en/latest/api/pybaselines/api/index.html#pybaselines.polynomial.imodpoly>`_ for information about parameters.
.. note :: Implementation based on `pybaselines <https://pybaselines.readthedocs.io>`_.
References
----------
Zhao, J., et al. Automated Autofluorescence Background Subtraction Algorithm for Biomedical Raman Spectroscopy, Applied Spectroscopy, 2007, 61(11), 1225-1232.
"""
[docs]
def __init__(self, *, poly_order=2, tol=0.001, max_iter=250, weights=None, use_original=False, mask_initial_peaks=True, num_std=1):
super().__init__(pybaselines.polynomial.imodpoly, poly_order=poly_order, tol=tol, max_iter=max_iter, weights=weights, use_original=use_original, mask_initial_peaks=mask_initial_peaks, num_std=num_std)
[docs]
class Goldindec(PybaselinesCorrector):
"""
Baseline correction based on Goldindec.
Parameters
----------
**kwargs :
Check original `implementation <https://pybaselines.readthedocs.io/en/latest/api/pybaselines/api/index.html#pybaselines.polynomial.goldinec>`_ for information about parameters.
.. note :: Implementation based on `pybaselines <https://pybaselines.readthedocs.io>`_.
References
----------
Liu, J., et al. Goldindec: A Novel Algorithm for Raman Spectrum Baseline Correction. Applied Spectroscopy, 2015, 69(7), 834-842.
"""
[docs]
def __init__(self, *, poly_order=2, tol=0.001, max_iter=250, weights=None, cost_function='asymmetric_indec', peak_ratio=0.5, alpha_factor=0.99, tol_2=0.001, tol_3=1e-06, max_iter_2=100):
super().__init__(pybaselines.polynomial.goldindec, poly_order=poly_order, tol=tol, max_iter=max_iter, weights=weights, cost_function=cost_function, peak_ratio=peak_ratio, alpha_factor=alpha_factor, tol_2=tol_2, tol_3=tol_3, max_iter_2=max_iter_2)
[docs]
class IRSQR(PybaselinesCorrector):
"""
Baseline correction based on Iterative Reweighted Spline Quantile Regression (IRSQR).
Parameters
----------
**kwargs :
Check original `implementation <https://pybaselines.readthedocs.io/en/latest/api/pybaselines/api/index.html#pybaselines.spline.irsqr>`_ for information about parameters.
.. note :: Implementation based on `pybaselines <https://pybaselines.readthedocs.io>`_.
References
----------
Han, Q., et al. Iterative Reweighted Quantile Regression Using Augmented Lagrangian Optimization for Baseline Correction. 2018 5th International Conference on Information Science and Control Engineering (ICISCE), 2018, 280-284.
"""
[docs]
def __init__(self, *, lam=100, quantile=0.05, num_knots=100, spline_degree=3, diff_order=3, max_iter=100, tol=1e-06, weights=None, eps=None):
super().__init__(pybaselines.spline.irsqr, lam=lam, quantile=quantile, num_knots=num_knots, spline_degree=spline_degree, diff_order=diff_order, max_iter=max_iter, tol=tol, weights=weights, eps=eps)
[docs]
class CornerCutting(PybaselinesCorrector):
"""
Baseline correction based on Corner Cutting.
Parameters
----------
**kwargs :
Check original `implementation <https://pybaselines.readthedocs.io/en/latest/api/pybaselines/api/index.html#pybaselines.spline.corner_cutting>`_ for information about parameters.
.. note :: Implementation based on `pybaselines <https://pybaselines.readthedocs.io>`_.
References
----------
Liu, Y.J., et al. A Concise Iterative Method with Bezier Technique for Baseline Construction. Analyst, 2015, 140(23), 7984-7996.
"""
[docs]
def __init__(self, *, max_iter=100):
super().__init__(pybaselines.spline.corner_cutting, max_iter=max_iter)
[docs]
class FABC(PybaselinesCorrector):
"""
Baseline correction based on Fully automatic baseline correction (FABC).
Parameters
----------
**kwargs :
Check original `implementation <https://pybaselines.readthedocs.io/en/latest/api/pybaselines/api/index.html#pybaselines.classification.fabc>`_ for information about parameters.
.. note :: Implementation based on `pybaselines <https://pybaselines.readthedocs.io>`_.
References
----------
Cobas, J.C., Bernstein, M.A., MartÃn-Pastor, M. and Tahoces, P.G., 2006. A new general-purpose fully automatic baseline-correction procedure for 1D and 2D NMR data. Journal of Magnetic Resonance, 183(1), pp.145-151.
"""
[docs]
def __init__(self, *, lam=1000000.0, scale=None, num_std=3.0, diff_order=2, min_length=2, weights=None, weights_as_mask=False, x_data=None, **pad_kwargs):
super().__init__(pybaselines.classification.fabc, lam=lam, scale=scale, num_std=num_std, diff_order=diff_order, min_length=min_length, weights=weights, weights_as_mask=weights_as_mask, x_data=x_data, **pad_kwargs)