Source code for xdem.coreg.biascorr

"""Bias corrections (i.e., non-affine coregistration) classes."""
from __future__ import annotations

import inspect
from typing import Any, Callable, Iterable, Literal, TypeVar

import geopandas as gpd
import geoutils as gu
import numpy as np
import pandas as pd
import rasterio as rio
import scipy

import xdem.spatialstats
from xdem._typing import NDArrayb, NDArrayf
from xdem.coreg.base import Coreg
from xdem.fit import (
    polynomial_1d,
    polynomial_2d,
    robust_nfreq_sumsin_fit,
    robust_norder_polynomial_fit,
    sumsin_1d,
)

# Pre-defined fitting workflows selectable by name through the `fit_func` argument of BiasCorr:
# each entry pairs the model function to evaluate ("func") with the optimizer used to estimate
# its parameters ("optimizer"). Selecting a workflow by name overrides any user-passed optimizer.
fit_workflows = {
    "norder_polynomial": {"func": polynomial_1d, "optimizer": robust_norder_polynomial_fit},
    "nfreq_sumsin": {"func": sumsin_1d, "optimizer": robust_nfreq_sumsin_fit},
}

# Type variable bound to BiasCorr (and therefore its subclasses)
BiasCorrType = TypeVar("BiasCorrType", bound="BiasCorr")



[docs]
class BiasCorr(Coreg):
    """
    Parent class of bias correction methods: non-rigid coregistrations.

    Made to be subclassed to pass default parameters/dimensions more intuitively, or to provide wrappers for specific
    types of bias corrections (directional, terrain, etc).
    """


[docs]
    def __init__(
        self,
        fit_or_bin: Literal["bin_and_fit"] | Literal["fit"] | Literal["bin"] = "fit",
        fit_func: Callable[..., NDArrayf]
        | Literal["norder_polynomial"]
        | Literal["nfreq_sumsin"] = "norder_polynomial",
        fit_optimizer: Callable[..., tuple[NDArrayf, Any]] = scipy.optimize.curve_fit,
        bin_sizes: int | dict[str, int | Iterable[float]] = 10,
        bin_statistic: Callable[[NDArrayf], np.floating[Any]] = np.nanmedian,
        bin_apply_method: Literal["linear"] | Literal["per_bin"] = "linear",
        bias_var_names: Iterable[str] | None = None,
        subsample: float | int = 1.0,
    ):
        """
        Instantiate a bias correction object.

        :param fit_or_bin: Whether to fit ("fit"), bin ("bin"), or bin and then fit ("bin_and_fit").
        :param fit_func: Function to fit to the bias, or the name of a pre-defined workflow
            ("norder_polynomial" or "nfreq_sumsin"). Only checked and stored when fitting.
        :param fit_optimizer: Optimizer used to fit the function. Replaced by the workflow's own optimizer when
            `fit_func` is given as a workflow name.
        :param bin_sizes: Integer, or dictionary mapping variable names to an integer or an iterable, used for
            binning. Only checked and stored when binning.
        :param bin_statistic: Statistic (callable) applied in each bin.
        :param bin_apply_method: Method to apply the binned statistics, "linear" or "per_bin". Only stored
            when `fit_or_bin` is "bin".
        :param bias_var_names: Names of the bias variables, if known at instantiation. Any iterable is accepted
            (including one-shot iterators); it is stored as a list.
        :param subsample: Subsample size stored in the metadata for later use during fitting.

        :raises ValueError: If `fit_or_bin` is not one of the three supported values.
        :raises TypeError: If a fit- or bin-related argument relevant to `fit_or_bin` has an unsupported type.
        """
        # Raise error if fit_or_bin is not defined
        if fit_or_bin not in ["fit", "bin", "bin_and_fit"]:
            raise ValueError(f"Argument `fit_or_bin` must be 'bin_and_fit', 'fit' or 'bin', got {fit_or_bin}.")

        # Pass the arguments to the class metadata
        if fit_or_bin in ["fit", "bin_and_fit"]:

            # Check input types for "fit" to raise user-friendly errors
            if not (callable(fit_func) or (isinstance(fit_func, str) and fit_func in fit_workflows.keys())):
                raise TypeError(
                    "Argument `fit_func` must be a function (callable) "
                    "or the string '{}', got {}.".format("', '".join(fit_workflows.keys()), type(fit_func))
                )
            if not callable(fit_optimizer):
                raise TypeError(
                    "Argument `fit_optimizer` must be a function (callable), " "got {}.".format(type(fit_optimizer))
                )

            # If a workflow was called, override optimizer and pass proper function
            if isinstance(fit_func, str) and fit_func in fit_workflows.keys():
                # Looks like a typing bug here, see: https://github.com/python/mypy/issues/10740
                fit_optimizer = fit_workflows[fit_func]["optimizer"]  # type: ignore
                fit_func = fit_workflows[fit_func]["func"]  # type: ignore

        if fit_or_bin in ["bin", "bin_and_fit"]:

            # Check input types for "bin" to raise user-friendly errors
            if not (
                isinstance(bin_sizes, int)
                or (isinstance(bin_sizes, dict) and all(isinstance(val, (int, Iterable)) for val in bin_sizes.values()))
            ):
                raise TypeError(
                    "Argument `bin_sizes` must be an integer, or a dictionary of integers or iterables, "
                    "got {}.".format(type(bin_sizes))
                )

            if not callable(bin_statistic):
                raise TypeError(
                    "Argument `bin_statistic` must be a function (callable), " "got {}.".format(type(bin_statistic))
                )

            if not isinstance(bin_apply_method, str):
                raise TypeError(
                    "Argument `bin_apply_method` must be the string 'linear' or 'per_bin', "
                    "got {}.".format(type(bin_apply_method))
                )

        # Materialize the names exactly once: `bias_var_names` may be a one-shot iterator, so it must not be
        # iterated a second time further below
        list_bias_var_names = list(bias_var_names) if bias_var_names is not None else None

        # Now we write the relevant attributes to the class metadata
        # For fitting
        if fit_or_bin == "fit":
            meta_fit = {"fit_func": fit_func, "fit_optimizer": fit_optimizer, "bias_var_names": list_bias_var_names}
            # Somehow mypy doesn't understand that fit_func and fit_optimizer can only be callables now,
            # even writing the above "if" in a more explicit "if; else" loop with new variables names and typing
            super().__init__(meta=meta_fit)  # type: ignore

        # For binning
        elif fit_or_bin == "bin":
            meta_bin = {
                "bin_sizes": bin_sizes,
                "bin_statistic": bin_statistic,
                "bin_apply_method": bin_apply_method,
                "bias_var_names": list_bias_var_names,
            }
            super().__init__(meta=meta_bin)  # type: ignore

        # For both
        else:
            meta_bin_and_fit = {
                "fit_func": fit_func,
                "fit_optimizer": fit_optimizer,
                "bin_sizes": bin_sizes,
                "bin_statistic": bin_statistic,
                "bias_var_names": list_bias_var_names,
            }
            super().__init__(meta=meta_bin_and_fit)  # type: ignore

        # Add subsample attribute
        self._meta["subsample"] = subsample

        # Add number of dimensions attribute: counted on the materialized list, as the original iterable may
        # already be exhausted at this point
        self._meta["nd"] = len(list_bias_var_names) if list_bias_var_names is not None else None

        # Update attributes
        self._fit_or_bin = fit_or_bin
        self._is_affine = False
        self._needs_vars = True


    def _fit_biascorr(  # type: ignore
        self,
        ref_elev: NDArrayf,
        tba_elev: NDArrayf,
        inlier_mask: NDArrayb,
        transform: rio.transform.Affine,  # Never None thanks to Coreg.fit() pre-process
        crs: rio.crs.CRS,  # Never None thanks to Coreg.fit() pre-process
        z_name: str,
        bias_vars: None | dict[str, NDArrayf] = None,
        weights: None | NDArrayf = None,
        verbose: bool = False,
        **kwargs,
    ) -> None:
        """
        Generic fit method for all biascorr subclasses, expects either 2D arrays for rasters or 1D arrays for points.
        Should only be called through subclassing.

        The elevation difference (reference minus to-be-aligned) is modelled along the bias variables either by
        fitting a function, by N-D binning, or by binning followed by fitting, depending on the `fit_or_bin`
        option chosen at instantiation. Results are written to the class metadata: "fit_params" (plus
        "poly_order", "nb_sin_freq" or "fit_perr" depending on the optimizer) when fitting, and
        "bin_dataframe" when binning, including for "bin_and_fit".

        :param ref_elev: Reference elevation values.
        :param tba_elev: To-be-aligned elevation values.
        :param inlier_mask: Boolean mask of values to use.
        :param transform: Not used by this method.
        :param crs: Not used by this method.
        :param z_name: Not used by this method.
        :param bias_vars: Dictionary of bias variables, keyed by variable name.
        :param weights: Optional values passed as `sigma` to the optimizer.
        :param verbose: Whether to print progress statements.
        :param kwargs: Keyword arguments forwarded to the optimizer ("random_state" is dropped if the optimizer
            does not accept it).

        :raises ValueError: If `bias_vars` is None, does not match the number or names of variables defined at
            instantiation, or if no valid value remains before fitting/binning or after binning.
        """

        # This is called by subclasses, so the bias_var should always be defined
        if bias_vars is None:
            raise ValueError("At least one `bias_var` should be passed to the fitting function, got None.")

        # Check number of variables
        nd = self._meta["nd"]
        if nd is not None and len(bias_vars) != nd:
            raise ValueError(
                "A number of {} variable(s) has to be provided through the argument 'bias_vars', "
                "got {}.".format(nd, len(bias_vars))
            )

        # If bias var names were explicitly passed at instantiation, check that they match the one from the dict
        if self._meta["bias_var_names"] is not None:
            if not sorted(bias_vars.keys()) == sorted(self._meta["bias_var_names"]):
                raise ValueError(
                    "The keys of `bias_vars` do not match the `bias_var_names` defined during "
                    "instantiation: {}.".format(self._meta["bias_var_names"])
                )
        # Otherwise, store bias variable names from the dictionary
        else:
            self._meta["bias_var_names"] = list(bias_vars.keys())

        # Compute difference and mask of valid data
        # TODO: Move the check up to Coreg.fit()?

        diff = ref_elev - tba_elev
        valid_mask = np.logical_and.reduce(
            (inlier_mask, np.isfinite(diff), *(np.isfinite(var) for var in bias_vars.values()))
        )

        # Raise errors if all values are NaN after introducing masks from the variables
        # (Others are already checked in Coreg.fit())
        if np.all(~valid_mask):
            raise ValueError("Some 'bias_vars' have only NaNs in the inlier mask.")

        subsample_mask = self._get_subsample_on_valid_mask(valid_mask=valid_mask, verbose=verbose)

        # Get number of variables
        nd = len(bias_vars)

        # Remove random state for keyword argument if its value is not in the optimizer function
        if self._fit_or_bin in ["fit", "bin_and_fit"]:
            fit_func_args = inspect.getfullargspec(self._meta["fit_optimizer"]).args
            if "random_state" not in fit_func_args and "random_state" in kwargs:
                kwargs.pop("random_state")

        # We need to sort the bin sizes in the same order as the bias variables if a dict is passed for bin_sizes
        if self._fit_or_bin in ["bin", "bin_and_fit"]:
            if isinstance(self._meta["bin_sizes"], dict):
                var_order = list(bias_vars.keys())
                # Declare type to write integer or tuple to the variable
                bin_sizes: int | tuple[int, ...] | tuple[NDArrayf, ...] = tuple(
                    np.array(self._meta["bin_sizes"][var]) for var in var_order
                )
            # Otherwise, write integer directly
            else:
                bin_sizes = self._meta["bin_sizes"]

        # Option 1: Run fit and save optimized function parameters
        if self._fit_or_bin == "fit":

            # Print if verbose
            if verbose:
                print(
                    "Estimating bias correction along variables {} by fitting "
                    "with function {}.".format(", ".join(list(bias_vars.keys())), self._meta["fit_func"].__name__)
                )

            results = self._meta["fit_optimizer"](
                f=self._meta["fit_func"],
                xdata=np.array([var[subsample_mask].flatten() for var in bias_vars.values()]).squeeze(),
                ydata=diff[subsample_mask].flatten(),
                sigma=weights[subsample_mask].flatten() if weights is not None else None,
                absolute_sigma=True,
                **kwargs,
            )

        # Option 2: Run binning and save dataframe of result
        elif self._fit_or_bin == "bin":

            if verbose:
                print(
                    "Estimating bias correction along variables {} by binning "
                    "with statistic {}.".format(", ".join(list(bias_vars.keys())), self._meta["bin_statistic"].__name__)
                )

            df = xdem.spatialstats.nd_binning(
                values=diff[subsample_mask],
                list_var=[var[subsample_mask] for var in bias_vars.values()],
                list_var_names=list(bias_vars.keys()),
                list_var_bins=bin_sizes,
                statistics=(self._meta["bin_statistic"], "count"),
            )

        # Option 3: Run binning, then fitting, and save both results
        else:

            # Print if verbose
            if verbose:
                print(
                    "Estimating bias correction along variables {} by binning with statistic {} and then fitting "
                    "with function {}.".format(
                        ", ".join(list(bias_vars.keys())),
                        self._meta["bin_statistic"].__name__,
                        self._meta["fit_func"].__name__,
                    )
                )

            df = xdem.spatialstats.nd_binning(
                values=diff[subsample_mask],
                list_var=[var[subsample_mask] for var in bias_vars.values()],
                list_var_names=list(bias_vars.keys()),
                list_var_bins=bin_sizes,
                statistics=(self._meta["bin_statistic"], "count"),
            )

            # Now, we need to pass this new data to the fitting function and optimizer
            # We use only the N-D binning estimates (maximum dimension, equal to length of variable list)
            df_nd = df[df.nd == len(bias_vars)]

            # We get the middle of bin values for variable, and statistic for the diff
            new_vars = [pd.IntervalIndex(df_nd[var_name]).mid.values for var_name in bias_vars.keys()]
            new_diff = df_nd[self._meta["bin_statistic"].__name__].values
            # TODO: pass a new sigma based on "count" and original sigma (and correlation?)?
            #  sigma values would have to be binned above also

            # Valid values for the binning output
            ind_valid = np.logical_and.reduce((np.isfinite(new_diff), *(np.isfinite(var) for var in new_vars)))

            if np.all(~ind_valid):
                raise ValueError("Only NaNs values after binning, did you pass the right bin edges?")

            # NOTE(review): `weights` is indexed below with `ind_valid`, which is defined on the binned data;
            # presumably `weights` has the shape of the input elevations instead, in which case this indexing
            # cannot work as intended (see TODO above) - confirm before relying on weights with "bin_and_fit"
            results = self._meta["fit_optimizer"](
                f=self._meta["fit_func"],
                xdata=np.array([var[ind_valid].flatten() for var in new_vars]).squeeze(),
                ydata=new_diff[ind_valid].flatten(),
                sigma=weights[ind_valid].flatten() if weights is not None else None,
                absolute_sigma=True,
                **kwargs,
            )

        if verbose:
            print(f"{nd}D bias estimated.")

        # Save results if fitting was performed
        if self._fit_or_bin in ["fit", "bin_and_fit"]:

            # Write the results to metadata in different ways depending on optimizer returns
            if self._meta["fit_optimizer"] in (w["optimizer"] for w in fit_workflows.values()):
                params = results[0]
                order_or_freq = results[1]
                if self._meta["fit_optimizer"] == robust_norder_polynomial_fit:
                    self._meta["poly_order"] = order_or_freq
                else:
                    self._meta["nb_sin_freq"] = order_or_freq

            elif self._meta["fit_optimizer"] == scipy.optimize.curve_fit:
                params = results[0]
                # Calculation to get the error on parameters (see description of scipy.optimize.curve_fit)
                perr = np.sqrt(np.diag(results[1]))
                self._meta["fit_perr"] = perr

            else:
                params = results[0]

            self._meta["fit_params"] = params

        # Save results of binning if it was performed
        # (independent "if" rather than "elif": for "bin_and_fit" both the fit and the binning are saved)
        if self._fit_or_bin in ["bin", "bin_and_fit"]:
            self._meta["bin_dataframe"] = df

    def _fit_rst_rst(
        self,
        ref_elev: NDArrayf,
        tba_elev: NDArrayf,
        inlier_mask: NDArrayb,
        transform: rio.transform.Affine,
        crs: rio.crs.CRS,
        z_name: str,
        weights: NDArrayf | None = None,
        bias_vars: dict[str, NDArrayf] | None = None,
        verbose: bool = False,
        **kwargs: Any,
    ) -> None:
        """
        Raster-raster fit: forwards all inputs unchanged to the generic bias-correction fit.

        Should only be called through subclassing.
        """

        # No conversion is needed for two rasters, so every argument is handed over as received
        forwarded = {
            "ref_elev": ref_elev,
            "tba_elev": tba_elev,
            "inlier_mask": inlier_mask,
            "transform": transform,
            "crs": crs,
            "z_name": z_name,
            "weights": weights,
            "bias_vars": bias_vars,
            "verbose": verbose,
        }
        self._fit_biascorr(**forwarded, **kwargs)

    def _fit_rst_pts(  # type: ignore
        self,
        ref_elev: NDArrayf | gpd.GeoDataFrame,
        tba_elev: NDArrayf | gpd.GeoDataFrame,
        inlier_mask: NDArrayb,
        transform: rio.transform.Affine,  # Never None thanks to Coreg.fit() pre-process
        crs: rio.crs.CRS,  # Never None thanks to Coreg.fit() pre-process
        z_name: str,
        bias_vars: None | dict[str, NDArrayf] = None,
        weights: None | NDArrayf = None,
        verbose: bool = False,
        **kwargs,
    ) -> None:
        """
        Raster-point fit: interpolates the gridded inputs at the point coordinates, then runs the generic fit
        on the resulting 1D arrays.

        Should only be called through subclassing.

        :param ref_elev: Reference elevation, either an array (raster) or a geodataframe (points).
        :param tba_elev: To-be-aligned elevation, either an array (raster) or a geodataframe (points).
        :param inlier_mask: Boolean mask of valid raster values.
        :param transform: Transform of the raster inputs.
        :param crs: CRS of the raster inputs.
        :param z_name: Column name of the elevation values in the point geodataframe.
        :param bias_vars: Dictionary of gridded bias variables, keyed by variable name.
        :param weights: Optional weights, forwarded unchanged to the generic fit.
        :param verbose: Whether to print progress statements.
        :param kwargs: Keyword arguments forwarded to the generic fit.
        """

        # Sort out which input holds the points and which one holds the grid
        ref_is_points = isinstance(ref_elev, gpd.GeoDataFrame)
        if ref_is_points:
            pts_elev, rst_elev = ref_elev, tba_elev
        else:
            pts_elev, rst_elev = tba_elev, ref_elev

        coords = np.array((pts_elev.geometry.x.values, pts_elev.geometry.y.values)).T

        # Build the gridded valid mask before subsampling, so that the subsample is drawn among valid points only
        mask_layers = [inlier_mask, np.isfinite(rst_elev)]
        if bias_vars is not None:
            mask_layers.extend(np.isfinite(var) for var in bias_vars.values())
        valid_mask = np.logical_and.reduce(tuple(mask_layers))

        # Interpolate the boolean mask (as numbers) at the points: flooring yields 0 as soon as one invalid
        # pixel contributes to the interpolated value, so a point is kept only if surrounded by valid pixels
        mask_rst = gu.Raster.from_array(data=valid_mask, transform=transform, crs=crs)
        valid_pts = np.floor(mask_rst.interp_points(coords)).astype(bool)

        # Subsample on the point dataset right away to reduce the interpolations performed below
        # NOTE(review): `_fit_biascorr` calls `_get_subsample_on_valid_mask` again on these points; verify
        # that a fractional subsample is not effectively applied twice
        subsample_mask = self._get_subsample_on_valid_mask(valid_mask=valid_pts, verbose=verbose)
        coords = coords[subsample_mask]

        # Every remaining point is valid, so the downstream inlier mask is entirely true
        all_true_mask = np.ones(len(coords), dtype=bool)

        def _sample_at_points(grid: NDArrayf) -> NDArrayf:
            """Interpolate a gridded array at the subsampled point coordinates."""
            return gu.Raster.from_array(grid, transform=transform, crs=crs, nodata=-9999).interp_points(coords)

        # Derive 1D elevation arrays at the point coordinates (the generic fit handles 1D as well as 2D arrays,
        # as long as coordinates match): one comes from the point dataset, the other is interpolated
        if ref_is_points:
            ref_elev_pts = ref_elev[z_name].values[subsample_mask]
            tba_elev_pts = _sample_at_points(tba_elev)
        else:
            ref_elev_pts = _sample_at_points(ref_elev)
            tba_elev_pts = tba_elev[z_name].values[subsample_mask]

        # Same conversion for the bias variables
        if bias_vars is None:
            bias_vars_pts = None
        else:
            bias_vars_pts = {name: _sample_at_points(grid) for name, grid in bias_vars.items()}

        # NOTE(review): `weights` is forwarded without being interpolated at the points; presumably it is
        # gridded like the raster input, in which case its shape no longer matches the 1D arrays - confirm
        self._fit_biascorr(
            ref_elev=ref_elev_pts,
            tba_elev=tba_elev_pts,
            inlier_mask=all_true_mask,
            bias_vars=bias_vars_pts,
            transform=transform,
            crs=crs,
            z_name=z_name,
            weights=weights,
            verbose=verbose,
            **kwargs,
        )

    def _apply_rst(  # type: ignore
        self,
        elev: NDArrayf,
        transform: rio.transform.Affine,  # Never None thanks to Coreg.fit() pre-process
        crs: rio.crs.CRS,  # Never None thanks to Coreg.fit() pre-process
        bias_vars: None | dict[str, NDArrayf] = None,
        **kwargs: Any,
    ) -> tuple[NDArrayf, rio.transform.Affine]:
        """
        Apply the estimated bias correction to an elevation array.

        The correction is evaluated from the fitted function if a fit was performed (including after binning),
        or otherwise from the binned statistics, and is added to the elevation.

        :param elev: Elevation array to correct.
        :param transform: Transform of the elevation array, returned unchanged.
        :param crs: Not used by this method.
        :param bias_vars: Dictionary of bias variables, keyed by variable name.

        :raises ValueError: If `bias_vars` is None or its keys do not match the stored variable names.

        :returns: Corrected elevation array, and the input transform.
        """

        if bias_vars is None:
            raise ValueError("At least one `bias_var` should be passed to the `apply` function, got None.")

        var_names = list(bias_vars.keys())
        var_arrays = list(bias_vars.values())

        # The variables passed must be the ones this correction was defined or fitted with
        stored_names = self._meta["bias_var_names"]
        if sorted(var_names) != sorted(stored_names):
            raise ValueError(
                "The keys of `bias_vars` do not match the `bias_var_names` defined during "
                "instantiation or fitting: {}.".format(self._meta["bias_var_names"])
            )

        if self._fit_or_bin in ["fit", "bin_and_fit"]:
            # Evaluate the fitted function on the variables (also the case when binning was done before fitting)
            corr = self._meta["fit_func"](tuple(var_arrays), *self._meta["fit_params"])

        elif self._meta["bin_apply_method"] == "linear":
            # N-D interpolation of the binning, evaluated on flattened variables and reshaped afterwards
            interpolator = xdem.spatialstats.interp_nd_binning(
                df=self._meta["bin_dataframe"],
                list_var_names=var_names,
                statistic=self._meta["bin_statistic"],
            )
            flat_corr = interpolator(tuple(arr.flatten() for arr in var_arrays))
            corr = flat_corr.reshape(np.shape(bias_vars[var_names[0]]))

        else:
            # N-D binning statistic of the bin in which each value of the variables falls
            corr = xdem.spatialstats.get_perbin_nd_binning(
                df=self._meta["bin_dataframe"],
                list_var=var_arrays,
                list_var_names=var_names,
                statistic=self._meta["bin_statistic"],
            )

        return elev + corr, transform




[docs]
class DirectionalBias(BiasCorr):
    """
    Bias correction for directional biases, for example along- or across-track of satellite angle.
    """


[docs]
    def __init__(
        self,
        angle: float = 0,
        fit_or_bin: Literal["bin_and_fit"] | Literal["fit"] | Literal["bin"] = "bin_and_fit",
        fit_func: Callable[..., NDArrayf] | Literal["norder_polynomial"] | Literal["nfreq_sumsin"] = "nfreq_sumsin",
        fit_optimizer: Callable[..., tuple[NDArrayf, Any]] = scipy.optimize.curve_fit,
        bin_sizes: int | dict[str, int | Iterable[float]] = 100,
        bin_statistic: Callable[[NDArrayf], np.floating[Any]] = np.nanmedian,
        bin_apply_method: Literal["linear"] | Literal["per_bin"] = "linear",
        subsample: float | int = 1.0,
    ):
        """
        Instantiate a directional bias correction.

        :param angle: Angle in which to perform the directional correction (degrees).
        :param fit_or_bin: Whether to fit, bin, or bin then fit. Use "fit" to correct by optimizing a function,
            "bin" to correct with a statistic of central tendency in defined bins, or "bin_and_fit" to
            fit the function on the binned statistics.
        :param fit_func: Function to fit to the bias with variables later passed in .fit().
        :param fit_optimizer: Optimizer to minimize the function.
        :param bin_sizes: Size (if integer) or edges (if iterable) for binning variables later passed in .fit().
        :param bin_statistic: Statistic of central tendency (e.g., mean) to apply during the binning.
        :param bin_apply_method: Method to correct with the binned statistics, either "linear" to interpolate linearly
            between bins, or "per_bin" to apply the statistic for each bin.
        :param subsample: Subsample the input for speed-up. <1 is parsed as a fraction. >1 is a pixel count.
        """
        # The single bias variable of this correction is always named "angle"
        super().__init__(
            fit_or_bin=fit_or_bin,
            fit_func=fit_func,
            fit_optimizer=fit_optimizer,
            bin_sizes=bin_sizes,
            bin_statistic=bin_statistic,
            bin_apply_method=bin_apply_method,
            bias_var_names=["angle"],
            subsample=subsample,
        )

        # The variable is derived internally from the angle, so users do not need to pass any
        self._needs_vars = False
        self._meta["angle"] = angle


    def _fit_rst_rst(  # type: ignore
        self,
        ref_elev: NDArrayf,
        tba_elev: NDArrayf,
        inlier_mask: NDArrayb,
        transform: rio.transform.Affine,
        crs: rio.crs.CRS,
        z_name: str,
        bias_vars: dict[str, NDArrayf] = None,
        weights: None | NDArrayf = None,
        verbose: bool = False,
        **kwargs,
    ) -> None:
        """
        Raster-raster fit along the rotated X coordinate of the grid.

        The `bias_vars` argument is not used: the "angle" variable is derived here from the grid and the
        angle stored at instantiation.
        """

        if verbose:
            print("Estimating rotated coordinates.")

        # Rotated coordinates of the grid; only the first returned coordinate is used as bias variable
        grid = gu.Raster.from_array(data=ref_elev, crs=crs, transform=transform, nodata=-9999)
        rotated_x, _ = gu.raster.get_xy_rotated(raster=grid, along_track_angle=self._meta["angle"])

        # Resolution-dependent parameters cannot be derived from the rotated coordinates alone: unless the
        # user passed one, set the hop length to the average of the two transform resolution terms
        if "hop_length" not in kwargs:
            kwargs["hop_length"] = (transform[0] + abs(transform[4])) / 2

        self._fit_biascorr(
            ref_elev=ref_elev,
            tba_elev=tba_elev,
            inlier_mask=inlier_mask,
            bias_vars={"angle": rotated_x},
            transform=transform,
            crs=crs,
            z_name=z_name,
            weights=weights,
            verbose=verbose,
            **kwargs,
        )

    def _fit_rst_pts(  # type: ignore
        self,
        ref_elev: NDArrayf | gpd.GeoDataFrame,
        tba_elev: NDArrayf | gpd.GeoDataFrame,
        inlier_mask: NDArrayb,
        transform: rio.transform.Affine,
        crs: rio.crs.CRS,
        z_name: str,
        bias_vars: dict[str, NDArrayf] = None,
        weights: None | NDArrayf = None,
        verbose: bool = False,
        **kwargs,
    ) -> None:
        """
        Raster-point fit along the rotated X coordinate of the grid.

        The `bias_vars` argument is not used: the "angle" variable is derived here from the gridded input
        and the angle stored at instantiation, then handed to the parent raster-point fit.
        """

        # The gridded attributes come from whichever input is not the point geodataframe
        if isinstance(ref_elev, gpd.GeoDataFrame):
            rast_elev = tba_elev
        else:
            rast_elev = ref_elev

        if verbose:
            print("Estimating rotated coordinates.")

        # Rotated coordinates of the grid; only the first returned coordinate is used as bias variable
        grid = gu.Raster.from_array(data=rast_elev, crs=crs, transform=transform, nodata=-9999)
        rotated_x, _ = gu.raster.get_xy_rotated(raster=grid, along_track_angle=self._meta["angle"])

        # Resolution-dependent parameters cannot be derived from the rotated coordinates alone: unless the
        # user passed one, set the hop length to the average of the two transform resolution terms
        if "hop_length" not in kwargs:
            kwargs["hop_length"] = (transform[0] + abs(transform[4])) / 2

        super()._fit_rst_pts(
            ref_elev=ref_elev,
            tba_elev=tba_elev,
            inlier_mask=inlier_mask,
            bias_vars={"angle": rotated_x},
            transform=transform,
            crs=crs,
            z_name=z_name,
            weights=weights,
            verbose=verbose,
            **kwargs,
        )

    def _apply_rst(
        self,
        elev: NDArrayf,
        transform: rio.transform.Affine,
        crs: rio.crs.CRS,
        bias_vars: None | dict[str, NDArrayf] = None,
        **kwargs: Any,
    ) -> tuple[NDArrayf, rio.transform.Affine]:
        """
        Apply the directional correction to an elevation array.

        The `bias_vars` argument is not used: the "angle" variable is recomputed from the grid of `elev` and
        the angle stored at instantiation.
        """

        # Rotated coordinates on which to evaluate the correction (only the first one is used)
        grid = gu.Raster.from_array(data=elev, crs=crs, transform=transform, nodata=-9999)
        rotated_x, _ = gu.raster.get_xy_rotated(raster=grid, along_track_angle=self._meta["angle"])

        angle_vars = {"angle": rotated_x}
        return super()._apply_rst(elev=elev, transform=transform, crs=crs, bias_vars=angle_vars, **kwargs)




[docs]
class TerrainBias(BiasCorr):
    """
    Correct a bias according to terrain, such as elevation or curvature.

    With elevation: often useful for nadir image DEM correction, where the focal length is slightly miscalculated.
    With curvature: often useful for a difference of DEMs with different effective resolution.

    DISCLAIMER: An elevation correction may introduce error when correcting non-photogrammetric biases, as generally
    elevation biases are interlinked with curvature biases.
    See Gardelle et al. (2012) (Figure 2), http://dx.doi.org/10.3189/2012jog11j175, for curvature-related biases.
    """


[docs]
    def __init__(
        self,
        terrain_attribute: str = "maximum_curvature",
        fit_or_bin: Literal["bin_and_fit"] | Literal["fit"] | Literal["bin"] = "bin",
        fit_func: Callable[..., NDArrayf]
        | Literal["norder_polynomial"]
        | Literal["nfreq_sumsin"] = "norder_polynomial",
        fit_optimizer: Callable[..., tuple[NDArrayf, Any]] = scipy.optimize.curve_fit,
        bin_sizes: int | dict[str, int | Iterable[float]] = 100,
        bin_statistic: Callable[[NDArrayf], np.floating[Any]] = np.nanmedian,
        bin_apply_method: Literal["linear"] | Literal["per_bin"] = "linear",
        subsample: float | int = 1.0,
    ):
        """
        Instantiate a terrain bias correction.

        :param terrain_attribute: Terrain attribute to use for correction.
        :param fit_or_bin: Whether to fit, bin, or bin then fit. Use "fit" to correct by optimizing a function,
            "bin" to correct with a statistic of central tendency in defined bins, or "bin_and_fit" to
            fit the function on the binned statistics.
        :param fit_func: Function to fit to the bias with variables later passed in .fit().
        :param fit_optimizer: Optimizer to minimize the function.
        :param bin_sizes: Size (if integer) or edges (if iterable) for binning variables later passed in .fit().
        :param bin_statistic: Statistic of central tendency (e.g., mean) to apply during the binning.
        :param bin_apply_method: Method to correct with the binned statistics, either "linear" to interpolate linearly
            between bins, or "per_bin" to apply the statistic for each bin.
        :param subsample: Subsample the input for speed-up. <1 is parsed as a fraction. >1 is a pixel count.
        """

        # The single bias variable of this correction is named after the terrain attribute
        super().__init__(
            fit_or_bin=fit_or_bin,
            fit_func=fit_func,
            fit_optimizer=fit_optimizer,
            bin_sizes=bin_sizes,
            bin_statistic=bin_statistic,
            bin_apply_method=bin_apply_method,
            bias_var_names=[terrain_attribute],
            subsample=subsample,
        )

        # The attribute can be derived internally, so users do not need to pass any variable
        self._needs_vars = False
        # Duplicate of the bias variable name, kept under its own key for clarity
        self._meta["terrain_attribute"] = terrain_attribute


    def _fit_rst_rst(  # type: ignore
        self,
        ref_elev: NDArrayf,
        tba_elev: NDArrayf,
        inlier_mask: NDArrayb,
        transform: rio.transform.Affine,
        crs: rio.crs.CRS,
        z_name: str,
        bias_vars: dict[str, NDArrayf] = None,
        weights: None | NDArrayf = None,
        verbose: bool = False,
        **kwargs,
    ) -> None:
        """
        Raster-raster fit along the terrain attribute defined at instantiation.

        The attribute is taken from `bias_vars` if present there under its name, and is otherwise derived from
        the reference elevation (used directly for "elevation").
        """

        attr_name = self._meta["terrain_attribute"]

        if bias_vars is not None and attr_name in bias_vars:
            # Already passed by the user: use as is
            attr = bias_vars[attr_name]
        elif attr_name == "elevation":
            # The reference elevation is itself the variable
            attr = ref_elev
        else:
            # Only declared during instantiation: derive the attribute from the reference elevation
            attr = xdem.terrain.get_terrain_attribute(
                dem=ref_elev,
                attribute=attr_name,
                resolution=(transform[0], abs(transform[4])),
            )

        # Run the generic fit with this single variable
        self._fit_biascorr(
            ref_elev=ref_elev,
            tba_elev=tba_elev,
            inlier_mask=inlier_mask,
            bias_vars={attr_name: attr},
            transform=transform,
            crs=crs,
            z_name=z_name,
            weights=weights,
            verbose=verbose,
            **kwargs,
        )

    def _fit_rst_pts(  # type: ignore
        self,
        ref_elev: NDArrayf | gpd.GeoDataFrame,
        tba_elev: NDArrayf | gpd.GeoDataFrame,
        inlier_mask: NDArrayb,
        transform: rio.transform.Affine,
        crs: rio.crs.CRS,
        z_name: str,
        bias_vars: dict[str, NDArrayf] = None,
        weights: None | NDArrayf = None,
        verbose: bool = False,
        **kwargs,
    ) -> None:
        """
        Raster-point fit along the terrain attribute defined at instantiation.

        The attribute is taken from `bias_vars` if present there under its name, and is otherwise derived from
        the gridded input (used directly for "elevation"), then handed to the parent raster-point fit.
        """

        attr_name = self._meta["terrain_attribute"]

        if bias_vars is not None and attr_name in bias_vars:
            # Already passed by the user: use as is
            attr = bias_vars[attr_name]
        else:
            # Only declared during instantiation: the gridded attributes come from whichever input is not
            # the point geodataframe
            rast_elev = tba_elev if isinstance(ref_elev, gpd.GeoDataFrame) else ref_elev

            if attr_name == "elevation":
                attr = rast_elev
            else:
                attr = xdem.terrain.get_terrain_attribute(
                    dem=rast_elev,
                    attribute=attr_name,
                    resolution=(transform[0], abs(transform[4])),
                )

        # Run the parent raster-point fit with this single variable
        super()._fit_rst_pts(
            ref_elev=ref_elev,
            tba_elev=tba_elev,
            inlier_mask=inlier_mask,
            bias_vars={attr_name: attr},
            transform=transform,
            crs=crs,
            z_name=z_name,
            weights=weights,
            verbose=verbose,
            **kwargs,
        )

    def _apply_rst(
        self,
        elev: NDArrayf,
        transform: rio.transform.Affine,
        crs: rio.crs.CRS,
        bias_vars: None | dict[str, NDArrayf] = None,
        **kwargs: Any,
    ) -> tuple[NDArrayf, rio.transform.Affine]:
        """
        Apply the terrain correction to an elevation array.

        If no `bias_vars` is passed, the terrain attribute is derived from `elev` itself (used directly for
        "elevation"); otherwise the passed variables are used as is.
        """

        if bias_vars is None:
            attr_name = self._meta["terrain_attribute"]

            # Derive the terrain attribute from the elevation to correct
            if attr_name != "elevation":
                attr = xdem.terrain.get_terrain_attribute(
                    dem=elev, attribute=attr_name, resolution=(transform[0], abs(transform[4]))
                )
            else:
                attr = elev

            bias_vars = {attr_name: attr}

        return super()._apply_rst(elev=elev, transform=transform, crs=crs, bias_vars=bias_vars, **kwargs)




[docs]
class Deramp(BiasCorr):
    """
    Correct for a 2D polynomial along X/Y coordinates, for example from residual camera model deformations.
    """


[docs]
    def __init__(
        self,
        poly_order: int = 2,
        fit_or_bin: Literal["bin_and_fit"] | Literal["fit"] | Literal["bin"] = "fit",
        fit_func: Callable[..., NDArrayf] = polynomial_2d,
        fit_optimizer: Callable[..., tuple[NDArrayf, Any]] = scipy.optimize.curve_fit,
        bin_sizes: int | dict[str, int | Iterable[float]] = 10,
        bin_statistic: Callable[[NDArrayf], np.floating[Any]] = np.nanmedian,
        bin_apply_method: Literal["linear"] | Literal["per_bin"] = "linear",
        subsample: float | int = 5e5,
    ):
        """
        Instantiate a deramping correction, i.e. a 2D polynomial bias correction along X/Y coordinates.

        The bias variable names are fixed to "xx" and "yy", and the instance is flagged as not needing
        user-provided bias variables.

        :param poly_order: Order of the 2D polynomial to fit.
        :param fit_or_bin: One of "fit", "bin" or "bin_and_fit". Use "fit" to correct by optimizing a function or
            "bin" to correct with a statistic of central tendency in defined bins.
        :param fit_func: Function to fit to the bias (defaults to a 2D polynomial).
        :param fit_optimizer: Optimizer to minimize the function.
        :param bin_sizes: Size (if integer) or edges (if iterable) for binning the variables.
        :param bin_statistic: Statistic of central tendency (e.g., mean) to apply during the binning.
        :param bin_apply_method: Method to correct with the binned statistics, either "linear" to interpolate linearly
            between bins, or "per_bin" to apply the statistic for each bin.
        :param subsample: Subsample the input for speed-up. <1 is parsed as a fraction. >1 is a pixel count.
        """
        # Keyword arguments make the mapping onto the parent signature explicit
        super().__init__(
            fit_or_bin=fit_or_bin,
            fit_func=fit_func,
            fit_optimizer=fit_optimizer,
            bin_sizes=bin_sizes,
            bin_statistic=bin_statistic,
            bin_apply_method=bin_apply_method,
            bias_var_names=["xx", "yy"],
            subsample=subsample,
        )

        # Stored so the fitting methods can size their initial parameter guess
        self._meta["poly_order"] = poly_order

        # The X/Y coordinate grids are built internally, the user does not pass any variable
        self._needs_vars = False


    def _fit_rst_rst(  # type: ignore
        self,
        ref_elev: NDArrayf,
        tba_elev: NDArrayf,
        inlier_mask: NDArrayb,
        transform: rio.transform.Affine,
        crs: rio.crs.CRS,
        z_name: str,
        bias_vars: dict[str, NDArrayf] | None = None,
        weights: None | NDArrayf = None,
        verbose: bool = False,
        **kwargs,
    ) -> None:
        """
        Fit the 2D polynomial ramp between two rasters, using array indices as X/Y coordinates.

        :param ref_elev: Reference elevation array; its first two dimensions define the coordinate grids.
        :param tba_elev: Elevation array forwarded to the fit as ``tba_elev``.
        :param inlier_mask: Mask forwarded unchanged to the fit.
        :param transform: Affine transform, forwarded unchanged to the fit.
        :param crs: Coordinate reference system, forwarded unchanged to the fit.
        :param z_name: Forwarded unchanged to the fit.
        :param bias_vars: Ignored here: the "xx"/"yy" index grids built in this method are always used instead.
        :param weights: Optional weights, forwarded unchanged to the fit.
        :param verbose: Verbosity flag, forwarded unchanged to the fit.
        :param kwargs: Additional keyword arguments forwarded to the fit.
        """
        # The length of the first guess, (order + 1)², is what defines the polynomial order for the fit function
        # (presumably through numpy.polynomial.polynomial.polyval2d; to confirm against xdem.fit.polynomial_2d)
        order = self._meta["poly_order"]
        n_params = (order + 1) ** 2
        first_guess = np.ones(shape=n_params)

        # Array indices are enough as coordinates, the georeferenced ones are not needed
        n_cols = ref_elev.shape[1]
        n_rows = ref_elev.shape[0]
        col_index = np.arange(0, n_cols)
        row_index = np.arange(0, n_rows)
        xx, yy = np.meshgrid(col_index, row_index)
        coord_vars = {"xx": xx, "yy": yy}

        self._fit_biascorr(
            ref_elev=ref_elev,
            tba_elev=tba_elev,
            inlier_mask=inlier_mask,
            bias_vars=coord_vars,
            transform=transform,
            crs=crs,
            z_name=z_name,
            weights=weights,
            verbose=verbose,
            p0=first_guess,
            **kwargs,
        )

    def _fit_rst_pts(  # type: ignore
        self,
        ref_elev: NDArrayf | gpd.GeoDataFrame,
        tba_elev: NDArrayf | gpd.GeoDataFrame,
        inlier_mask: NDArrayb,
        transform: rio.transform.Affine,
        crs: rio.crs.CRS,
        z_name: str,
        bias_vars: dict[str, NDArrayf] | None = None,
        weights: None | NDArrayf = None,
        verbose: bool = False,
        **kwargs,
    ) -> None:
        """
        Fit the 2D polynomial ramp between a raster and a point dataset, using array indices as X/Y coordinates.

        :param ref_elev: Reference elevation, either an array or a GeoDataFrame.
        :param tba_elev: Other elevation input, either an array or a GeoDataFrame.
        :param inlier_mask: Mask forwarded unchanged to the parent method.
        :param transform: Affine transform, forwarded unchanged to the parent method.
        :param crs: Coordinate reference system, forwarded unchanged to the parent method.
        :param z_name: Forwarded unchanged to the parent method.
        :param bias_vars: Ignored here: the "xx"/"yy" index grids built in this method are always used instead.
        :param weights: Optional weights, forwarded unchanged to the parent method.
        :param verbose: Verbosity flag, forwarded unchanged to the parent method.
        :param kwargs: Additional keyword arguments forwarded to the parent method.
        """
        # The gridded input is whichever of the two is not a GeoDataFrame (reference checked first)
        if isinstance(ref_elev, gpd.GeoDataFrame):
            rast_elev = tba_elev
        else:
            rast_elev = ref_elev

        # The length of the first guess, (order + 1)², is what defines the polynomial order for the fit function
        # (presumably through numpy.polynomial.polynomial.polyval2d; to confirm against xdem.fit.polynomial_2d)
        order = self._meta["poly_order"]
        n_params = (order + 1) ** 2
        first_guess = np.ones(shape=n_params)

        # Array indices of the raster are enough as coordinates, the georeferenced ones are not needed
        n_cols = rast_elev.shape[1]
        n_rows = rast_elev.shape[0]
        col_index = np.arange(0, n_cols)
        row_index = np.arange(0, n_rows)
        xx, yy = np.meshgrid(col_index, row_index)
        coord_vars = {"xx": xx, "yy": yy}

        super()._fit_rst_pts(
            ref_elev=ref_elev,
            tba_elev=tba_elev,
            inlier_mask=inlier_mask,
            bias_vars=coord_vars,
            transform=transform,
            crs=crs,
            z_name=z_name,
            weights=weights,
            verbose=verbose,
            p0=first_guess,
            **kwargs,
        )

    def _apply_rst(
        self,
        elev: NDArrayf,
        transform: rio.transform.Affine,
        crs: rio.crs.CRS,
        bias_vars: None | dict[str, NDArrayf] = None,
        **kwargs: Any,
    ) -> tuple[NDArrayf, rio.transform.Affine]:
        """
        Apply the fitted ramp to a raster, using array indices as X/Y coordinates.

        :param elev: Elevation array passed to the parent method; its first two dimensions define the grids.
        :param transform: Affine transform, forwarded unchanged to the parent method.
        :param crs: Coordinate reference system, forwarded unchanged to the parent method.
        :param bias_vars: Ignored here: the "xx"/"yy" index grids built in this method are always used instead.
        :param kwargs: Additional keyword arguments forwarded to the parent method.

        :return: Whatever the parent ``_apply_rst`` returns.
        """
        # Column and row indices of the array serve as the coordinates for the correction
        n_cols = elev.shape[1]
        n_rows = elev.shape[0]
        col_index = np.arange(0, n_cols)
        row_index = np.arange(0, n_rows)
        xx, yy = np.meshgrid(col_index, row_index)
        coord_vars = {"xx": xx, "yy": yy}

        return super()._apply_rst(elev=elev, transform=transform, crs=crs, bias_vars=coord_vars, **kwargs)