Source code for fast_poibin.poibin

from typing import Literal, Union

import numpy as np
import numpy.typing as npt

from fast_poibin.pmf import FloatSequence, calc_pmf, calc_pmf_dp


def _validate_probabilities(probabilities: FloatSequence) -> None:
    if isinstance(probabilities, np.ndarray):
        assert probabilities.ndim == 1
        assert np.all(probabilities <= 1)
        assert np.all(probabilities >= 0)
    else:
        assert all(0 <= p <= 1 for p in probabilities)


[docs]class PoiBin: """Class for storing PMF and CDF of Poisson binomial distribution. Attributes: pmf: array of probability mass function cdf: array of cumulative distribution function Examples: >>> poibin = PoiBin([0.1, 0.2, 0.2]) >>> poibin.pmf array([0.576, 0.352, 0.068, 0.004]) >>> poibin.cdf array([0.576, 0.928, 0.996, 1. ]) Note: This class stores data as arrays of `numpy.float64` no matter which type of float is passed at initialization. This is partly because `numpy.fft` deals with only `float64`, and partly because it's easier to implement. Please see also https://numpy.org/doc/1.24/reference/routines.fft.html#type-promotion. Notes: The internal algorithm for `mixed` mode is based on the well-known divide-and-conquer approach for convolving many polynomials. Its complexities are as follows. - Time: O(N(logN)^2) - Space: O(N) In the context of Poisson binomial distribution, that seems to be equivalent to the one proposed in the following paper. Biscarri, William & Zhao, Sihai & Brunner, Robert. (2018). A simple and fast method for computing the Poisson binomial distribution function. Computational Statistics & Data Analysis. 122. 10.1016/j.csda.2018.01.007. A downside of this approach is numerical precision. Since convolution by FFT is unreliable in the world under the float epsilon (i.e. 2^-52 ~ 2 * 10^-16), the relative errors of the computed PMF could be large though it is still not bad w.r.t. the absolute errors. For details, please see the following issue: https://github.com/privet-kitty/fast-poibin/issues/9. For situations where the relative precision is required, you may want to use `dp` mode instead, whose time complexity is O(N^2). You should also note that neither mode can express a probability mass below the least positive double float (~ 5 * 10^-324). """ def __init__( self, probabilities: FloatSequence, mode: Union[Literal["mixed"], Literal["dp"]] = "mixed" ) -> None: """ Args: probabilities: Success probabilities for each binomial trial. """ _validate_probabilities(probabilities) if mode == "mixed": self.pmf = calc_pmf(probabilities) elif mode == "dp": self.pmf = calc_pmf_dp(np.array(probabilities, np.float64)) else: raise ValueError(f"Invalid mode: {mode}") self.cdf: npt.NDArray[np.float64] = np.minimum(np.cumsum(self.pmf), 1.0) self.cdf[-1] = 1.0
[docs] def quantile(self, p: npt.ArrayLike): # type: ignore """Quantile function (inverse of `cdf`). Args: p: probability value(s) Returns: int or array of ints """ return np.searchsorted(self.cdf, p)