Source code for toy_crypto.birthday

import math
from typing import Literal, assert_never

try:
    from warnings import deprecated  # novermin # ty: ignore[unresolved-import]
except ImportError:
    from typing_extensions import deprecated  # novermin

from . import types
from .utils import export, find_zero

__all__: list[str] = []  # will be appended to with each definition

MAX_QBIRTHDAY_P = 1.0 - (10**-8)
"""Maximum probability that Q can handle."""


EXACT_THRESHOLD = 1000
"""With auto mode, the threshold for using exact or approximate modes."""

__all__.append("MAX_QBIRTHDAY_P")
__all__.append("EXACT_THRESHOLD")

type Mode = Literal["exact", "approximate", "auto"]
__all__.append("Mode")


def _pbirthday_exact(n: int, classes: int, coincident: int) -> types.Prob:
    # use notation  from Diconis and Mosteller 1969
    c = classes
    k = coincident

    if k < 2:
        return types.Prob(1.0)
    if k > 2:
        return _pbirthday_approx(n, c, coincident=k)

    if n >= c:
        return types.Prob(1.0)

    v_dn = math.perm(c, n)
    v_t = c**n

    p = 1.0 - v_dn / v_t
    if not types.is_prob(p):
        assert False, f"This should not happen: p = {p}"
    return p


def _pbirthday_approx(n: int, classes: int, coincident: int) -> types.Prob:
    # DM1969 notation
    c = classes
    k = coincident

    if n >= c * (k - 1):
        return types.Prob(1.0)

    if k < 2:
        return types.Prob(1.0)

    # lifted from R src/library/stats/R/birthday.R
    LHS = n * math.exp(-n / (c * k)) / (1 - n / (c * (k + 1))) ** (1 / k)
    lxx = k * math.log(LHS) - (k - 1) * math.log(c) - math.lgamma(k + 1)
    p = -math.expm1(-math.exp(lxx))
    if not types.is_prob(p):
        assert False, f"this should not happen: p = {p}"
    return p  # ty: ignore


[docs] @export def probability( n: int, classes: int = 365, coincident: int = 2, mode: Mode = "auto" ) -> types.Prob: """probability of at least 1 collision among n individuals for c classes". The "exact" method still involves floating point approximations and may be very slow for large n. :raises ValueError: if any of ``n``, ``classes``, or ``coincident`` are less than 1. """ if n < 1: raise ValueError("n must be a positive integer") if classes < 1: raise ValueError("classes must be a positive integer") if coincident < 1: raise ValueError("coincident must be a positive integer") # Name parameters to follow # Use DM69 notation c = classes k = coincident if k == 1: return types.Prob(1.0) if mode == "auto": mode = "exact" if c < EXACT_THRESHOLD else "approximate" match mode: case "exact": return _pbirthday_exact(n, c, coincident=k) case "approximate": return _pbirthday_approx(n, c, coincident=k) case _: assert_never(mode)
[docs] @deprecated("Use 'probability' instead") def P( n: int, classes: int = 365, coincident: int = 2, mode: Mode = "auto" ) -> types.Prob: """ .. deprecated:: 0.5 Renamed. Use :func:`probability`. """ return probability(n, classes, coincident, mode)
__all__.append('P') # fmt: skip
[docs] @export def quantile( prob: float = 0.5, classes: int = 365, coincident: int = 2 ) -> int: """Quantile: minimum number n to get prob for classes. :raises ValueError: if ``prob`` is less than 0 or greater than 1. :raises ValueError: if ``classes`` is less than 1. :raises ValueError: if ``coincident`` is less than 1. """ if not types.is_prob(prob): raise ValueError(f"{prob} is not a valid probability") if not types.is_positive_int(classes): raise ValueError("classes must be positive") if not types.is_positive_int(coincident): raise ValueError("coincident must be positive") # Use DM69 notation so I can better connect code to published method. c = classes k = coincident if prob > MAX_QBIRTHDAY_P: return c * (k - 1) + 1 # Lifted from R src/library/stats/R/birthday.R if prob == types.Prob(0): return 1 # First approximation # broken down into three terms to help me better understand # t_1 = c^{k-1} # t_2 = k! # t_3 = 1/log(1-p) # n \approx. \lceil t_1 t_2 t_3 \rceil # # All terms are computed as logarithms term1 = (k - 1) * math.log(c) # log c^{k-1} term2 = math.lgamma(k + 1) # log k! term3 = math.log(-math.log1p(-prob)) # log 1/log(1-p) log_n = (term1 + term2 + term3) / k # adding log x_i is log prod x_i n = math.exp(log_n) n = math.ceil(n) if n < k: return k # n is now close to what it should be, # but we may need to increase it or decrease it # until n is smallest n such that P(n, c, k) >= p step = max(1, n // 100_000) n = find_zero( function=lambda m: probability(m, c, coincident=k) - prob, initial_estimate=n, initial_step=step, lower_bound=k, upper_bound=(k - 1) * (c + 1), ) return n
[docs] @deprecated("Use 'quantile' instead") def Q(prob: float = 0.5, classes: int = 365, coincident: int = 2) -> int: """ .. deprecated:: 0.5 Renamed. Use :func:`quantile`. """ return quantile(prob, classes, coincident)
__all__.append("Q")