import math
from typing import NewType, TypeGuard, Any
import random

import matplotlib.pyplot as plt
import numpy as np

import inflect

def first_collision_sim(d: int = 365) -> int:
    """Simulate drawing birthdays one person at a time until one repeats.

    Each birthday is drawn uniformly at random from the integers 1..d.

    Args:
        d: number of equally likely "days".

    Returns:
        The 1-based position of the person whose birthday first matches one
        drawn earlier. The result always lies between 2 and d + 1.

    Raises:
        ValueError: if d is not a positive integer.
    """
    if not (isinstance(d, int) and d > 0):
        raise ValueError(f'd ({d}) must be a positive integer')

    seen: set[int] = set()
    while True:
        bd = random.randrange(1, d + 1)
        if bd in seen:
            # The colliding person was never added to `seen`, so they are
            # person number len(seen) + 1, not len(seen).
            return len(seen) + 1
        seen.add(bd)

# Run ten independent simulations with the default 365 days and print each result.
for trial in range(10):
    print(f'Trial {trial + 1}: First collision at person {first_collision_sim()}')

Trial 1: First collision at person 21
Trial 2: First collision at person 20
Trial 3: First collision at person 13
Trial 4: First collision at person 9
Trial 5: First collision at person 28
Trial 6: First collision at person 22
Trial 7: First collision at person 32
Trial 8: First collision at person 35
Trial 9: First collision at person 23
Trial 10: First collision at person 22

# Run 100,000 simulations (default 365 days) and keep the value each one returns.
firsts: list[int] = [first_collision_sim() for _ in range(100000)]

# Bin edges three units wide; stopping at max + 3 guarantees the last edge
# is not below the largest observed value.
bins = np.arange(min(firsts), max(firsts)+3, 3)
plt.hist(firsts, bins=bins)
plt.ylabel("occurrences")
plt.xlabel("first collision")
plt.show()

Prob = NewType('Prob', float)

def is_prob(val: Any) -> TypeGuard[Prob]:
    """true if val is a float, s.t. 0.0 <= va <= 1.0"""
    if not isinstance(val, float):
        return False
    return val >= 0.0 and val <= 1.0

def pbirthday_exact(n: int, d: int = 365) -> Prob:
    """Exact probability that at least two of n "people" share a birthday among d "days".

    Computed as 1 - P(d, n) / d**n, where P(d, n) is the number of ways to
    assign n distinct birthdays and d**n is the number of all assignments.

    Args:
        n: number of people.
        d: number of equally likely days.

    Returns:
        The collision probability as a float in [0.0, 1.0].

    Raises:
        ValueError: if n or d is not a positive integer.
    """
    if not (isinstance(d, int) and d > 0):
        raise ValueError(f'd ({d}) must be a positive integer')
    if not (isinstance(n, int) and n > 0):
        raise ValueError(f'n ({n}) must be a positive integer')

    # Pigeonhole: a collision is certain only with MORE people than days.
    # With n == d everyone can still have a distinct birthday (n=2, d=2 -> 0.5).
    if n > d:
        return Prob(1.0)

    v_dn = math.perm(d, n)
    v_t = pow(d, n)

    # Subtract in exact integer arithmetic and divide once. Computing
    # 1.0 - v_dn / v_t instead rounds the ratio to 1.0 when the collision
    # probability is tiny, which would return 0.0.
    p = (v_t - v_dn) / v_t
    if not is_prob(p):
        raise Exception("this should not happen")
    return p

# Exact collision probability for 22 people with the default 365 days.
pbirthday_exact(22)

0.4756953076625501

def pbirthday_approx(n: int, d:int = 365) -> Prob:
    """Approximate probability that at least two of n "people" share a birthday among d "days".

    Uses the approximation 1 - exp(-n**2 / (2*d)).

    Args:
        n: number of people.
        d: number of equally likely days.

    Returns:
        The approximate collision probability as a float in [0.0, 1.0].

    Raises:
        ValueError: if n or d is not a positive integer.
    """
    if not (isinstance(d, int) and d > 0):
        raise ValueError(f'd ({d}) must be a positive integer')
    if not (isinstance(n, int) and n > 0):
        raise ValueError(f'n ({n}) must be a positive integer')

    # Pigeonhole: a collision is certain only with MORE people than days.
    if n > d:
        return Prob(1.0)

    # -expm1(-x) equals 1 - exp(-x) but keeps full precision for small x;
    # the naive form rounds exp(-x) to 1.0 and returns 0.0 when n*n << d.
    p = -math.expm1(-(n * n) / (2 * d))
    if not is_prob(p):
        raise Exception("this should not happen")
    return p

# Approximate collision probability for 22 people with the default 365 days.
pbirthday_approx(22)

0.48470395490313967

def pbirthday(n: int, d:int = 365, mode="auto") -> Prob:
    """Collision probability for n "people" and d "days", exact or approximate.

    mode selects the method: "exact" calls pbirthday_exact, "approximate" calls
    pbirthday_approx, and "auto" uses the exact computation when n < 1000 and
    the approximation otherwise.

    Raises:
        ValueError: if mode is not one of the three accepted strings.
    """
    if mode not in ("auto", "exact", "approximate"):
        raise ValueError('mode must be one of "auto", "exact", "approximate"')

    # "auto" resolves to the exact method only for small n.
    use_exact = mode == "exact" or (mode == "auto" and n < 1000)
    if use_exact:
        return pbirthday_exact(n, d)
    return pbirthday_approx(n, d)

def qbirthday_approx(p:float = 0.5, d: int = 365) -> int:
    """Approximate number of "people" needed for a collision probability of at least p.

    Inverts the approximation p = 1 - exp(-n**2 / (2*d)), giving
    n = sqrt(2 * d * ln(1 / (1 - p))), rounded up to an integer.

    Args:
        p: target collision probability, a float with 0.0 < p <= 1.0.
        d: number of equally likely "days".

    Returns:
        The smallest integer not below the approximate n. For p == 1.0 the
        result is d + 1.

    Raises:
        ValueError: if p is not a float in (0.0, 1.0].
    """
    if not is_prob(p) or p == 0.0:
        raise ValueError(f'p ({p}) must be a positive probability')

    if p == 1.0:
        # The formula diverges at p = 1 (division by zero); by the pigeonhole
        # principle d + 1 people are what guarantees a collision.
        return math.floor(d) + 1

    # -log1p(-p) equals log(1 / (1 - p)) but stays accurate for tiny p, where
    # 1.0 - p rounds to 1.0 and the naive form collapses to 0.
    hazard = -math.log1p(-p)
    n = math.sqrt(2 * d * hazard)
    return math.ceil(n)

# Approximate group size for the defaults: p = 0.5 and 365 days.
qbirthday_approx()

23

# Expose the approximation under the general name qbirthday.
qbirthday = qbirthday_approx

# Size of the space: the number of distinct 128-bit values.
d = 2 ** 128

# Target collision probability: one in a billion.
p = 1e-9

# Approximate number of draws from d values needed to reach collision probability p.
q = qbirthday(p, d)

# Show q in scientific notation with two decimal places.
print(f'{q:.2e}')

8.25e+14

def digit_count(x: float) -> int:
    """Return the number of decimal digits in the integer part of x.

    For |x| >= 1 this is the usual digit count (825 -> 3, -12345.6 -> 5).
    For 0 < |x| < 1 the result is zero or negative: it is
    floor(log10(|x|)) + 1, the position of the leading significant digit,
    which is the quantity round_to_nsf relies on. Zero counts as the single
    digit "0", so the result is 1.

    Args:
        x: a finite int or float; the sign is ignored.

    Returns:
        The digit count as an int.
    """
    x = abs(x)
    if x == 0:
        # log10(0) is undefined and would raise a math domain error.
        return 1

    result = math.floor(math.log10(x) + 1)

    if isinstance(x, int):
        # log10 converts the int to a float first, which can round across a
        # power of ten (10**18 - 1 becomes 1e18). Correct the estimate with
        # exact integer comparisons.
        if 10 ** (result - 1) > x:
            result -= 1
        elif 10 ** result <= x:
            result += 1

    return result

# I'm surprised python doesn't have something like R's signif(). 
def round_to_nsf(number, nsf=6) -> int | float:
    """Rounds number to nsf sigificant figures"""

    return round(number, nsf - digit_count(number))

# inflect engine, used here to spell numbers out in English words.
inflector = inflect.engine()

# Print q, rounded to three significant figures, as words.
print(inflector.number_to_words(round_to_nsf(q, nsf=3)))

eight hundred and twenty-five trillion

# q divided by ten billion (10**10).
q / 10_000_000_000

82496.350816977

# Collision probability for one billion draws from 2**128 values; in the
# default "auto" mode, n >= 1000 selects the approximate formula.
p = pbirthday(n =1_000_000_000, d=2**128)

0.0

Some birthday paradox examples

A simulation

Computing probabilities

Now we test with some bigger numbers