Source code for toy_crypto.vigenere

from collections import UserDict
from random import sample
from typing import Any, Optional, TypeAlias
from itertools import combinations
from toy_crypto.utils import hamming_distance

Letter: TypeAlias = str
"""Intended to indicate a str of length 1"""


[docs] class Alphabet: """An alphabet. This does not check if the alphabet is sensible. In particular, you may get very peculiar results if the alphabet contains duplicate elements. Instances of this class are conventionally immutable. """ CAPS_ONLY = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" """'A' through 'Z' in order.""" # Printable 7 bit ASCI with space but excluding backslash. Shuffled. PRINTABLE = r"""JDi-Km9247oBEctS%Isxz{<;=W^fL,[Y3Mgd6HV(kR8:_CF"*')>|#~Xay!]N+1vnqTl/}j$A.@0b ZGe`UPhp?Ow&ru5Q""" """ Printable 7-bit ASCII in a fixed scrambled order. It does not include the backslash character, and the scrambled order is hardcoded. """ DEFAULT = CAPS_ONLY """CAPS_ONLY is the default.""" def __init__( self, alphabet: Optional[str] = None, prebaked: Optional[str] = None, ): """This does not check if the alphabet is sensible. In particular, you may get very peculiar results if the alphabet contains duplicate elements. """ match (alphabet, prebaked): case (None, None) | (None, "default"): abc = self.DEFAULT case (None, "caps"): abc = self.CAPS_ONLY case (None, "printable"): abc = self.PRINTABLE case (None, _): raise ValueError("Unknown pre-baked alphabet") case (_, None): if not isinstance(alphabet, str): raise TypeError("alphabet must be a string") abc = alphabet case (_, _): raise ValueError( "Can't use both explicit and pre-baked alphabet" ) self._alphabet = abc self._modulus = len(self._alphabet) # Set up char to index table self._abc2idx: dict[Letter, int] = { c: i for i, c in enumerate(self._alphabet) } @property def alphabet(self) -> str: """The underlying alphabet.""" return self._alphabet @property def modulus(self) -> int: """The modulus.""" return self._modulus @property def abc2idx(self) -> dict[Letter, int]: """Dictionary of letter to position in the alphabet.""" return self._abc2idx # We will want to use 'in' for Alphabet instances def __contains__(self, item: Any) -> bool: """ Allows the 'in' and 'not in' operators. So if `abc` is an Alphabet, ``'Z' in abc`` is well defined. """ return item in self.alphabet def __getitem__(self, index: slice | int) -> str: """Allows retrieving bits of the Alphabet through [index] notation.""" return self.alphabet[index] # annoyingly, the type str is also used for single character strings # add, inverse, subtract all deal with single characters
[docs] def add(self, a: Letter, b: Letter) -> Letter: """Returns the modular sum of two characters.""" if a not in self or b not in self: raise ValueError("argument not an element") idx = (self.abc2idx[a] + self.abc2idx[b]) % self.modulus return self.alphabet[idx]
[docs] def inverse(self, c: Letter) -> Letter: """Returns the additive inverse of character c""" if c not in self: raise ValueError("argument not an element") idx = (self.modulus - self.abc2idx[c]) % self.modulus return self.alphabet[idx]
[docs] def subtract(self, a: Letter, b: Letter) -> Letter: """Returns the character corresponding to a - b.""" return self.add(a, self.inverse(b))
[docs] class Cipher: """A Vigenère Cipher is a key and an alphabet.""" def __init__(self, key: str, alphabet: Alphabet | str | None = None): if isinstance(alphabet, Alphabet): abc = alphabet else: abc = Alphabet(alphabet) self._alphabet = abc if not key: raise ValueError("key must not be empty") if any([k not in self._alphabet for k in key]): raise ValueError( "key must be comprised of characters in the alphabet" ) self._key: str = key self._key_length = len(self._key) @property def alphabet(self) -> Alphabet: """The Alphabet for this cipher.""" return self._alphabet @property def key(self) -> str: """Shhh! This is the key. Keep it secret.""" return self._key @property def modulus(self) -> int: """The modulus.""" return self._alphabet.modulus
[docs] def crypt(self, text: str, mode: str) -> str: """{en,de}crypts text depending on mode""" match mode: case "encrypt": operation = self.alphabet.add case "decrypt": operation = self.alphabet.subtract case _: raise ValueError("mode must be 'encrypt' or 'decrypt") # TODO: Generalize this for streaming input and output output: list[Letter] = [] """ I would love to use zip and cycle, but I need to handle input characters that are not in the alphabet. """ key_idx = 0 for c in text: if c not in self.alphabet: result = c else: k = self.key[key_idx] result = operation(c, k) key_idx = (key_idx + 1) % self._key_length output.append(result) return "".join(output)
[docs] def encrypt(self, plaintext: str) -> str: """Returns ciphertext.""" return self.crypt(plaintext, mode="encrypt")
[docs] def decrypt(self, ciphertext: str) -> str: """Returns plaintext.""" return self.crypt(ciphertext, mode="decrypt")
BitSimilarity = float """A float in [-4.0, 4.0] the bit similarity per byte. -4 indicates that all bits are different. +4 indicates that all bits are the same. """ class SimilarityScores(UserDict[int, list[BitSimilarity]]): """A dictionary of keysize : list[BitSimilarity].""" def __init__(self) -> None: self.data: dict[int, list[BitSimilarity]] = {} self._best: Optional[int] = None @staticmethod def is_keysize(k: object) -> bool: if not isinstance(k, int): return False return k >= 0 @staticmethod def is_bit_similarity(x: object) -> bool: if not isinstance(x, float | int): return False return x >= -4.0 and x <= 4.0 def is_valid(self) -> bool: if not all(SimilarityScores.is_keysize(k) for k in self.data.keys()): return False for scores in self.data.values(): if not all(SimilarityScores.is_bit_similarity(s) for s in scores): return False return True def mean(self, k: int) -> float: """The average score for keysize k. :raises KeyError: if k triggers a KeyError. """ scores = self.data[k] return sum(scores) / len(scores) @property def best(self) -> int: """Keysize with the best (highest average) score.""" if self._best is not None: return self._best best_so_far: tuple[int, float] = (0, -4.0) for k in self.data.keys(): mean = self.mean(k) if mean > best_so_far[1]: best_so_far = (k, mean) self._best = best_so_far[0] return self._best
[docs] def probable_keysize( ciphertext: bytes | str, min_size: int = 3, max_size: int = 40, trial_pairs: int = 1, ) -> SimilarityScores: """Assesses likelihood for key length of ciphertext. :param ciphertext: The ciphertext. :param min_size: The minimum key length to try. :param max_size: The maximum key length to try. :param trial_pairs: The number of pairs of blocks to test. :return: Returns list sorted by scores of (keysize, score) Scores are scaled 0 (least likely) to 1 (most likely), but they do not directly represent probabilities. """ scores = SimilarityScores() if min_size == max_size: # Should this be a ValueError? scores[min_size] = [0] return scores if min_size > max_size: raise ValueError("min_size can't be larger than max_size") if trial_pairs < 1: raise ValueError("trial_pairs must be positive") if isinstance(ciphertext, str): ciphertext = bytes(ciphertext, encoding="utf8") ctext_len = len(ciphertext) for keysize in range(min_size, max_size): if 2 * keysize > ctext_len: continue num_blocks = ctext_len // keysize all_pairs = list(combinations(range(num_blocks), 2)) # trial_pairs may have to be reduced to trial_pairs = min([trial_pairs, len(all_pairs)]) pairs = sample(all_pairs, trial_pairs) def get_block(idx: int) -> bytes: idx *= keysize return ciphertext[idx : idx + keysize] s_scores: list[float] = [] for i, j in pairs: distance = hamming_distance(get_block(i), get_block(j)) similarity_per_byte = 4.0 - (distance / keysize) s_scores.append(similarity_per_byte) scores[keysize] = s_scores return scores