Laboratorul 06 - AES Byte at a Time ECB Decryption [CS Open CourseWare]

This is an old revision of the document!

Înainte de laborator: Video Laborator 6

Laboratorul 06 - AES Byte at a Time ECB Decryption

Prezentarea PowerPoint pentru acest laborator poate fi găsită aici.

Puteți lucra acest laborator folosind și platforma Google Colab, accesând acest link.

Click pentru a vedea utils.py

import base64
from typing import Generator
 
 
def _pad(data: str, size: int) -> str:
    reminder = len(data) % size
    if reminder != 0:
        data = "0" * (size - reminder) + data
    return data
 
 
def _chunks(data: str, chunk_size: int) -> Generator[str, None, None]:
    """Yield consecutive ``chunk_size``-character slices of ``data``.

    The input is first passed through ``_pad`` with ``chunk_size``, so a
    string whose length is not a multiple of ``chunk_size`` is left-padded
    with "0" characters before being sliced.

    Args:
        data (str): The string to split.
        chunk_size (int): The length of every slice.

    Yields:
        str: The slices of the padded string, in order.
    """
    padded = _pad(data, chunk_size)
    offsets = range(0, len(padded), chunk_size)
    yield from (padded[start : start + chunk_size] for start in offsets)
 
 
def _hex(data: int) -> str:
    return format(data, "02x")
 
 
# Conversion functions
 
 
def byte_2_bin(bval: int) -> str:
    """Render a byte value as a string of eight binary digits.

    Args:
        bval (int): The byte value to convert, expected to be between
            0 and 255.

    Returns:
        str: The binary digits of ``bval`` as "0"/"1" characters, padded on
        the left with zeros up to a width of 8 characters.

    Examples:
        >>> byte_2_bin(72)
        '01001000'
        >>> byte_2_bin(66)
        '01000010'
    """
    # bin() prefixes its result with "0b"; drop that marker before padding.
    digits = bin(bval)[2:]
    return digits.rjust(8, "0")
 
 
def hex_2_bin(data: str) -> str:
    """Translate a hexadecimal string into a string of binary digits.

    Args:
        data (str): The hexadecimal string to convert. It is expected to hold
            an even number of valid hexadecimal digits (0-9, A-F, a-f); the
            chunking helper left-pads an odd-length input with one "0".

    Returns:
        str: The concatenation of the 8-digit binary form of every pair of
            hexadecimal digits.

    Examples:
        >>> hex_2_bin("01abcd")
        '000000011010101111001101'
        >>> hex_2_bin("0a")
        '00001010'
    """
    byte_values = (int(pair, 16) for pair in _chunks(data, 2))
    return "".join(format(value, "08b") for value in byte_values)
 
 
def bin_2_hex(data: str) -> str:
    """Translate a string of binary digits into a hexadecimal string.

    Args:
        data (str): The binary string to convert. It is expected to hold a
            multiple of 8 binary digits (0 or 1); the chunking helper
            left-pads any other length with "0" characters.

    Returns:
        str: The concatenation of the two-digit lowercase hexadecimal form of
            every group of 8 binary digits.

    Examples:
        >>> bin_2_hex("000000011010101111001101")
        '01abcd'
        >>> bin_2_hex("00001010")
        '0a'
    """
    byte_values = (int(group, 2) for group in _chunks(data, 8))
    return "".join(format(value, "02x") for value in byte_values)
 
 
def str_2_bin(data: str) -> str:
    """Translate a string into a string of binary digits.

    Args:
        data (str): The string to convert.

    Returns:
        str: The concatenation, for every character of ``data``, of its code
            point written in binary and zero-padded to at least 8 digits.

    Examples:
        >>> str_2_bin("Hello")
        '0100100001100101011011000110110001101111'
        >>> str_2_bin("IC")
        '0100100101000011'
    """
    code_points = map(ord, data)
    return "".join(map("{:08b}".format, code_points))
 
 
def bin_2_str(data: str) -> str:
    """Translate a string of binary digits back into a string.

    Args:
        data (str): The binary string to convert. It is expected to hold a
            multiple of 8 binary digits (0 or 1); the chunking helper
            left-pads any other length with "0" characters.

    Returns:
        str: One character per group of 8 binary digits, whose code point is
            the value of that group.

    Examples:
        >>> bin_2_str("0100100001100101011011000110110001101111")
        'Hello'
        >>> bin_2_str("0100100101000011")
        'IC'
    """
    code_points = (int(group, 2) for group in _chunks(data, 8))
    return "".join(map(chr, code_points))
 
 
def str_2_hex(data: str) -> str:
    """Translate a string into a hexadecimal string.

    Args:
        data (str): The string to convert.

    Returns:
        str: The concatenation, for every character of ``data``, of its code
            point written in lowercase hexadecimal and zero-padded to at
            least 2 digits.

    Examples:
        >>> str_2_hex("Hello")
        '48656c6c6f'
        >>> str_2_hex("IC")
        '4943'
    """
    code_points = map(ord, data)
    return "".join(map("{:02x}".format, code_points))
 
 
def hex_2_str(data: str) -> str:
    """Translate a hexadecimal string back into a string.

    Args:
        data (str): The hexadecimal string to convert. It is expected to hold
            an even number of valid hexadecimal digits (0-9, A-F, a-f); the
            chunking helper left-pads an odd-length input with one "0".

    Returns:
        str: One character per pair of hexadecimal digits, whose code point
            is the value of that pair.

    Examples:
        >>> hex_2_str("48656c6c6f")
        'Hello'
        >>> hex_2_str("4943")
        'IC'
    """
    code_points = (int(pair, 16) for pair in _chunks(data, 2))
    return "".join(map(chr, code_points))
 
 
# XOR functions
 
 
def strxor(operand_1: str, operand_2: str) -> str:
    """XOR two strings character by character.

    Args:
        operand_1 (str): The first string to be XORed.
        operand_2 (str): The second string to be XORed.

    Returns:
        str: A string whose i-th character has, as code point, the XOR of the
            code points of the i-th characters of both operands. The result
            is as long as the shorter operand.

    Examples:
        >>> strxor("Hello", "IC")
        '\\x01&'
        >>> strxor("secret", "key")
        '\\x18\\x00\\x1a'
    """
    # zip() stops at the end of the shorter operand.
    xored = (ord(left) ^ ord(right) for left, right in zip(operand_1, operand_2))
    return "".join(map(chr, xored))
 
 
def bitxor(operand_1: str, operand_2: str) -> str:
    """XOR two bit-strings digit by digit.

    Args:
        operand_1 (str): The first bit-string to be XORed. It is expected to
            contain only binary digits (0 or 1).
        operand_2 (str): The second bit-string to be XORed. It is expected to
            contain only binary digits (0 or 1).

    Returns:
        str: A bit-string whose i-th digit is the XOR of the i-th digits of
            both operands. The result is as long as the shorter operand.

    Examples:
        >>> bitxor("01001000", "01000010")
        '00001010'
        >>> bitxor("10101010", "00110011")
        '10011001'
    """
    # zip() stops at the end of the shorter operand.
    xored = (int(left) ^ int(right) for left, right in zip(operand_1, operand_2))
    return "".join(map(str, xored))
 
 
def hexxor(operand_1: str, operand_2: str) -> str:
    """XOR two hexadecimal strings byte by byte.

    Args:
        operand_1 (str): The first hexadecimal string to be XORed. It is
            expected to hold an even number of valid hexadecimal digits
            (0-9, A-F, a-f).
        operand_2 (str): The second hexadecimal string to be XORed. It is
            expected to hold an even number of valid hexadecimal digits
            (0-9, A-F, a-f).

    Returns:
        str: The lowercase hexadecimal string in which every pair of digits
            is the XOR of the corresponding pairs of both operands. The
            result covers as many pairs as the shorter operand holds.

    Examples:
        >>> hexxor("48656c6c6f", "42696e67")
        '0a0c020b'
        >>> hexxor("736563726574", "6b6579")
        '18001a'
    """
    # Pair the two-digit chunks first; zip() stops at the shorter operand.
    pairs = zip(_chunks(operand_1, 2), _chunks(operand_2, 2))
    xored = (int(left, 16) ^ int(right, 16) for left, right in pairs)
    return "".join(map(_hex, xored))
 
 
# Python3 'bytes' functions
 
 
def bytes_to_string(bytes_data: bytearray | bytes) -> str:
    """Converts a byte array or a byte string to a string.
 
    Args:
        bytes_data (bytearray | bytes): The byte array or the byte string to be
            converted. It should be encoded in Latin-1 format.
 
    Returns:
        str: The string representation of the byte array or the byte string,
            decoded using Latin-1 encoding.
 
    Examples:
        >>> bytes_to_string(b'Hello')
        'Hello'
        >>> bytes_to_string(bytearray(b'IC'))
        'IC'
    """
    return bytes_data.decode(encoding="raw_unicode_escape")
 
 
def string_to_bytes(string_data: str) -> bytes:
    """Converts a string to a byte string.

    The conversion uses the "raw_unicode_escape" codec: characters in the
    Latin-1 range (code points 0-255) become the single byte with the same
    value, while any other character is written as a backslash-u escape
    sequence.

    Args:
        string_data (str): The string to be converted.

    Returns:
        bytes: The byte string representation of the string.

    Examples:
        >>> string_to_bytes('Hello')
        b'Hello'
        >>> string_to_bytes('IC')
        b'IC'
    """
    return bytes(string_data, encoding="raw_unicode_escape")
 
 
# Base64 functions
 
 
def b64encode(data: str) -> str:
    """Encodes a string to base64.

    The string is turned into bytes with ``string_to_bytes``, base64-encoded,
    and the encoded bytes are turned back into a string with
    ``bytes_to_string``.

    Args:
        data (str): The string to be encoded.

    Returns:
        str: The base64 encoding of ``data``.

    Examples:
        >>> b64encode("Hello")
        'SGVsbG8='
        >>> b64encode("IC")
        'SUM='
    """
    raw = string_to_bytes(data)
    encoded = base64.b64encode(raw)
    return bytes_to_string(encoded)
 
 
def b64decode(data: str) -> str:
    """Decodes a base64 encoded string.

    The string is turned into bytes with ``string_to_bytes``, base64-decoded,
    and the decoded bytes are turned back into a string with
    ``bytes_to_string``.

    Args:
        data (str): The base64 encoded string to be decoded. It is expected
            to contain only base64 characters (A-Z, a-z, 0-9, +, /, =).

    Returns:
        str: The decoded content of ``data``.

    Examples:
        >>> b64decode("SGVsbG8=")
        'Hello'
        >>> b64decode("SUM=")
        'IC'
    """
    encoded = string_to_bytes(data)
    raw = base64.b64decode(encoded)
    return bytes_to_string(raw)

AES ECB

Cel mai simplu mod de criptare este ECB (Electronic Codebook). Mesajul este împărțit în blocuri, fiecare bloc fiind criptat separat.

Ținând cont că fiecare bloc din mesaj este criptat individual cu cheia k, blocuri identice din mesaj vor rezulta în blocuri criptate identic. Putem astfel, în cazul în care folosim modul ECB, să ne așteptăm la multe porțiuni criptate repetate.

Această vulnerabilitate apare când:

Trimitem un INPUT către server
Serverul adaugă un prefix în fața INPUT-ului și un mesaj secret după acesta: INPUT → PREFIX || INPUT || secret
Serverul criptează mesajul trimis folosind propria cheie → AES-128-ECB(random-prefix || attacker-controlled || target-bytes, random-key)
Serverul trimite rezultatul criptat înapoi către noi.

PREFIX poate fi un header de pachet sau orice altă informație “inutilă”.

Pentru următoarele exerciții vom folosi scheletul de laborator.

Exercițiul 1 - Determinarea dimensiunii blocului (3p)

Primul pas necesar, într-un atac asupra unei criptări pe blocuri, este determinarea dimensiunii unui bloc. Deși cunoaștem deja dimensiunea în acest caz, este un pas necesar în alte situații.

Pentru a afla dimensiunea, este suficient să trimitem, pe rând, mesaje din ce în ce mai mari (“A”, “AA”, “AAA”, …).

Funcția find_block_size() mărește numărul de caractere adăugate, mărind lungimea mesajului.

Cum putem asocia dimensiunea mesajului cu numărul de blocuri criptate?

Exercițiul 2 - Determinarea dimensiunii prefixului (3p)

Vom aborda această problemă la fel ca în pasul anterior. Trimitem mesaje din ce în ce mai mari. Atunci când blocul criptat corespunzător prefixului nu se va mai schimba, putem calcula lungimea sa.

RRTT TT
RRXT TTT
RRXX TTTT
RRXX XTTT T  *detected that first block did not change*
RRXT TTT

În exemplul anterior am notat cu R caracterele prefixului, X = caracterele mesajului input și T mesajul target (secretul pe care dorim să îl aflăm).

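Știm atunci că (prefix_size + pad_size - 1) % block_size = 0, unde pad_size este numărul de caractere X trimise în momentul în care am detectat că blocul nu s-a mai schimbat (în exemplul de mai sus: (2 + 3 - 1) % 4 = 0).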

Ce se întâmplă dacă lungimea prefixului este mai mare decât lungimea unui singur bloc?

Exercițiul 3 - ECB Byte at a Time Attack (4p)

Presupunem că avem un algoritm de criptare pe blocuri care criptează 16 bytes, producând un ciphertext de 16 bytes. Folosim acest algoritm pentru a cripta 2 blocuri de date necunoscute, m1 și m2. În plus, avem voie să trimitem propriul input m0, care va fi lipit în fața acestor blocuri. Pentru că putem trimite orice mesaj, vom alege să trimitem unul de lungime 16. Astfel, în cazul în care se folosește modul ECB, putem afla Enc(m0). Accesul la perechi (input propriu, ciphertext) este foarte util în acest caz, deoarece avem de fapt la dispoziție un oracol de criptare. Dacă am trimite doar 15 bytes, ultimul byte al primului bloc ar fi primul byte din m1, pe care îl putem afla prin brute force.

 Block 1          Block 2  Block 3
|RRXXXXXXXXXXXXX?|?......?|?......?|
 |----known----||--m1---|

Încercând toate cele 256 de valori posibile pentru ultimul byte din Block 1, putem identifica, folosind oracolul, valoarea pentru care criptarea blocului coincide cu blocul criptat țintă. Presupunem că am găsit byte-ul “w”. Repetăm același proces pentru următorul byte.

 Block 1          Block 2  Block 3
|RRXXXXXXXXXXXXw?|?......?|?......?|
 |----known----|

Repetând același proces, vom afla, rând pe rând, fiecare byte din secretul m1, găsind mesajul “we attack at daw”. Din păcate, în acest punct nu putem ajunge la mesajul m2. Dacă am alege m0 de lungime 0, obținem:

 M1               M2
|we attack at daw|?......?|
 |----known-----|

Totuși, ne putem folosi de m1 pentru a continua atacul. Dacă alegem m0 de lungime 15 bytes, vom avea următoarea situație:

 Block 1          Block 2          Block 3
|RRXXXXXXXXXXXXXw|e attack at daw?|?......?|
 |------------known-------------|

Se poate observa că putem să repetăm acum fix același proces pentru a afla byte-ul necunoscut din Block 2. Cât timp avem un oracol care să valideze atacul brute force, putem repeta procesul pentru a afla oricâți bytes din secret.

Lab Code

Click pentru a vedea lab06.py

lab06.py

import base64
from math import ceil
from typing import List, Tuple
 
from cryptography.hazmat.backends import default_backend
from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes
from utils import *
 
# Backend object shared by every Cipher constructed in this module.
backend = default_backend()
 
 
def split_bytes_in_blocks(x: bytes, block_size: int) -> List[bytes]:
    """Cuts a byte string into consecutive blocks of ``block_size`` bytes.

    Args:
        x (bytes): The byte string to split.
        block_size (int): The size of each block in bytes.

    Returns:
        List[bytes]: The blocks of ``x`` in order. Every block is
            ``block_size`` bytes long, except possibly the last one, which
            holds whatever bytes remain.
    """
    # Ceiling division: a trailing partial block still counts as a block.
    block_count = -(-len(x) // block_size)
    offsets = range(0, block_count * block_size, block_size)
    return [x[start : start + block_size] for start in offsets]
 
 
def pkcs7_padding(message: bytes, block_size: int) -> bytes:
    """Applies PKCS#7 padding to a byte string.

    Args:
        message (bytes): The byte string to pad.
        block_size (int): The size of the block in bytes.

    Returns:
        bytes: A byte string that is a multiple of block_size in length,
            with padding bytes added at the end. The value of each padding
            byte is equal to the number of padding bytes added. A message
            whose length is already a multiple of block_size receives one
            full block of padding.

    Raises:
        ZeroDivisionError: If block_size is 0.
        ValueError: If the number of padding bytes does not fit in a single
            byte (i.e. exceeds 255).
    """
    # len(message) % block_size lies in [0, block_size - 1], so the padding
    # length is always in [1, block_size]; an aligned message therefore gets
    # a full extra block without needing a special case.
    padding_length = block_size - (len(message) % block_size)
    return message + bytes([padding_length]) * padding_length
 
 
def pkcs7_strip(data: bytes) -> bytes:
    """Removes PKCS#7 padding from a byte string.

    Args:
        data (bytes): The padded byte string to strip.

    Returns:
        bytes: A byte string with the padding bytes removed from the end.

    Raises:
        ValueError: If data is empty, if its last byte is not a valid padding
            length (0 or larger than the data itself), or if the trailing
            bytes are not all equal to the padding length.
    """
    if not data:
        raise ValueError("Cannot strip PKCS#7 padding from empty data")

    # The last byte states how many padding bytes were appended.
    padding_length = data[-1]

    # A length of 0 must be rejected explicitly: data[:-0] is data[:0], which
    # would silently discard the whole message.
    if padding_length == 0 or padding_length > len(data):
        raise ValueError("Invalid PKCS#7 padding length")

    # Every padding byte must carry the padding length as its value.
    if data[-padding_length:] != bytes([padding_length]) * padding_length:
        raise ValueError("Invalid PKCS#7 padding bytes")

    return data[:-padding_length]
 
 
def encrypt_aes_128_ecb(plaintext: bytes, key: bytes) -> bytes:
    """Encrypts a byte string with AES in ECB mode.

    Args:
        plaintext (bytes): The byte string to encrypt. It is PKCS#7-padded
            to a multiple of 16 bytes before encryption.
        key (bytes): The encryption key, expected to be 16 bytes long for
            AES-128 (the length is not checked by this function).

    Returns:
        bytes: The ciphertext of the padded plaintext.
    """
    data = pkcs7_padding(plaintext, block_size=16)
    aes_ecb = Cipher(algorithms.AES(key), modes.ECB(), backend=backend)
    context = aes_ecb.encryptor()
    return b"".join((context.update(data), context.finalize()))
 
 
def decrypt_aes_128_ecb(ciphertext: bytes, key: bytes) -> bytes:
    """Decrypts a byte string with AES in ECB mode.

    Args:
        ciphertext (bytes): The byte string to decrypt, expected to be a
            multiple of 16 bytes in length.
        key (bytes): The decryption key, expected to be 16 bytes long for
            AES-128 (the length is not checked by this function).

    Returns:
        bytes: The decrypted data with its PKCS#7 padding removed.
    """
    aes_ecb = Cipher(algorithms.AES(key), modes.ECB(), backend=backend)
    context = aes_ecb.decryptor()
    padded_plaintext = b"".join((context.update(ciphertext), context.finalize()))
    return pkcs7_strip(padded_plaintext)
 
 
class Oracle:
    """Simulated encryption oracle built on AES-128 in ECB mode.

    The attacker is only meant to call ``encrypt``; the key, prefix and
    target stored on the instance are not supposed to be looked at.
    """

    def __init__(self) -> None:
        """Sets up the fixed key, the fixed prefix and the secret target."""
        self.key = b"Mambo NumberFive"
        self.prefix = b"PREF"

        # This is the secret you are supposed to recover.
        encoded_target = (
            "RG8gbm90IGxheSB1cCBmb3IgeW91cnNlbHZlcyB0cmVhc3VyZXMgb24gZWFydGgsI"
            "HdoZXJlIG1vdGggYW5kIHJ1c3QgZGVzdHJveSBhbmQgd2hlcmUgdGhpZXZlcyBicm"
            "VhayBpbiBhbmQgc3RlYWwsCmJ1dCBsYXkgdXAgZm9yIHlvdXJzZWx2ZXMgdHJlYXN"
            "1cmVzIGluIGhlYXZlbiwgd2hlcmUgbmVpdGhlciBtb3RoIG5vciBydXN0IGRlc3Ry"
            "b3lzIGFuZCB3aGVyZSB0aGlldmVzIGRvIG5vdCBicmVhayBpbiBhbmQgc3RlYWwuC"
            "kZvciB3aGVyZSB5b3VyIHRyZWFzdXJlIGlzLCB0aGVyZSB5b3VyIGhlYXJ0IHdpbG"
            "wgYmUgYWxzby4="
        )
        self.target = base64.b64decode(encoded_target)

    def encrypt(self, message: bytes) -> bytes:
        """Encrypts ``prefix + message + target`` under the oracle's key.

        Args:
            message (bytes): The attacker-controlled bytes placed between the
                fixed prefix and the secret target.

        Returns:
            bytes: The ciphertext produced by ``encrypt_aes_128_ecb``.
        """
        plaintext = self.prefix + message + self.target
        return encrypt_aes_128_ecb(plaintext, self.key)
 
 
# Task 1
def find_block_size() -> Tuple[int, int, int]:
    """Task 1 skeleton: determine the cipher's block size from the oracle.

    As provided, the loop body runs exactly once (it ends with ``break``)
    and the three results keep their initial value of 0; TODO 1 is where the
    actual detection has to be written.

    Returns:
        Tuple[int, int, int]: ``(block_size, size_of_prefix_target_padding,
            minimum_size_to_align_plaintext)``, i.e. the block length, the
            size of fixed prefix + target + pad, and the minimum size of the
            plaintext needed to make a new block.
    """
    # Ciphertext length when the attacker-controlled message is empty.
    initial_length = len(Oracle().encrypt(b""))
    # Number of b"X" bytes sent in the current query.
    i = 0

    # Placeholders to be filled in by TODO 1.
    block_size = 0
    size_of_prefix_target_padding = 0
    minimum_size_to_align_plaintext = 0

    while 1:
        # Feed identical bytes of your-string to the function 1 at a time
        # until you get the block length. You will also need to determine
        # here the size of fixed prefix + target + pad, and the minimum
        # size of the plaintext to make a new block
        length = len(Oracle().encrypt(b"X" * i))
        i += 1

        # TODO 1: find block_size, size_of_prefix_target_padding,
        # and minimum_size_to_align_plaintext
        # NOTE: this unconditional break ends the loop after the first query.
        break

    return (
        block_size,
        size_of_prefix_target_padding,
        minimum_size_to_align_plaintext,
    )
 
 
# Task 2
def find_prefix_size(block_size: int) -> int:
    """Task 2 skeleton: determine the length of the oracle's fixed prefix.

    As provided, both ``prefix_block_index`` and ``prefix_size_in_block``
    stay 0, so the function returns 0 until TODO 2.1 and TODO 2.2 are
    implemented.

    Args:
        block_size (int): The block size in bytes used to split ciphertexts.

    Returns:
        int: ``prefix_block_index * block_size + prefix_size_in_block``.
    """
    # Ciphertext blocks obtained with an empty attacker-controlled message.
    initial_blocks = split_bytes_in_blocks(Oracle().encrypt(b""), block_size)

    # TODO 2: Find when prefix_size + padding_size - 1 = block_size
    # Use split_bytes_in_blocks to get blocks of size block_size.

    # TODO 2.1: Find the block containing the prefix by comparing
    # initial_blocks and modified_blocks
    # You may find enumerate() and zip() useful.
    # Ciphertext blocks obtained with a single b"X" as the message.
    modified_blocks = split_bytes_in_blocks(Oracle().encrypt(b"X"), block_size)
    prefix_block_index = 0

    # TODO 2.2: As now we know in which block to look, find when that block
    # does not change anymore when adding more X's. The complementary will
    # represent the prefix.
    prefix_size_in_block = 0

    # Whole blocks before the located block, plus the part inside that block.
    prefix_size = prefix_block_index * block_size + prefix_size_in_block
    return prefix_size
 
 
# Task 3
def recover_one_byte_at_a_time(
    block_size: int,
    prefix_size: int,
    target_size: int,
) -> str:
    """Task 3 skeleton: recover the oracle's secret target byte by byte.

    As provided, the loop only computes the alignment padding for each
    position; nothing is appended to ``known_target_bytes`` until TODO 3.1
    to 3.3 are implemented, so the function returns an empty string.

    Args:
        block_size (int): The block size in bytes.
        prefix_size (int): The length in bytes of the oracle's fixed prefix.
        target_size (int): The number of target bytes to recover; the loop
            runs once per byte.

    Returns:
        str: The recovered target bytes, decoded with ``bytes.decode()``.
    """
    # Target bytes recovered so far, in order.
    known_target_bytes = b""

    for _ in range(target_size):
        # prefix_size + padding_length + known_len + 1 = 0 mod block_size
        known_len = len(known_target_bytes)

        # Number of filler bytes that satisfies the congruence above.
        padding_length = (-known_len - 1 - prefix_size) % block_size
        padding = b"X" * padding_length

        # TODO 3.1: Determine the target block index which contains only known
        # characters except its last character.

        # TODO 3.2: Get the target block from split_bytes_in_blocks at the index
        # previously determined.

        # TODO 3.3: Try every possibility for the last character and search for
        # the block that you already know. That character will be added to
        # the known target bytes.

    return known_target_bytes.decode()
 
 
def main() -> None:
    """Runs the three lab tasks in order and prints their results."""
    # Task 1: block size, total size of prefix + target + padding, and the
    # number of bytes needed to align the plaintext.
    sizes = find_block_size()
    (
        block_size,
        size_of_prefix_target_padding,
        minimum_size_to_align_plaintext,
    ) = sizes

    print(f"Block size:\t\t\t\t{block_size}")
    total_size_line = (
        "Size of prefix, target, and padding:"
        f"\t{size_of_prefix_target_padding}"
    )
    print(total_size_line)
    print(f"Pad needed to align:\t\t\t{minimum_size_to_align_plaintext}")

    # Task 2: size of the fixed prefix.
    prefix_size = find_prefix_size(block_size)
    print(f"\nPrefix Size:\t{prefix_size}")

    # The target is what remains once the alignment bytes and the prefix are
    # taken out of the total size.
    non_target_size = minimum_size_to_align_plaintext + prefix_size
    target_size = size_of_prefix_target_padding - non_target_size

    # Task 3: recover the target itself.
    recovered_target = recover_one_byte_at_a_time(
        block_size=block_size,
        prefix_size=prefix_size,
        target_size=target_size,
    )
    print(f"\nTarget: {recovered_target}")
 
 
# Run the lab only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

ic/labs/06.1696192359.txt.gz · Last modified: 2023/10/01 23:32 by razvan.smadu

Old revisions

Media Manager Back to top