Laboratorul 06 - AES Byte at a Time ECB Decryption [CS Open CourseWare]

This is an old revision of the document!

Inainte de laborator: Video Laborator 6

Laboratorul 06 - AES Byte at a Time ECB Decryption

Prezentarea PowerPoint pentru acest laborator poate fi găsită aici.

Puteți lucra acest laborator folosind și platforma Google Colab, accesând acest link.

Click pentru a vedea utils.py

import base64
from typing import Generator
 
 
def _pad(data: str, size: int) -> str:
    reminder = len(data) % size
    if reminder != 0:
        data = "0" * (size - reminder) + data
    return data
 
 
def _chunks(data: str, chunk_size: int) -> Generator[str, None, None]:
    data = _pad(data, chunk_size)
    for i in range(0, len(data), chunk_size):
        yield data[i : i + chunk_size]
 
 
def _hex(data: int) -> str:
    return format(data, "02x")
 
 
# Conversion functions
 
 
def byte_2_bin(bval: int) -> str:
    """Converts a byte value to a binary string.
 
    Args:
        bval (int):
            The byte value to be converted. It should be an integer between
            0 and 255.
 
    Returns:
        str: The binary string representation of the byte value, where each bit
        is encoded as a character. The result has a fixed length of 8 characters
        and is padded with leading zeros if necessary.
 
    Examples:
        >>> byte_2_bin(72)
        '01001000'
        >>> byte_2_bin(66)
        '01000010'
    """
    return bin(bval)[2:].zfill(8)
 
 
def hex_2_bin(data: str) -> str:
    """Converts a hexadecimal string to a binary representation.
 
    Args:
        data (str): The hexadecimal string to be converted. It should have an
            even number of characters and only contain valid hexadecimal digits
            (0-9, A-F, a-f).
 
    Returns:
        str: The binary representation of the hexadecimal string, where each
            pair of hexadecimal digits is encoded as an 8-bit binary number.
 
    Examples:
        >>> hex_2_bin("01abcd")
        '000000011010101111001101'
        >>> hex_2_bin("0a")
        '00001010'
    """
    return "".join(f"{int(x, 16):08b}" for x in _chunks(data, 2))
 
 
def bin_2_hex(data: str) -> str:
    """Converts a binary string to a hexadecimal representation.
 
    Args:
        data (str): The binary string to be converted. It should have a multiple
            of 8 characters and only contain valid binary digits (0 or 1).
 
    Returns:
        str: The hexadecimal representation of the binary string, where each
            group of 8 binary digits is encoded as a pair of hexadecimal digits.
 
    Examples:
        >>> bin_2_hex("000000011010101111001101")
        '01abcd'
        >>> bin_2_hex("00001010")
        '0a'
    """
    return "".join(f"{int(b, 2):02x}" for b in _chunks(data, 8))
 
 
def str_2_bin(data: str) -> str:
    """Converts a string to a binary representation.
 
    Args:
        data (str): The string to be converted.
 
    Returns:
        str: The binary representation of the string, where each character is
            encoded as an 8-bit binary number.
 
    Examples:
        >>> str_2_bin("Hello")
        '0100100001100101011011000110110001101111'
        >>> str_2_bin("IC")
        '0100100101000011'
    """
    return "".join(f"{ord(c):08b}" for c in data)
 
 
def bin_2_str(data: str) -> str:
    """Converts a binary string to a string.
 
    Args:
        data (str): The binary string to be converted. It should have a multiple
            of 8 characters and only contain valid binary digits (0 or 1).
 
    Returns:
        str: The string representation of the binary string, where each group
            of 8 binary digits is decoded as a character.
 
    Examples:
        >>> bin_2_str("0100100001100101011011000110110001101111")
        'Hello'
        >>> bin_2_str("0100100101000011")
        'IC'
    """
    return "".join(chr(int(b, 2)) for b in _chunks(data, 8))
 
 
def str_2_hex(data: str) -> str:
    """Converts a string to a hexadecimal representation.
 
    Args:
        data (str): The string to be converted.
 
    Returns:
        str: The hexadecimal representation of the string, where each character
            is encoded as a pair of hexadecimal digits.
 
    Examples:
        >>> str_2_hex("Hello")
        '48656c6c6f'
        >>> str_2_hex("IC")
        '4943'
    """
    return "".join(f"{ord(c):02x}" for c in data)
 
 
def hex_2_str(data: str) -> str:
    """Converts a hexadecimal string to a string.
 
    Args:
        data (str): The hexadecimal string to be converted. It should have an
            even number of characters and only contain valid hexadecimal digits
            (0-9, A-F, a-f).
 
    Returns:
        str: The string representation of the hexadecimal string, where each
            pair of hexadecimal digits is decoded as a character.
 
    Examples:
        >>> hex_2_str("48656c6c6f")
        'Hello'
        >>> hex_2_str("4943")
        'IC'
    """
    return "".join(chr(int(x, 16)) for x in _chunks(data, 2))
 
 
# XOR functions
 
 
def strxor(operand_1: str, operand_2: str) -> str:
    """Performs a bitwise exclusive OR (XOR) operation on two strings.
 
    Args:
        operand_1 (str): The first string to be XORed.
        operand_2 (str): The second string to be XORed.
 
    Returns:
        str: The result of the XOR operation on the two strings, where each
            character is encoded as an 8-bit binary number. The result has
            the same length as the shorter input string.
 
    Examples:
        >>> strxor("Hello", "IC")
        '\\x01&'
        >>> strxor("secret", "key")
        '\\x18\\x00\\x1a'
    """
    return "".join(chr(ord(x) ^ ord(y)) for (x, y) in zip(operand_1, operand_2))
 
 
def bitxor(operand_1: str, operand_2: str) -> str:
    """Performs a bitwise exclusive OR (XOR) operation on two bit-strings.
 
    Args:
        operand_1 (str): The first bit-string to be XORed. It should only
            contain valid binary digits (0 or 1).
        operand_2 (str): The second bit-string to be XORed. It should only
            contain valid binary digits (0 or 1).
 
    Returns:
        str: The result of the XOR operation on the two bit-strings, where each
            bit is encoded as a character. The result has the same length as
            the shorter input bit-string.
 
    Examples:
        >>> bitxor("01001000", "01000010")
        '00001010'
        >>> bitxor("10101010", "00110011")
        '10011001'
    """
    return "".join(str(int(x) ^ int(y)) for (x, y) in zip(operand_1, operand_2))
 
 
def hexxor(operand_1: str, operand_2: str) -> str:
    """Performs a bitwise exclusive OR (XOR) operation on two hexadecimal
    strings.
 
    Args:
        operand_1 (str): The first hexadecimal string to be XORed. It should
            have an even number of characters and only contain valid hexadecimal
            digits (0-9, A-F, a-f).
        operand_2 (str): The second hexadecimal string to be XORed. It should
            have an even number of characters and only contain valid
            digits (0-9, A-F, a-f).
 
    Returns:
        str: The result of the XOR operation on the two hexadecimal strings,
            where each pair of hexadecimal digits is encoded as a pair of
            hexadecimal digits. The result has the same length as the shorter
            input hexadecimal string.
 
    Examples:
        >>> hexxor("48656c6c6f", "42696e67")
        '0a0c020b'
        >>> hexxor("736563726574", "6b6579")
        '18001a'
    """
    return "".join(
        _hex(int(x, 16) ^ int(y, 16))
        for (x, y) in zip(_chunks(operand_1, 2), _chunks(operand_2, 2))
    )
 
 
# Python3 'bytes' functions
 
 
def bytes_to_string(bytes_data: bytearray | bytes) -> str:
    """Converts a byte array or a byte string to a string.
 
    Args:
        bytes_data (bytearray | bytes): The byte array or the byte string to be
            converted. It should be encoded in Latin-1 format.
 
    Returns:
        str: The string representation of the byte array or the byte string,
            decoded using Latin-1 encoding.
 
    Examples:
        >>> bytes_to_string(b'Hello')
        'Hello'
        >>> bytes_to_string(bytearray(b'IC'))
        'IC'
    """
    return bytes_data.decode(encoding="raw_unicode_escape")
 
 
def string_to_bytes(string_data: str) -> bytes:
    """Converts a string to a byte string.
 
    Args:
        string_data (str): The string to be converted.
 
    Returns:
        bytes: The byte string representation of the string, encoded using
        Latin-1 encoding.
 
    Examples:
        >>> string_to_bytes('Hello')
        b'Hello'
        >>> string_to_bytes('IC')
        b'IC'
    """
    return string_data.encode(encoding="raw_unicode_escape")
 
 
# Base64 functions
 
 
def b64encode(data: str) -> str:
    """Encodes a string to base64.
 
    Parameters:
        data (str): The string to be encoded.
 
    Returns:
        str: The base64 encoded string, using Latin-1 encoding.
 
    Examples:
        >>> b64encode("Hello")
        'SGVsbG8='
        >>> b64encode("IC")
        'SUM='
    """
    return bytes_to_string(base64.b64encode(string_to_bytes(data)))
 
 
def b64decode(data: str) -> str:
    """Decodes a base64 encoded string.
 
    Args:
        data (str): The base64 encoded string to be decoded. It should only
            contain valid base64 characters (A-Z, a-z, 0-9, +, /, =).
 
    Returns:
        str: The decoded string, using Latin-1 encoding.
 
    Examples:
        >>> b64decode("SGVsbG8=")
        'Hello'
        >>> b64decode("SUM=")
        'IC'
    """
    return bytes_to_string(base64.b64decode(string_to_bytes(data)))

AES ECB

Cel mai simplu mod de criptare este ECB (Electronic Codebook). Mesajul este împărțit în blocuri, fiecare bloc fiind criptat separat.

Ținând cont că fiecare bloc din mesaj este criptat individual cu cheia k, blocuri identice din mesaj vor rezulta în blocuri criptate identic. Putem astfel, în cazul în care folosim modul ECB, să ne așteptăm la multe porțiuni criptate repetate.

Această vulnerabilitate apare când:

Trimitem un INPUT către server
Serverul concatenează un mesaj secret INPUT → INPUT||secret
Serverul criptează mesajul trimis folosind propria cheie → AES-128-ECB(random-prefix || attacker-controlled || target-bytes, random-key)
Serverul trimite rezultatul criptat înapoi către noi.

PREFIX poate fi un header de pachet sau orice altă informație “inutilă”.

Pentru următoarele exerciții vom folosi scheletul de laborator.

Exercițiul 1 - Determinarea dimensiunii blocului (3p)

Primul pas necesar, într-un atac asupra unei criptări pe blocuri, este determinarea dimensiunii unui bloc. Deși cunoaștem deja dimensiunea în acest caz, este un pas necesar în alte situații.

Pentru a afla dimensiunea, este suficient să trimitem, pe rând, mesaje din ce în ce mai mari (“A”, “AA”, “AAA”, …).

Funcția find_block_size() mărește numărul de caractere adăugate, mărind lungimea mesajului.

Cum putem asocia dimensiunea mesajului cu numărul de blocuri criptate?

Exercițiul 2 - Determinarea dimensiunii prefixului (3p)

Vom aborda această problemă la fel ca în pasul anterior. Trimitem mesaje din ce în ce mai mari. Atunci când blocul criptat corespunzător prefixului nu se va mai schimba, putem calcula lungimea sa.

RRTT TT
RRXT TTT
RRXX TTTT
RRXX XTTT T  *detected that first block did not change*
RRXT TTT

În exemplul anterior am notat cu R caracterele prefixului, X = caracterele mesajului input și T mesajul target (secretul pe care dorim să îl aflăm).

Știm că avem lungimea (prefix + pad - 1) % block_size = 0

Ce se întâmplă dacă lungimea prefixului este mai mare decât lungimea unui singur bloc?

Exercițiul 3 - ECB Byte at a Time Attack (4p)

Presupunem că avem un algoritm de criptare-bloc care criptează 16 bytes, producând ciphertext de 16 bytes. Folosim acest algoritm pentru a cripta 2 blocuri de date necunoscute, m1 și m2. În plus, avem voie să trimitem propriul input m0, care va fi lipit în fața acestor blocuri. Pentru că putem trimite orice mesaj, vom alege să trimitem unul de lungime 16. Astfel, în cazul în care se folosește modul ECB, putem afla Enc(m0). Având acces la perechi de input propriu - ciphertext poate fi foarte util în acest caz, având de fapt un oracol. În cazul în care am trimite doar 15 bytes, putem afla ultimul byte prin brute force.

 Block 1          Block 2  Block 3
|RRXXXXXXXXXXXXX?|?......?|?......?|
 |----known----||--m1---|

Încercând toate cele 256 variante posibile pentru Block 1, putem asocia encripția corectă pentru un byte, folosind oracolul. Presupunem că am găsit byte-ul “w”. Repetăm același proces pentru următorul byte.

 Block 1          Block 2  Block 3
|RRXXXXXXXXXXXXw?|?......?|?......?|
 |----known----|

Repetând același proces, vom afla, rând pe rând, fiecare byte din secretul m1, găsind mesajul “we attack at daw”. Din păcate, în acest punct nu putem ajunge la mesajul m2. Dacă am alege m0 de lungime 0, obținem:

 M1               M2
|we attack at daw|?......?|
 |----known-----|

Totuși ne putem folosi de m1 pentru a continua atacul. Dacă alegem m0 de lungime 15 bytes, vom avea următoarea situatie:

 Block 1          Block 2          Block 3
|RRXXXXXXXXXXXXXw|e attack at daw?|?......?|
 |------------known-------------|

Se poate observa că putem să repetăm acum fix același proces pentru a afla byte-ul necunoscut din Block 2. Cât timp avem un oracol care să valideze atacul brute force, putem repeta procesul pentru a afla oricâți bytes din secret.

Lab Code

lab06.py

from math import ceil
import base64
import os
from random import randint
from Crypto.Cipher import AES
from utils import *
from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes
from cryptography.hazmat.backends import default_backend
backend = default_backend()
 
 
def split_bytes_in_blocks(x, block_size):
    nb_blocks = ceil(len(x)/block_size)
    return [x[block_size*i:block_size*(i+1)] for i in range(nb_blocks)]
 
 
def pkcs7_padding(message, block_size):
    padding_length = block_size - (len(message) % block_size)
    if padding_length == 0:
        padding_length = block_size
    padding = bytes([padding_length]) * padding_length
    return message + padding
 
 
def pkcs7_strip(data):
    padding_length = data[-1]
    return data[:- padding_length]
 
 
def encrypt_aes_128_ecb(msg, key):
    padded_msg = pkcs7_padding(msg, block_size=16)
    cipher = Cipher(algorithms.AES(key), modes.ECB(), backend=backend)
    encryptor = cipher.encryptor()
    return encryptor.update(padded_msg) + encryptor.finalize()
 
 
def decrypt_aes_128_ecb(ctxt, key):
    cipher = Cipher(algorithms.AES(key), modes.ECB(), backend=backend)
    decryptor = cipher.decryptor()
    decrypted_data = decryptor.update(ctxt) + decryptor.finalize()
    message = pkcs7_strip(decrypted_data)
    return message
 
# You are not suppose to see this
class Oracle:
    def __init__(self):
        self.key = 'Mambo NumberFive'.encode()
        self.prefix = 'PREF'.encode()
        self.target = base64.b64decode(  # You are suppose to break this
            "RG8gbm90IGxheSB1cCBmb3IgeW91cnNlbHZlcyB0cmVhc3VyZXMgb24gZWFydGgsIHdoZXJlIG1vdGggYW5kIHJ1c3QgZGVzdHJveSBhbmQgd2hlcmUgdGhpZXZlcyBicmVhayBpbiBhbmQgc3RlYWwsCmJ1dCBsYXkgdXAgZm9yIHlvdXJzZWx2ZXMgdHJlYXN1cmVzIGluIGhlYXZlbiwgd2hlcmUgbmVpdGhlciBtb3RoIG5vciBydXN0IGRlc3Ryb3lzIGFuZCB3aGVyZSB0aGlldmVzIGRvIG5vdCBicmVhayBpbiBhbmQgc3RlYWwuCkZvciB3aGVyZSB5b3VyIHRyZWFzdXJlIGlzLCB0aGVyZSB5b3VyIGhlYXJ0IHdpbGwgYmUgYWxzby4="
        )
 
    def encrypt(self, message):
        return encrypt_aes_128_ecb(
            self.prefix + message + self.target,
            self.key
        )
 
# Task 1
def findBlockSize():
    initialLength = len(Oracle().encrypt(b''))
    i = 0
    while 1:  # Feed identical bytes of your-string to the function 1 at a time until you get the block length
        # You will also need to determine here the size of fixed prefix + target + pad
        # And the minimum size of the plaintext to make a new block
        length = len(Oracle().encrypt(b'X'*i))
        i += 1
 
        return block_size, sizeOfPrefixTargetPadding, minimumSizeToAlighPlaintext
 
# Task 2
def findPrefixSize(block_size):
    previous_blocks = None
    # Find the situation where prefix_size + padding_size - 1 = block_size
    # Use split_bytes_in_blocks to get blocks of size(block_size)
 
    return prefix_size
 
 
# Task 3
def recoverOneByteAtATime(block_size, prefix_size, target_size):
    known_target_bytes = b""
    for _ in range(target_size):
        # prefix_size + padding_length + known_len + 1 = 0 mod block_size
        known_len = len(known_target_bytes)
 
        padding_length = (- known_len - 1 - prefix_size) % block_size
        padding = b"X" * padding_length
 
        # target block plaintext contains only known characters except its last character
        # Don't forget to use split_bytes_in_blocks to get the correct block
 
        # trying every possibility for the last character
 
    print(known_target_bytes.decode())
 
 
# Find block size, prefix size, and length of plaintext size to allign blocks
block_size, sizeOfPrefixTargetPadding, minimumSizeToAlignPlaintext = findBlockSize()
print("Block size:\t\t\t\t" + str(block_size))
print("Size of prefix, target, and padding:\t" + str(sizeOfPrefixTargetPadding))
print("Pad needed to align:\t\t\t" + str(minimumSizeToAlignPlaintext))
 
# Find size of the prefix
prefix_size = findPrefixSize(block_size)
print("\nPrefix Size:\t" + str(prefix_size))
 
# Size of the target
target_size = sizeOfPrefixTargetPadding - \
    minimumSizeToAlignPlaintext - prefix_size
 
print("\nTarget:")
recoverOneByteAtATime(block_size, prefix_size, target_size)

ic/labs/06.1696192280.txt.gz · Last modified: 2023/10/01 23:31 by razvan.smadu

Old revisions

Media Manager Back to top