This is an old revision of the document!
Inainte de laborator: Video Laborator 6
The powerpoint presentation for this lab can be found here.
"""String, hex and bit-string conversion helpers used throughout the lab.

All helpers operate on Python ``str`` values: a "bit-string" is a ``str`` of
``'0'``/``'1'`` characters, a "hex-string" is a ``str`` of hexadecimal digits
(two per byte), and a plain string is treated as one byte per character via
``ord``/``chr``.
"""
import base64
from typing import Iterator


# CONVERSION FUNCTIONS

def _chunks(string: str, chunk_size: int) -> Iterator[str]:
    """Yield consecutive ``chunk_size``-long slices of ``string``.

    The final slice is shorter when ``len(string)`` is not a multiple of
    ``chunk_size``.
    """
    for i in range(0, len(string), chunk_size):
        yield string[i:i + chunk_size]


def byte_2_bin(bval: int) -> str:
    """Transform a byte (8-bit) value into a bitstring, left-padded to 8 bits."""
    return bin(bval)[2:].zfill(8)


def _hex(x: int) -> str:
    """Format an integer as at least two lowercase hex digits."""
    return format(x, '02x')


def hex_2_bin(data: str) -> str:
    """Convert a hex-string into a bit-string (8 bits per hex pair)."""
    return ''.join(f'{int(x, 16):08b}' for x in _chunks(data, 2))


def str_2_bin(data: str) -> str:
    """Convert a string into a bit-string (8 bits per character)."""
    return ''.join(f'{ord(c):08b}' for c in data)


def bin_2_hex(data: str) -> str:
    """Convert a bit-string into a hex-string (two hex digits per 8 bits)."""
    return ''.join(f'{int(b, 2):02x}' for b in _chunks(data, 8))


def str_2_hex(data: str) -> str:
    """Convert a string into a hex-string (two hex digits per character)."""
    return ''.join(f'{ord(c):02x}' for c in data)


def bin_2_str(data: str) -> str:
    """Convert a bit-string into a string (one character per 8 bits)."""
    return ''.join(chr(int(b, 2)) for b in _chunks(data, 8))


def hex_2_str(data: str) -> str:
    """Convert a hex-string into a string (one character per hex pair)."""
    return ''.join(chr(int(x, 16)) for x in _chunks(data, 2))


# XOR FUNCTIONS

def strxor(a: str, b: str) -> str:
    """XOR two strings character by character; the longer input is trimmed."""
    return ''.join(chr(ord(x) ^ ord(y)) for (x, y) in zip(a, b))


def bitxor(a: str, b: str) -> str:
    """XOR two bit-strings bit by bit; the longer input is trimmed."""
    return ''.join(str(int(x) ^ int(y)) for (x, y) in zip(a, b))


def hexxor(a: str, b: str) -> str:
    """XOR two hex-strings byte by byte; the longer input is trimmed."""
    return ''.join(_hex(int(x, 16) ^ int(y, 16))
                   for (x, y) in zip(_chunks(a, 2), _chunks(b, 2)))


# BASE64 FUNCTIONS

def b64decode(data: str) -> str:
    """Base64-decode ``data`` and return the result decoded as UTF-8 text."""
    return bytes_to_string(base64.b64decode(string_to_bytes(data)))


def b64encode(data: str) -> str:
    """Base64-encode the UTF-8 bytes of ``data`` and return the result as text."""
    return bytes_to_string(base64.b64encode(string_to_bytes(data)))


# PYTHON3 'BYTES' FUNCTIONS

def bytes_to_string(bytes_data: bytes) -> str:
    """Decode ``bytes_data`` to ``str`` using the default codec (UTF-8)."""
    return bytes_data.decode()  # default utf-8


def string_to_bytes(string_data: str) -> bytes:
    """Encode ``string_data`` to ``bytes`` using the default codec (UTF-8)."""
    return string_data.encode()  # default utf-8
The simplest of the encryption modes is the Electronic Codebook (ECB) mode (named after conventional physical codebooks). The message is divided into blocks, and each block is encrypted separately.
Since each block of plaintext is encrypted with the key independently, identical blocks of plaintext will yield identical blocks of ciphertext. Lots of people know that when you encrypt something in ECB mode, you can see penguins through it.
The vulnerability happens when:
For the next exercises, we will use the following code stub.
The first step in attacking a block-based cipher is to determine the size of the block. Feed identical bytes of your-string to the function 1 at a time - start with 1 byte (“A”), then “AA”, then “AAA” and so on. Discover the block size of the cipher. You know it, but do this step anyway.
How does the message length relate to the number of cipher blocks?
We give some chosen plaintext of increasing length to the oracle. When we detect a block that does not change with the addition of one more byte of chosen plaintext, this means this block only contains prefix and chosen plaintext. Eg:
RRTT TT
RRXT TTT
RRXX TTTT
RRXX XTTT T    *detected that first block did not change*
RRXT TTT
Using R to denote the random prefix, X for the input we would give to the oracle (hereafter called the chosen plaintext) and T for target.
Now we know the pad length required to align the target to blocks.
Suppose we have a block cipher that takes a 16 byte plaintext and produces a 16 byte ciphertext. We use this block cipher to encrypt two blocks worth of unknown data, call them m1 and m2. Additionally we are allowed to prepend some data to these two blocks, let's call it m0 (we control this data). Note that in this scheme nothing prevents us from choosing an m0 that is 16 bytes long. This means we effectively have an encryption oracle for a full block, since the first block returned in this case would be Enc(m0) if ECB mode is being used. This means we can get the encryption of arbitrary blocks of data, which will come in handy. We can set m0 equal to 15 known bytes, and if we have an encryption oracle we can brute force the last byte:
 Block 1          Block 2  Block 3
|RRXXXXXXXXXXXXX?|?......?|?......?|
|----known----||--m1---|
We just have to send all 256 possible guesses for Block 1 to the encryption oracle and see which one matches the output. Let's say we get a match on the byte encoding “w”. We then repeat the process with a one byte shorter m0 to get the next byte in the same fashion:
 Block 1          Block 2  Block 3
|RRXXXXXXXXXXXXw?|?......?|?......?|
|----known----|
We can repeat this process for each byte until we have the whole first block m1, which let's say is “we attack at daw”. Unfortunately at this point we can't reduce m0 by any more bytes since m0 would be 0 bytes and we would simply get:
 M1               M2
|we attack at daw|?......?|
|----known-----|
But since we now know all of m1, we can use the same sort of attack we used to recover the first byte of m1 to recover the first byte of m2. Suppose we again choose m0 to be of length 15 bytes:
 Block 1          Block 2          Block 3
|RRXXXXXXXXXXXXXw|e attack at daw?|?......?|
|------------known-------------|
There's only one unknown byte in Block 2 so all we have to do is again submit all 256 guesses to the encryption oracle, except this time for Block 2 instead of Block 1! This process can be repeated to decrypt an arbitrary amount of ciphertext that is ECB encrypted as long as we can prepend data to the plaintext and have access to an encryption oracle.
"""Byte-at-a-time decryption of an AES-128-ECB oracle with a fixed prefix.

The oracle encrypts ``prefix + chosen_plaintext + target`` under a fixed key.
Because ECB encrypts every block independently, equal plaintext blocks give
equal ciphertext blocks, which lets the attacker recover ``target`` one byte
at a time using only the ``encrypt`` interface.
"""
from math import ceil
import base64
import os
from random import randint
from Crypto.Cipher import AES
from utils import *
from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes
from cryptography.hazmat.backends import default_backend

backend = default_backend()


def split_bytes_in_blocks(x, block_size):
    """Split ``x`` into consecutive ``block_size``-long slices.

    The last slice is shorter when ``len(x)`` is not a multiple of
    ``block_size``; an empty input yields an empty list.
    """
    nb_blocks = ceil(len(x) / block_size)
    return [x[block_size * i:block_size * (i + 1)] for i in range(nb_blocks)]


def pkcs7_padding(message, block_size):
    """Return ``message`` with PKCS#7 padding appended.

    Between 1 and ``block_size`` bytes are added, each holding the padding
    length, so an already aligned message gains one full block of padding.
    """
    # The expression below is always in 1..block_size, so no special case is
    # needed for aligned messages.
    padding_length = block_size - (len(message) % block_size)
    return message + bytes([padding_length]) * padding_length


def pkcs7_strip(data):
    """Remove PKCS#7 padding from ``data`` and return the unpadded bytes.

    Raises:
        ValueError: if ``data`` is empty or does not end in valid padding.
    """
    if not data:
        raise ValueError("cannot strip PKCS#7 padding from empty data")
    padding_length = data[-1]
    # A zero length would make data[:-0] return b'' and silently drop the
    # whole message, so reject it together with other malformed paddings.
    if not 1 <= padding_length <= len(data):
        raise ValueError("invalid PKCS#7 padding length")
    if data[-padding_length:] != bytes([padding_length]) * padding_length:
        raise ValueError("invalid PKCS#7 padding bytes")
    return data[:-padding_length]


def encrypt_aes_128_ecb(msg, key):
    """Pad ``msg`` to 16-byte blocks and encrypt it with AES in ECB mode."""
    padded_msg = pkcs7_padding(msg, block_size=16)
    cipher = Cipher(algorithms.AES(key), modes.ECB(), backend=backend)
    encryptor = cipher.encryptor()
    return encryptor.update(padded_msg) + encryptor.finalize()


def decrypt_aes_128_ecb(ctxt, key):
    """Decrypt ``ctxt`` with AES in ECB mode and strip the PKCS#7 padding."""
    cipher = Cipher(algorithms.AES(key), modes.ECB(), backend=backend)
    decryptor = cipher.decryptor()
    decrypted_data = decryptor.update(ctxt) + decryptor.finalize()
    message = pkcs7_strip(decrypted_data)
    return message


# You are not supposed to see this
class Oracle:
    """Encryption oracle: AES-128-ECB over ``prefix + message + target``."""

    def __init__(self):
        self.key = 'Mambo NumberFive'.encode()
        self.prefix = 'PREF'.encode()
        self.target = base64.b64decode(  # You are supposed to break this
            "RG8gbm90IGxheSB1cCBmb3IgeW91cnNlbHZlcyB0cmVhc3VyZXMgb24gZWFydGgsIHdoZXJlIG1vdGggYW5kIHJ1c3QgZGVzdHJveSBhbmQgd2hlcmUgdGhpZXZlcyBicmVhayBpbiBhbmQgc3RlYWwsCmJ1dCBsYXkgdXAgZm9yIHlvdXJzZWx2ZXMgdHJlYXN1cmVzIGluIGhlYXZlbiwgd2hlcmUgbmVpdGhlciBtb3RoIG5vciBydXN0IGRlc3Ryb3lzIGFuZCB3aGVyZSB0aGlldmVzIGRvIG5vdCBicmVhayBpbiBhbmQgc3RlYWwuCkZvciB3aGVyZSB5b3VyIHRyZWFzdXJlIGlzLCB0aGVyZSB5b3VyIGhlYXJ0IHdpbGwgYmUgYWxzby4="
        )

    def encrypt(self, message):
        """Return the ciphertext of ``prefix + message + target``."""
        return encrypt_aes_128_ecb(
            self.prefix + message + self.target,
            self.key
        )


# Task 1
def findBlockSize():
    """Discover the cipher block size by growing the chosen plaintext.

    Returns:
        A tuple ``(block_size, sizeOfTheFixedPrefixPlusTarget,
        minimumSizeToAlignPlaintext)`` where the second item is the ciphertext
        length for an empty message (prefix + target + padding) and the third
        is the smallest number of chosen bytes that makes the ciphertext grow
        by one block (which equals the padding length of the empty message).

    Raises:
        RuntimeError: if the ciphertext length never changes.
    """
    oracle = Oracle()
    initialLength = len(oracle.encrypt(b''))
    max_probe = 1024  # safety bound so a misbehaving oracle cannot hang us
    # Feed identical bytes one at a time until the ciphertext jumps by a block.
    for i in range(1, max_probe + 1):
        length = len(oracle.encrypt(b'X' * i))
        if length != initialLength:
            block_size = length - initialLength
            sizeOfTheFixedPrefixPlusTarget = initialLength
            minimumSizeToAlignPlaintext = i
            return (block_size, sizeOfTheFixedPrefixPlusTarget,
                    minimumSizeToAlignPlaintext)
    raise RuntimeError("ciphertext length never changed; cannot find block size")


# Task 2
def findPrefixSize(block_size):
    """Return the length of the fixed prefix the oracle prepends.

    Two probes that differ only in their last chosen byte are compared: the
    block holding that byte differs until the filler in front of it is long
    enough to push the byte into the next block. Using two different trailing
    bytes avoids false positives when the target happens to start with the
    filler byte.

    Raises:
        ValueError: if the prefix boundary cannot be located.
    """
    oracle = Oracle()

    # Index of the block that contains the first chosen byte.
    blocks_a = split_bytes_in_blocks(oracle.encrypt(b"A"), block_size)
    blocks_b = split_bytes_in_blocks(oracle.encrypt(b"B"), block_size)
    first_input_block = next(
        (i for i, (x, y) in enumerate(zip(blocks_a, blocks_b)) if x != y),
        None,
    )
    if first_input_block is None:
        raise ValueError("oracle output does not depend on the chosen plaintext")

    # Smallest filler that completes that block: prefix_size + fill is then a
    # multiple of block_size.
    for fill in range(1, block_size + 1):
        filler = b"A" * fill
        block_b = split_bytes_in_blocks(
            oracle.encrypt(filler + b"B"), block_size)[first_input_block]
        block_c = split_bytes_in_blocks(
            oracle.encrypt(filler + b"C"), block_size)[first_input_block]
        if block_b == block_c:
            prefix_size = (first_input_block + 1) * block_size - fill
            return prefix_size
    raise ValueError("could not locate the end of the prefix")


# Task 3
def recoverOneByteAtATime(block_size, prefix_size, target_size):
    """Recover and print the oracle's target, one byte per iteration.

    Args:
        block_size: cipher block size in bytes.
        prefix_size: length of the oracle's fixed prefix.
        target_size: number of target bytes to recover.

    Returns:
        The recovered target bytes (also printed, decoded as UTF-8).

    Raises:
        ValueError: if no candidate byte reproduces the observed block.
    """
    oracle = Oracle()
    known_target_bytes = b""
    for _ in range(target_size):
        # prefix_size + padding_length + known_len + 1 = 0 mod block_size
        known_len = len(known_target_bytes)
        padding_length = (- known_len - 1 - prefix_size) % block_size
        padding = b"X" * padding_length

        # The target block's plaintext contains only known characters except
        # its last one, which is the next unknown target byte.
        block_index = (prefix_size + padding_length + known_len) // block_size
        wanted_block = split_bytes_in_blocks(
            oracle.encrypt(padding), block_size)[block_index]

        # Try every possibility for the last character.
        for candidate in range(256):
            guess = padding + known_target_bytes + bytes([candidate])
            guess_block = split_bytes_in_blocks(
                oracle.encrypt(guess), block_size)[block_index]
            if guess_block == wanted_block:
                known_target_bytes += bytes([candidate])
                break
        else:
            raise ValueError(
                "no candidate matched for target byte %d" % known_len)

    print(known_target_bytes.decode())
    return known_target_bytes


# Find block size, prefix size, and length of plaintext size to align blocks
block_size, sizeOfTheFixedPrefixPlusTarget, minimumSizeToAlignPlaintext = findBlockSize()
print("Block size:\t\t\t" + str(block_size))
print("Size of prefix and target:\t" + str(sizeOfTheFixedPrefixPlusTarget))
print("Pad needed to align:\t\t" + str(minimumSizeToAlignPlaintext))

# Find size of the prefix
prefix_size = findPrefixSize(block_size)
print("\nPrefix Size:\t" + str(prefix_size))

# Size of the target
target_size = sizeOfTheFixedPrefixPlusTarget - \
    minimumSizeToAlignPlaintext - prefix_size

print("\nTarget:")
recoverOneByteAtATime(block_size, prefix_size, target_size)