Differences
This shows you the differences between two versions of the page.
Both sides previous revision Previous revision Next revision | Previous revision | ||
lfa:2024:lab04 [2024/10/27 21:38] cata_chiru |
lfa:2024:lab04 [2024/10/29 09:27] (current) cata_chiru |
||
---|---|---|---|
Line 46: | Line 46: | ||
<note> | <note> | ||
- | Although Python is dynamically typed, we still encourage you to write the types for parameters and outputs explicitly, as they contribute to documenting the code. | + | Although Python is dynamically typed, we still encourage you to **write the types for parameters and outputs explicitly**, as they **contribute to documenting the code**. |
- | Further, when writing python code for interviews, the employers usually follow this aspect and grade you in consequence. | + | Further, when writing Python code for interviews, **employers usually pay attention to this aspect and grade you accordingly**. |
</note> | </note> | ||
+ | ==== Your Task ==== | ||
+ | Your task is to implement the missing pieces of code in the regex.py file, following the comments in the TODOs: | ||
- | ==== Subtitlu ==== | + | <code python> |
+ | # This is an auxiliary method used to generate our strings by length and alphabetically | ||
+ | # E.g: (b | aa)* = [b, aa, bb, aab, baa, bbb, aaaa, ...] | ||
+ | def convert_to_sorted_set(lst: [str]) -> [str]: | ||
+ | '''Converts a list to a sorted set''' | ||
+ | crt_lst = list(set(lst)) | ||
+ | return sorted(crt_lst, key = lambda x: (len(x), x)) | ||
+ | |||
+ | # Global variable used to threshold the number of Star items | ||
+ | INNER_STAR_NO_ITEMS = 3 | ||
+ | |||
+ | class Regex: | ||
+ | '''Base class for Regex ADT''' | ||
+ | |||
+ | def __str__(self) -> str: | ||
+ | '''Returns the string representation of the regular expression''' | ||
+ | pass | ||
+ | |||
+ | def gen(self) -> [str]: | ||
+ | '''Return a representative set of strings that the regular expression can generate''' | ||
+ | pass | ||
+ | |||
+ | def __len__(self) -> int: | ||
+ | '''Returns the length of the regular expression''' | ||
+ | pass | ||
+ | |||
+ | def eval_gen(self): | ||
+ | '''Prints the set of strings that the regular expression can generate''' | ||
+ | pass | ||
+ | |||
+ | # TODO 0: Implementati Clasa Void dupa blueprint-ul de mai sus | ||
+ | class Void(Regex): | ||
+ | '''Represents the empty regular expression''' | ||
+ | |||
+ | # Va returna Void ca string | ||
+ | def __str__(self) -> str: | ||
+ | pass | ||
+ | |||
+ | # Va genera un obiect din Python corespunzator clasei Void | ||
+ | def gen(self) -> [str]: | ||
+ | pass | ||
+ | |||
+ | def __len__(self): | ||
+ | return 0 | ||
+ | |||
+ | def eval_gen(self): | ||
+ | print("Void generates nothing") | ||
+ | |||
+ | |||
+ | # TODO 1: Implementati Clasa Epsilon-String (Empty) dupa blueprint-ul de mai sus | ||
+ | class Empty(Regex): | ||
+ | '''Represents the empty string regular expression''' | ||
+ | |||
+ | # Va returna Empty ca string | ||
+ | def __str__(self) -> str: | ||
+ | pass | ||
+ | |||
+ | # Va returna o lista corespunzatoare a ce stringuri produce sirul vid | ||
+ | def gen(self) -> [str]: | ||
+ | pass | ||
+ | |||
+ | # Va returna lungimea corespunzatoare sirului vid | ||
+ | def __len__(self) -> int: | ||
+ | pass | ||
+ | |||
+ | def eval_gen(self): | ||
+ | print(f"Empty string has length {len(self)} generates ''.") | ||
+ | |||
+ | # TODO 2: Implementati functionalitatile necesare pentru Clasa Symbol dupa blueprint-ul de mai sus | ||
+ | class Symbol(Regex): | ||
+ | '''Represents a symbol in the regular expression''' | ||
+ | |||
+ | def __init__(self, char: str): | ||
+ | self.char = char | ||
+ | |||
+ | # TODO 2: Completati metodele __str__ si gen pentru clasa Symbol | ||
+ | def __str__(self) -> str: | ||
+ | pass | ||
+ | |||
+ | def gen(self) -> [str]: | ||
+ | pass | ||
+ | |||
+ | def __len__(self) -> int: | ||
+ | return 1 | ||
+ | |||
+ | def eval_gen(self): | ||
+ | self.gen() | ||
+ | |||
+ | # Dorim sa pastram in atributul words al clasei curente stringurile generate cu gen(self) | ||
+ | return f"Symbol {self.char} has length {len(self)} generates {self.words}" | ||
+ | |||
+ | # TODO 3: Implementati functionalitatile necesare pentru Clasa Union dupa blueprint-ul de mai sus | ||
+ | class Union(Regex): | ||
+ | '''Represents the union of two regular expressions''' | ||
+ | def __init__(self, *arg: [Regex]): | ||
+ | self.components = arg | ||
+ | |||
+ | # TODO 3: Completati metodele __str__, gen, __len__ si eval_gen pentru clasa Union | ||
+ | |||
+ | # Hint: Look at the str.join method to create (expr1|expr2|...) | ||
+ | def __str__(self) -> str: | ||
+ | pass | ||
+ | |||
+ | def gen(self) -> [str]: | ||
+ | pass | ||
+ | |||
+ | # We will consider the len of a Union as the max length of its components | ||
+ | def __len__(self) -> int: | ||
+ | pass | ||
+ | |||
+ | # Dupa modelul de la Symbol, vom dori ca urmatoarele eval sa implementeze | ||
+ | # return "Class {varianta toString() a clasei} has length {len(self)} and generates {self.words}" | ||
+ | def eval_gen(self) -> str: | ||
+ | pass | ||
+ | |||
+ | # TODO 4: Implementati functionalitatile necesare pentru Clasa Concat dupa blueprint-ul de mai sus | ||
+ | class Concat(Regex): | ||
+ | '''Represents the concatenation of two regular expressions''' | ||
+ | |||
+ | def __init__(self, *arg : [Regex]): | ||
+ | self.components = arg | ||
+ | |||
+ | # TODO 4: Completati metodele __str__, gen, __len__ si eval_gen pentru clasa Concat | ||
+ | def __str__(self) -> str: | ||
+ | pass | ||
+ | |||
+ | def gen(self) -> [str]: | ||
+ | pass | ||
+ | |||
+ | def __len__(self) -> int: | ||
+ | pass | ||
+ | |||
+ | def eval_gen(self) -> str: | ||
+ | pass | ||
+ | |||
+ | # TODO 5: Implementati functionalitatile necesare pentru Clasa Star dupa blueprint-ul de mai sus | ||
+ | class Star(Regex): | ||
+ | '''Represents the Kleene star (zero or more repetitions) of a regular expression''' | ||
+ | def __init__(self, regex: Regex): | ||
+ | self.regex = regex | ||
+ | |||
+ | # To memorize the base words generated by the regex inside the star, we store them in a list | ||
+ | self.base_words = [""] | ||
+ | self.words = [] | ||
+ | |||
+ | # TODO 5: Completati metodele __str__, gen, __len__, eval_gen pentru clasa Star | ||
+ | def __str__(self) -> str: | ||
+ | pass | ||
+ | |||
+ | def gen(self, no_items = 10) -> [str]: | ||
+ | pass | ||
+ | |||
+ | # To ease your implementation, we will consider a big number, e.g. 10000000, as Infinity | ||
+ | def __len__(self) -> int: | ||
+ | pass | ||
+ | |||
+ | def eval_gen(self, no_items = 10): | ||
+ | pass | ||
+ | |||
+ | |||
+ | if __name__ == "__main__": | ||
+ | r1 = Symbol('a') | ||
+ | r2 = Symbol('b') | ||
+ | regex_union = Union(r1, r2) | ||
+ | regex_union.eval_gen() | ||
+ | |||
+ | e2 = Union(Symbol('a'),Symbol('b'), Symbol('c')) | ||
+ | e4 = Concat(r1, e2) | ||
+ | e4.eval_gen() | ||
+ | |||
+ | e5 = Concat(e2, e2, r2) | ||
+ | e5.eval_gen() | ||
+ | |||
+ | star_ex = Star(r1) | ||
+ | star_ex.eval_gen() | ||
+ | |||
+ | regex_concat = Concat(regex_union, star_ex) | ||
+ | regex_concat.eval_gen() | ||
+ | |||
+ | last_expr = Concat(Star(Union(Symbol('a'), Symbol('b'))), Symbol('b'), Star(Symbol('c'))) | ||
+ | last_expr.eval_gen() | ||
+ | </code> | ||
+ | |||
+ | The output should be similar to: | ||
+ | <code> | ||
+ | Union (a | b) has length 1 and generates ['a', 'b']. | ||
+ | |||
+ | Concat (a(a | b | c)) has length 2 and generates ['aa', 'ab', 'ac']. | ||
+ | |||
+ | Concat ((a | b | c)(a | b | c)b) has length 3 and generates ['aab', 'abb', 'acb', 'bab', 'bbb', 'bcb', 'cab', 'cbb', 'ccb']. | ||
+ | |||
+ | Star (a*) has length 10000000 and generates ['', 'a', 'aa', 'aaa', 'aaaa', 'aaaaa', 'aaaaaa', 'aaaaaaa', 'aaaaaaaa', 'aaaaaaaaa', 'aaaaaaaaaa', '...']. | ||
+ | |||
+ | Concat ((a | b)(a*)) has length 10000001 and generates ['a', 'b', 'aa', 'ba', 'aaa', 'baa', 'aaaa', 'baaa', '...']. | ||
+ | |||
+ | Concat (((a | b)*)b(c*)) has length 20000001 and generates ['b', 'ab', 'bb', 'bc', 'aab', 'abb', 'abc', 'bab', 'bbb', 'bbc', 'bcc', 'aaab', 'aabb', 'aabc', 'abab', 'abbb', 'abbc', 'abcc', 'baab', 'babb', 'babc', 'bbab', 'bbbb', 'bbbc', 'bbcc', 'bccc', '...']. | ||
+ | |||
+ | </code> | ||
+ | |||
+ | ==== Implementation Details ==== | ||
+ | |||
+ | We have to take the precedence rules into account and implement each possibility accordingly: | ||
+ | |||
+ | For example, what is the outer class of a(a|b|c)? | ||
+ | |||
+ | <hidden Answer> | ||
+ | Concat | ||
+ | </hidden> | ||
+ | |||
+ | How can we write this Regex using our classes? | ||
+ | |||
+ | <hidden Answer> | ||
+ | Concat(Symbol('a'), Union(Symbol('a'), Symbol('b'), Symbol('c'))) | ||
+ | </hidden> | ||
- | abc | ||
<note> | <note> | ||
- | Cum arata cod python | + | Note that we have used the following code structure, which takes a variable number of parameters as input, for the Concat and Union constructors: |
<code python> | <code python> | ||
- | def main(): | + | def __init__(self, *arg : [Regex]): |
- | ... # your code here | + | self.components = arg |
- | + | ||
- | if __name__ == "__main__": | + | |
- | main() | + | |
</code> | </code> | ||
+ | This design choice was meant to ease your work when representing more complex regexes. The alternative would be to use binary operations and nest identical operators: (a|b|c) = Union(Symbol('a'), Union(Symbol('b'), Symbol('c'))) = (a|(b|c)) | ||
+ | </note> | ||
+ | |||
+ | What should it generate? | ||
+ | <hidden Answer> | ||
+ | [aa, ab, ac] | ||
+ | </hidden> | ||
+ | |||
+ | Therefore, we should analyse each possibility and, for each class, consider whether containing other types of regular expressions changes what the regex generates, adapting our code to handle this recursion between classes. | ||
+ | |||
+ | We should develop this process from simple examples to more complex ones: Symbol, Concat(Symbol, Symbol), Union(Symbol, Symbol), Concat(Symbol, Union), Concat(Union, Union)... | ||
+ | |||
+ | <note> | ||
+ | If there are design choices in this laboratory that do not suit you, please feel free to adapt your implementation accordingly, as long as you keep the same classes and the main functionalities for ''%%__str__%%'', ''gen'' and ''eval_gen''. | ||
</note> | </note> | ||