This is an old revision of the document!
L'AST est un arbre représentant le format source. Il est indépendant de la forme de la grammaire. L'AST peut être une transformation du parse tree ou peut être généré directement par l'analyseur.
Dans l'AST, chaque règle du langage a un nœud.
Pour ce laboratoire, vous pouvez définir le format de nœud AST comme vous le souhaitez. Un point de départ est le suivant:
{ type:"node_type", ... }
Pour le code suivant
2+3
var a of int
function fid (a of int)
5+6
endfunction
Nous pouvons décrire l'AST suivant
{ type: 'program', elements: [ { type:'expr', op: '+', left: { type: 'int', value: 2 }, right: { type: 'int', value: 3 } }, { type: 'var', variables: [ { id: 'a', type: 'int' } ] }, { type: 'function_definition', id: 'fid', parameters:[ { id:'a', type:'int' } ] } ] }
/* Jison example file */

/* Tokens part */
%lex

%%

/* RegEx */

// add newline for Windows (\r\n) and Linux/Unix/Mac (\n)
\r?\n                       return 'NEWLINE';
// \s includes \n, space and tab; we need the NEWLINE token, so only space and tab are treated as white space
[ \t]                       /* skip whitespace */
var                         return 'VAR';
of                          return 'OF';
int                         return 'INT';
float                       return 'FLOAT';
string                      return 'STRING';
function                    return 'FUNCTION';
endfunction                 return 'END_FUNCTION';
[0-9]+("."[0-9]+)?          return 'NUMBER';
\"[^\"]*\"                  return 'STRING_VALUE';
// add the token for the variable
[A-Za-z][A-Za-z0-9]*        return 'IDENTIFIER';
'='                         return '=';
"-"                         return '-';
"+"                         return '+';
"*"                         return '*';
"/"                         return '/';
"("                         return '(';
")"                         return ')';
','                         return ',';

/lex

/* Grammar part, for this lab */

// when it is ambiguous, derive the left part
%left '+' '-'
// * and / have higher priority
%left '*' '/'

%{
/**
 * Build a parse tree node for a grammar rule.
 *
 * @param {string} rule_name - name of the grammar rule that was reduced
 * @param {Array} items - children of the node (tokens and nested rules)
 * @returns {{rule: string, items: Array}} the parse tree node
 */
function rule (rule_name, items)
{
    return {
        rule: rule_name,
        items: items
    };
}

/**
 * Build a parse tree leaf for a token.
 *
 * @param {string} token_name - name of the token
 * @param {*} value - value carried by the token
 * @returns {{token: string, value: *}} the parse tree leaf
 */
function token (token_name, value)
{
    return {
        token: token_name,
        value: value
    };
}
%}

%%

start: expressions
        {
            $$ = rule ('start', [$1]);
            // returning from the start rule makes this value the result of parser.parse ()
            return $$;
            // AST
            // return {
            //     type: 'program',
            //     elements: $1
            // }
        };

// NOTE(review): the nodes built here are labelled 'statement' rather than 'expressions';
// the label is left unchanged so the printed parse tree keeps its shape.
expressions: statement NEWLINE expressions
        {
            $$ = rule ('statement', [$1, token ('NEWLINE', $2), $3]);
            // AST
            // $3.splice (0, 0, $1); // add the expression to the array produced by expressions ($3)
            // $$ = $3;
        }
    | statement NEWLINE
        {
            $$ = rule ('statement', [$1, token ('NEWLINE', $2)]);
            // AST
            // $$ = [$1]; // an array with one element
        }
    | statement
        {
            $$ = rule ('statement', [$1]);
            // AST
            // $$ = [$1]; // an array with one element
        };

statement: expr
        {
            $$ = rule ('statement', [$1]);
        }
    | variable
        {
            $$ = rule ('statement', [$1]);
        }
    | assign
        {
            $$ = rule ('statement', [$1]);
        }
    | function
        {
            $$ = rule ('function', [$1]);
        }
    | function_run
        {
            $$ = rule ('function_run', [$1]);
        };

expr: '(' expr ')'
        {
            $$ = rule ('expr', [token ('(', $1), $2, token (')', $3)]);
        }
    | expr '+' expr
        {
            $$ = rule ('expr', [$1, token ('+', $2), $3]);
        }
    | expr '-' expr
        {
            $$ = rule ('expr', [$1, token ('-', $2), $3]);
        }
    | expr '*' expr
        {
            $$ = rule ('expr', [$1, token ('*', $2), $3]);
        }
    | expr '/' expr
        {
            $$ = rule ('expr', [$1, token ('/', $2), $3]);
        }
    | NUMBER
        {
            // $1 is a string, so we store its float value
            $$ = token ('NUMBER', parseFloat ($1));
        }
    | IDENTIFIER
        {
            // store the variable
            $$ = token ('IDENTIFIER', $1);
        }
    | STRING_VALUE
        {
            // store the string without its quotes: the lexer match includes the opening
            // and the closing quote, so keep the characters from index 1 up to (but not
            // including) the last one
            $$ = token ('STRING_VALUE', $1.substring (1, $1.length-1));
        };

variable: VAR variables
        {
            $$ = rule ('variable', [token ('VAR', $1), $2]);
        };

variables: IDENTIFIER ',' variables
        {
            $$ = rule ('variables', [
                token ('IDENTIFIER', $1),
                token (',', ','),
                $3
            ]);
        }
    | IDENTIFIER
        {
            $$ = token ('IDENTIFIER', $1);
        }
    | IDENTIFIER OF type ',' variables
        {
            $$ = rule ('variables', [
                token ('IDENTIFIER', $1),
                token ('OF', 'of'),
                $3,
                token (',', ','),
                $5
            ]);
        }
    | IDENTIFIER OF type
        {
            $$ = rule ('variables', [
                token ('IDENTIFIER', $1),
                token ('OF', 'of'),
                $3
            ]);
        };

type: INT
        {
            $$ = token ('INT', 'int');
        }
    | FLOAT
        {
            $$ = token ('FLOAT', 'float');
        }
    | STRING
        {
            $$ = token ('STRING', 'string');
        };

assign: IDENTIFIER '=' expr
        {
            $$ = rule ('assign', [
                token ('IDENTIFIER', $1),
                token ('=', '='),
                $3
            ]);
        };

function: FUNCTION IDENTIFIER '(' parameters ')' NEWLINE expressions END_FUNCTION
        {
            $$ = rule ('function', [
                token ('FUNCTION', $1),
                token ('IDENTIFIER', $2),
                token ('(', $3),
                $4,
                token (')', $5),
                token ('NEWLINE', $6),
                $7,
                token ('END_FUNCTION', $8)
            ]);
        };

// the parameter name is wrapped in an IDENTIFIER token, like every other identifier in the tree
parameters: IDENTIFIER OF type ',' parameters
        {
            $$ = rule ('parameters', [
                token ('IDENTIFIER', $1),
                token ('OF', 'of'),
                $3,
                token (',', $4),
                $5
            ]);
        }
    | IDENTIFIER OF type
        {
            $$ = rule ('parameters', [
                token ('IDENTIFIER', $1),
                token ('OF', 'of'),
                $3
            ]);
        }
    |
        {
            // no parameters
            $$ = token ('EMPTY', '');
        };

function_run: IDENTIFIER '(' parameters_run ')'
        {
            $$ = rule ('function_run', [
                token ('IDENTIFIER', $1),
                token ('(', $2),
                $3,
                token (')', $4)
            ]);
        };

parameters_run: expr ',' parameters_run
        {
            $$ = rule ('parameters_run', [$1, token (',', $2), $3]);
        }
    | expr
        {
            $$ = rule ('parameters_run', [$1]);
        }
    |
        {
            // no arguments
            $$ = token ('EMPTY', '');
        };
Le javascript
"use strict"; // import fs for reading var fs = require ('fs'); // import the generated Parser var parser = require ('./program.js').parser; var str = fs.readFileSync (process.argv[2], 'UTF-8'); // add a text to the parser try { // run the parser using a string, why are the values printed upside down? var info = parser.parse (str); console.log ('AST'); console.log (JSON.stringify (info, null, 4)); } catch (e) { // display the error message and data // console.log ('You have an error at line '+(e.hash.line+1)+' at position '+e.hash.loc.first_column); // console.log (e.message); console.log (e); }
Exemple de programme
var a of int
a=10
function f ()
var s of int
endfunction