From f57187515f9361722abcb79f8f2ec8fde5452960 Mon Sep 17 00:00:00 2001 From: miggazElquez Date: Sun, 1 Sep 2019 23:26:06 +0200 Subject: [PATCH] First commit --- .gitignore | 3 + brainfuck.py | 201 ++++++++++++++++++ grammar.ebnf | 26 +++ to_bf_compiler.py | 522 ++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 752 insertions(+) create mode 100644 .gitignore create mode 100644 brainfuck.py create mode 100644 grammar.ebnf create mode 100644 to_bf_compiler.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..5f5efa9 --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +__pycache__ + +*.pyc diff --git a/brainfuck.py b/brainfuck.py new file mode 100644 index 0000000..42f4526 --- /dev/null +++ b/brainfuck.py @@ -0,0 +1,201 @@ +"""interpreter of brainfuck written in python""" +import sys +import os + +from termcolor import cprint + +os.system('color') + +class BFError(RuntimeError): + pass + +def pair_bracket(prog): + result = [] + brackets = [] + for index, instr in enumerate(prog): + if instr == '[': + brackets.append(index) + if instr == ']': + open_brackets = brackets.pop() + result.append((open_brackets,index)) + return dict(result) + + +def interpret(prog): + ar = [0] + ptr = 0 + index = 0 + blocks = [] + bracket_info = pair_bracket(prog) + while index < len(prog): + instr = prog[index] + + if instr == '+': + ar[ptr] += 1 + elif instr == '-': + ar[ptr] -= 1 + elif instr == '>': + ptr += 1 + if ptr == len(ar): + ar.append(0) + elif instr == '<': + ptr -= 1 + if ptr < 0: + raise ValueError("You can't go to a cell before 0") + elif instr == '.': + print(chr(ar[ptr]),end='') + elif instr == ',': + val = input() + if len(val) > 1: + raise ValueError('please put character one by one') + if val: + ar[ptr] = ord(val) + else: + ar[ptr] = 0 + elif instr == '[': + if ar[ptr]: + blocks.append(index) + else: + index = bracket_info[index] + elif instr == ']': + if ar[ptr]: + index = blocks.pop() + continue + elif instr == '#': + print(ar, ptr, ar[ptr]) + + index += 1 + + return ar + + +def format(prog,debug_mode=True): + if debug_mode: + return ''.join(i for i in prog if i in '[]<>+-.,#') + return ''.join(i for i in prog if i in '[]<>+-.,') + + +def repl(): + """ + use brainfuck in an interactive mode + + special commands : + p: where is the pointer + v: val of the cell (the number, not the ascii val) + a: show the entire array + + """ + ar = [0] + ptr = 0 + blocks = [] + while 1: + try: + not_ok = True + while not_ok: + try: + prog = input('> ') + not_ok = False + except KeyboardInterrupt: + print() + except EOFError: + break + + if prog == 'p': + print(ptr) + elif prog == 'v': + print(ar[ptr]) + elif prog == 'a': + print(ar) + elif prog == 'help': + repl_help() + elif prog == 'q': + break + elif prog == 'r': + repl() + return + else: + index = 0 + bracket_info = pair_bracket(prog) + try: + while index < len(prog): + instr = prog[index] + + if instr == '+': + ar[ptr] += 1 + elif instr == '-': + ar[ptr] -= 1 + elif instr == '>': + ptr += 1 + if ptr == len(ar): + ar.append(0) + elif instr == '<': + ptr -= 1 + if ptr < 0: + ptr = 0 + raise BFError(f"You can't go to a cell before 0 (code before : {prog[:index]}, code after : {prog[index:]})") + elif instr == '.': + print(chr(ar[ptr]),end='') + elif instr == ',': + val = input() + if len(val) > 1: + raise ValueError('please put character one by one') + if val: + ar[ptr] = ord(val) + else: + ar[ptr] = 0 + elif instr == '[': + if ar[ptr]: + blocks.append(index) + else: + index = bracket_info[index] + elif instr == ']': + if ar[ptr]: + index = blocks.pop() + continue + else: + blocks.pop() + elif instr == '#': + try: + if prog[index+1] == '(': + index = index+2 + while prog[index] != ')': + print(prog[index],end='') + index += 1 + print(' ',end='') + except IndexError: + pass + print(ar, ptr, ar[ptr]) + + index += 1 + except KeyboardInterrupt: + pass + except BFError as e: + cprint(e,'red') + + if '.' in prog: + print() + + +def repl_help(): + print("""\ +In this repl, you can enter any valid brainfuck expression. +There is also some special command : + 'a' will show you the state of the memory + 'p' will show you where is the pointer + 'v' will show you the value of the cell + 'r' will reset the array""") + + + +def main(): + if len(sys.argv) == 2: + interpret(open(sys.argv[2]).read()) + elif len(sys.argv) == 3: + if sys.argv[1] == '-i': #oui c'est de lam erde comme manière de faire + interpret(sys.argv[2]) + else: + repl() + + +if __name__ == '__main__': + main() diff --git a/grammar.ebnf b/grammar.ebnf new file mode 100644 index 0000000..5279088 --- /dev/null +++ b/grammar.ebnf @@ -0,0 +1,26 @@ +program = statement, {statement} + +block = "{",{statement}, "}" +statement = short_statement | while_block | if_block +short_statement = declaration | assignement | expression | inc-dec, ";" +assignement = name, assignement_symbol, expression +declaration = "var" , name , {",",name}, ";" + +expression = compar + +compar = calcul, [compar_op, calcul] +calcul = terme, [('+'|'-'), terme] +terme = factor [("*"|"/"), factor] +factor = val ["**", val] +val = name | number | "(", expression, ")" +inc-dec = name, ("++" | "--") + +condition = '(', expression, ')' +while_block = "while", condition, block +if_block = "if", condition, block, ["else", block] + + + +bin_op = "+"|"-"|"*"|"/"|"**" +compar_op = ">"|">="|"<"|"<="|"==" +assignement_symbol = "="|"+="|"-="|"/="|"*=" \ No newline at end of file diff --git a/to_bf_compiler.py b/to_bf_compiler.py new file mode 100644 index 0000000..0f3108c --- /dev/null +++ b/to_bf_compiler.py @@ -0,0 +1,522 @@ +import re +from collections import namedtuple +import os + +from termcolor import cprint + +os.system('color') + +REGEXS = [(re.compile(i),j) for i,j in ( + (r'\s+','WHITESPACE'), + (r'//[^\n]*|/\*(?s:.)*\*/','COMMENTS'), + (r'if|for|while|else|var','KEYWORD'), + + (r'>=|<=|==|>|<','COMPAR_OP'), + (r'\+=|-=|\*=|/=|=','ASSIGNEMENT_OP'), + (r'\+\+|--','INC_DEC_OP'), + (r'\+|\*\*|\*|-|/','BIN_OP'), + (r'\(|\)|\{|\}|;|,','CONTROL_CHAR'), + + (r'[a-zA-Z]\w*','IDENTIFIERS'), + (r'\d[\d_]*','NUMBERS') + +)] + +USELESS_TAGS = ('WHITESPACE','COMMENTS') + +Token = namedtuple('Token',('text','tag')) + +def lex(program,keep_whitespace=False): + pos = 0 + tokens = [] + while pos < len(program): + for regex, tag in REGEXS: + match = regex.match(program,pos) + if match is not None: + token = match.group(0) + tokens.append(Token(token,tag)) + pos = match.end(0) + break + else: + raise SyntaxError(f"No match find during the lexing for {program[pos:pos+20]}...") + if keep_whitespace: + return tokens + return clean_tokens(tokens) + +def clean_tokens(tokens): + return [token for token in tokens if token.tag not in USELESS_TAGS] + + +#**************************** + + +ProgramNode = namedtuple('ProgramNode',('list_of_statement')) +BlockNode = namedtuple('BlockNode',('list_of_statement')) + +IfNode = namedtuple('IfNode',('condition','block','else_block')) +WhileNode = namedtuple('WhileNode',('condition','block')) + +AssignementNode = namedtuple('AssignementNode',('target','operator','expression')) +IncDecNode = namedtuple('IncDecNode',('name','operator')) +DeclarationNode = namedtuple('DeclarationNode',('names')) +BinOpNode = namedtuple('BinOpNode',('left','op','right')) + +IdentifierNode = namedtuple('IdentifierNode',('name')) +NumberNode = namedtuple('NumberNode',('value')) + + +class Parser: + def __init__(self,tokens): + self.tokens = tokens + self.pos = 0 + + def parse(self): + return self.program() + + def accept_text(self,value): + try: + token = self.tokens[self.pos] + except IndexError: + #print("reach the end") + return False + if token.text == value: + self.pos += 1 + return token + return False + + def accept_tag(self,tag): + try: + token = self.tokens[self.pos] + except IndexError: + return False + if token.tag == tag: + self.pos += 1 + return token + return False + + + + + def program(self): + nodes = [] + while 1: + statement = self.statement() + if statement: + nodes.append(statement) + else: + break + if not len(nodes): + raise ValueError("Your program seems to be empty") + return ProgramNode(nodes) + + def block(self): + #print(f"enter block ({self.pos})") + if not self.accept_text("{"): + return None + nodes = [] + while 1: + statement = self.statement() + if statement: + nodes.append(statement) + else: + break + if not self.accept_text('}'): + raise SyntaxError("'}' missing at the end of the block") + + #cprint("block",'green') + return BlockNode(nodes) + + def statement(self): + #print(f"enter statement ({self.pos})") + for func in (self.short_statement,self.while_block, self.if_block): + node = func() + if node: + #cprint("statement",'green') + return node + #cprint("no statement",'red') + return None + + def short_statement(self): + #print(f"enter short_statement ({self.pos})") + for func in (self.declaration, self.assignement,self.inc_dec,self.expression,): + node = func() + if node: + if self.accept_text(";"): + #cprint('short_statement','green') + return node + else: + raise SyntaxError(f"Missing ';' ({node})") + #cprint("no short_statement",'red') + return None + + def declaration(self): + if not self.accept_text('var'): + return None + name = self.accept_tag('IDENTIFIERS') + if not name: + raise SyntaxError("You must specifies an identifiers after a 'var' keyword") + names = [IdentifierNode(name.text)] + while 1: + if self.accept_text(','): + name = self.accept_tag('IDENTIFIERS') + if name: + names.append(IdentifierNode(name.text)) + else: + raise SyntaxError("Expected an identifiers after a ',' in a declaration statemement") + else: + break + + return DeclarationNode(names) + + def assignement(self): + #print(f"enter assignement ({self.pos})") + name = self.accept_tag('IDENTIFIERS') + if not name: + #cprint("no assignement (no name)",'red') + return None + sym = self.accept_tag('ASSIGNEMENT_OP') + if not sym: + self.pos -= 1 #for the name + #cprint("no assignement (no op)",'red') + return None + expr = self.expression() + if not expr: + raise SyntaxError("Missing expression in a assignement") + #cprint("assignement",'green') + return AssignementNode(IdentifierNode(name.text),sym.text,expr) + + def inc_dec(self): + #print(f"enter inc_dec ({self.pos})") + name = self.accept_tag('IDENTIFIERS') + if not name: + #cprint("no inc_dec (no name)",'red') + return None + op = self.accept_tag('INC_DEC_OP') + if not op: + self.pos -= 1 #for the name + #cprint("no inc_dec (no op)",'red') + return None + #cprint("inc_dec",'green') + return IncDecNode(IdentifierNode(name.text),op.text) + + def expression(self): + #print(f"enter expression ({self.pos})") + compar = self.compar() + if compar: + #cprint("expression",'green') + return compar + + #cprint("no expression",'red') + return None + + def compar(self): + calcul1 = self.calcul() + if not calcul1: + return None + + op = self.accept_tag('COMPAR_OP') + if op: + calcul2 = self.calcul() + if not calcul2: + raise SyntaxError("missing second term in calcul") + return BinOpNode(calcul1,op.text,calcul2) + return calcul1 + + def calcul(self): + #print(f"enter calcul ({self.pos})") + """There is calcul, terme, factor and val for differentiate between +/-, * or /, and **""" + terme1 = self.terme() + if not terme1: + return None + + op = None + sym = self.accept_text('+') + if sym: + op = sym + sym = self.accept_text('-') + if sym: + op = sym + + if op: + terme2 = self.terme() + if not terme2: + raise SyntaxError("missing second term in calcul") + return BinOpNode(terme1,op.text,terme2) + return terme1 + + + def terme(self): + #print(f"enter terme ({self.pos})") + factor1 = self.factor() + if not factor1: + return None + + op = None + sym = self.accept_text('*') + if sym: + op = sym + sym = self.accept_text('/') + if sym: + op = sym + + if op: + factor2 = self.factor() + if not factor2: + raise SyntaxError("missing second factor in terme") + return BinOpNode(factor1,op.text,factor2) + return factor1 + + def factor(self): + #print(f"enter factor ({self.pos})") + val = self.value() + if not val: + return None + op = self.accept_text('**') + if op: + val2 = self.value() + if not val2: + raise SyntaxError("missing second val un factor") + return BinOpNode(val,op.text,val2) + return val + + def value(self): + #print(f"enter value ({self.pos})") + name = self.accept_tag('IDENTIFIERS') + if name: + return IdentifierNode(name.text) + number = self.accept_tag('NUMBERS') + if number: + return NumberNode(int(number.text)) + if self.accept_text('('): + expr = self.expression() + if not (expr and self.accept_text(')')): + raise SyntaxError("missing expression or matching ')'") + return expr + return None + + def while_block(self): + #print(f"enter while_block ({self.pos})") + if not self.accept_text('while'): + #cprint("no while_block (keyword)",'red') + return None + + if not self.accept_text('('): + raise SyntaxError("Bad while block") + + cond = self.expression() + if not cond: + raise SyntaxError("Bad while block") + if not self.accept_text(')'): + raise SyntaxError("Bad while block") + block = self.block() + if not block: + raise SyntaxError("Bad while block") + #cprint("while_block",'green') + return WhileNode(cond,block) + + def if_block(self): + #print(f"enter if_block ({self.pos})") + if not self.accept_text('if'): + #cprint("no if_block (keyword)",'red') + return None + + if not self.accept_text('('): + raise SyntaxError("Bad if block") + + cond = self.expression() + if not cond: + raise SyntaxError("Bad if block") + if not self.accept_text(')'): + raise SyntaxError("Bad if block") + block = self.block() + if not block: + raise SyntaxError("Bad if block") + + if self.accept_text('else'): + else_block = self.block() + if not else_block: + raise SyntaxError("Bad else block") + return IfNode(cond,block,else_block) + #cprint("if_block",'green') + return IfNode(cond,block,None) + + +def parse(prog): + tokens = clean_tokens(lex(prog)) + parser = Parser(tokens) + return parser.program() + + +#*************** + + +class Ast_to_IR: + + def __init__(self,ast): + self.ast = ast + + def add(self,val): + self.ir.append(val) + + def convert(self): + self.ir = [] + vars_ = [] + for statemement in self.ast.list_of_statement: + if isinstance(statemement,DeclarationNode): + vars_.extend(statemement.names) + vars_ = {val:index for index,val in enumerate(vars_)} + self.vars = vars_ + self.pos = 0 + + self.add(('INIT',len(vars_))) + self.push_block(self.ast) + return self.ir + + def push_block(self,node): + for statement in node.list_of_statement: + func = methods.get(type(statement),default) #switch-case + func(self,statement) + + def assignement_to_ir(self,node): + if node.operator == '=': + expression = node.expression + else: + operator = node.operator[0] #ex : '+=' -> '+' + expression = BinOpNode(node.target,operator,node.expression) + + self.push_expr(expression) + assign_distance = self.pos + self.vars[node.target] + + self.add(('ASSIGN',assign_distance,node.target.name)) + self.pos -= 1 + + def expression_statement_to_ir(self,node): + self.push_expr(node) + self.pos -= 1 + self.add(('POP',)) + + def inc_dec_to_ir(self,node): + assign_distance = self.pos + self.vars[node.name] + if node.operator == "++": + self.add(('INC',assign_distance,node.name.name)) + else: + self.add(('DEC',assign_distance,node.name.name)) + + def push_expr(self,node): + if isinstance(node,NumberNode): + self.pos += 1 + self.add(('PUSH_NUMBER',node.value)) + return + if isinstance(node,IdentifierNode): + self.pos += 1 + distance = self.pos + self.vars[node] + self.add(('PUSH_VAR',distance,node.name)) + return + + if isinstance(node,BinOpNode): + self.push_expr(node.left) + self.push_expr(node.right) + self.pos -= 1 + self.add(('BIN_OP',node.op)) + return + + raise Exception(f"cette expression n'est pas bonne : {node}") + + def while_to_ir(self,node): + self.push_expr(node.condition) + self.add(('WHILE_ENTER',)) + self.pos -= 1 + self.push_block(node.block) + self.push_expr(node.condition) + self.add(('WHILE_END',)) + self.pos -= 1 + + def if_to_ir(self,node): + self.push_expr(node.condition) + self.add(('IF_ENTER',)) + self.pos += 1 + self.push_block(node.block) + if node.else_block: + self.add(('ELSE_ENTER',)) + self.pos -= 2 + self.push_block(node.else_block) + self.add(('ELSE_END',)) + else: + self.add(('IF_END',)) + self.pos -= 2 + + + +methods = { + DeclarationNode: lambda self,node:None, + AssignementNode: Ast_to_IR.assignement_to_ir, + WhileNode: Ast_to_IR.while_to_ir, + IfNode: Ast_to_IR.if_to_ir, + IncDecNode: Ast_to_IR.inc_dec_to_ir, +} +default = Ast_to_IR.expression_statement_to_ir + + +#************ + +def init(nb): + return '>' * (nb-1) + f' INIT {nb}' + +def assign(val,name): + return f"{'<'*val}[-]{'>'*val}[-{'<'*val}+{'>'*val}]< ASSIGN {name}" + +def inc(val,name): + return f"{'<'*val}+{'>'*val} INC {name}" + +def dec(val,name): + return f"{'<'*val}-{'>'*val} DEC {name}" + +def pop(): + return '[-]< POP' + +def push_number(val): + return ">" + "+"*val + f" PUSH_NUMBER {val}" + +def push_var(val,name): + return '<' * (val-1) + f"[-{'>'*val}+>+<{'<'*val}]" + f"{'>'*val}>[-<{'<'*val}+>{'>'*val}]<" + f" PUSH_VAR {name}" + +def bin_op(op): + if op == "+": + bf = "[-<+>]<" + elif op == "-": + bf = "[-<->]<" + else: + raise Exception(f"bin_op {op} not implemented yet") + return bf + f" BIN_OP {op if op not in ('+','-') else {'+':'plus','-':'minus'}[op]}" + +def while_enter(): + return "[[-]< WHILE_ENTER" + +def while_end(): + return "]< WHILE_END" + +def if_enter(): + return ">+<[[-]>- IF_ENTER" + +def else_enter(): + return "<]>[-<< ELSE_ENTER" + +def else_end(): + return ">>]<< ELSE_END" + +def if_end(): + return "<]>[-]<< IF_END" + +def compile_ir(ir): + return '\n'.join(globals()[i[0].lower()](*i[1:]) for i in ir) + + +#************** + +def compile(prog): + tokens = lex(prog) + ast = Parser(tokens).parse() + ir = Ast_to_IR(ast).convert() + brainfuck = compile_ir(ir) + return brainfuck +