# brainfuck/to_bf_compiler.py
#
# 767 lines
# 19 KiB
# Python

import re
from collections import namedtuple, ChainMap
import os
from termcolor import cprint
# Runs the shell command ``color`` once at import time.
# NOTE(review): presumably this switches on ANSI escape handling in the
# Windows console for termcolor output -- confirm before removing.
os.system('color')

# Ordered lexer table: the first pattern that matches at the current position
# wins, so more specific patterns must come before more general ones.
REGEXS = [
    (re.compile(pattern), tag)
    for pattern, tag in (
        (r'\s+', 'WHITESPACE'),
        # Block comments are matched lazily so that one comment stops at its
        # own closing ``*/`` instead of running up to the last one in the file.
        (r'//[^\n]*|/\*(?s:.)*?\*/', 'COMMENTS'),
        # The trailing word boundary keeps identifiers that merely start with
        # a keyword (``variable``, ``iffy``, ``define``) from being split.
        (r'(?:if|while|else|var|def|return|bf|import)\b', 'KEYWORD'),
        (r'\{\{[^}]*\}\}', 'BRAINFUCK_CODE'),
        (r'>=|<=|==|>|<|!=', 'COMPAR_OP'),
        (r'\+=|%=|-=|\*=|/=|=', 'ASSIGNEMENT_OP'),
        (r'\+\+|--', 'INC_DEC_OP'),
        (r'\+|\*\*|\*|-|/|%', 'BIN_OP'),
        (r'\(|\)|\{|\}|;|,', 'CONTROL_CHAR'),
        (r'[a-zA-Z]\w*', 'IDENTIFIERS'),
        (r'\d[\d_]*', 'NUMBERS'),
        (r"'.'", 'CHAR'),
        (r'"(\\.|[^"\n])*"', 'STRING'),
    )
]

# Tags that carry no meaning for the parser and are dropped after lexing.
USELESS_TAGS = ('WHITESPACE', 'COMMENTS')

# A lexed token: the matched source text and the tag of the pattern it matched.
Token = namedtuple('Token', ('text', 'tag'))
def lex(program, keep_whitespace=False):
    """Split *program* into a list of ``Token`` objects.

    At every position the patterns of ``REGEXS`` are tried in order and the
    first one that matches produces the next token.

    :param program: source text to tokenize.
    :param keep_whitespace: when true, return every token; otherwise the
        result is passed through ``clean_tokens`` first.
    :raises SyntaxError: if no pattern matches at some position.
    """
    tokens = []
    pos = 0
    end = len(program)
    while pos < end:
        match = None
        for regex, tag in REGEXS:
            match = regex.match(program, pos)
            if match is not None:
                break
        if match is None:
            raise SyntaxError(f"No match find during the lexing for {program[pos:pos+20]}...")
        tokens.append(Token(match.group(0), tag))
        pos = match.end(0)
    return tokens if keep_whitespace else clean_tokens(tokens)
def clean_tokens(tokens):
    """Return a new list holding the tokens whose tag is not in ``USELESS_TAGS``."""
    kept = []
    for token in tokens:
        if token.tag in USELESS_TAGS:
            continue
        kept.append(token)
    return kept
def format_bf(prog, debug_mode=True):
    """Strip everything that is not Brainfuck from *prog*.

    With ``debug_mode`` off only the eight command characters survive.  With
    it on, ``#`` markers are kept too, and a ``(...)`` group that directly
    follows a ``#`` is copied verbatim (up to the first ``)``, or to the end
    of the text when the group is never closed).
    """
    commands = '[]<>+-.,'
    if not debug_mode:
        return ''.join(char for char in prog if char in commands)

    kept = []
    size = len(prog)
    cursor = 0
    while cursor < size:
        char = prog[cursor]
        cursor += 1
        if char in commands:
            kept.append(char)
            continue
        if char != '#':
            continue
        kept.append('#')
        if cursor < size and prog[cursor] == '(':
            closing = prog.find(')', cursor + 1)
            if closing == -1:
                # Unterminated group: keep what is there and stop.
                kept.append(prog[cursor:])
                cursor = size
            else:
                kept.append(prog[cursor:closing + 1])
                cursor = closing + 1
    return ''.join(kept)
#****************************
# AST node types.  Each one is a plain namedtuple; the field names are the
# attributes the parser fills in and the IR generator reads.

# Whole program: its statements and a name.
ProgramNode = namedtuple('ProgramNode', ('list_of_statement', 'name'))
# Statements enclosed in braces.
BlockNode = namedtuple('BlockNode', ('list_of_statement',))
# ``if`` statement; ``else_block`` is None when there is no ``else``.
IfNode = namedtuple('IfNode', ('condition', 'block', 'else_block'))
WhileNode = namedtuple('WhileNode', ('condition', 'block'))
# ``def`` function and ``bf`` raw-Brainfuck function definitions.
FunctionDefNode = namedtuple('FunctionDefNode', ('name', 'args', 'block'))
BfDefNode = namedtuple('BfDefNode', ('name', 'code'))
# Statements.
AssignementNode = namedtuple('AssignementNode', ('target', 'operator', 'expression'))
IncDecNode = namedtuple('IncDecNode', ('name', 'operator'))
DeclarationNode = namedtuple('DeclarationNode', ('names',))
FunctionCallNode = namedtuple('FunctionCallNode', ('name', 'args'))
ReturnNode = namedtuple('ReturnNode', ('expr',))
ImportNode = namedtuple('ImportNode', ('path',))
# Expressions.
BinOpNode = namedtuple('BinOpNode', ('left', 'op', 'right'))
IdentifierNode = namedtuple('IdentifierNode', ('name',))
NumberNode = namedtuple('NumberNode', ('value',))
class Parser:
    """Recursive-descent parser that turns a token list into an AST.

    Each grammar rule is a method.  A rule either consumes tokens and
    returns a node, or leaves ``self.pos`` where it found it and returns
    ``None``.  Input that has started a rule but cannot finish it raises
    ``SyntaxError``.
    """

    def __init__(self, tokens):
        """Keep *tokens* (objects with ``text`` and ``tag``) and start at index 0."""
        self.tokens = tokens
        self.pos = 0

    def parse(self):
        """Parse the whole token list; same as :meth:`program`."""
        return self.program()

    # ------------------------------------------------------------------
    # Token helpers
    # ------------------------------------------------------------------

    def _accept(self, attribute, expected):
        """Consume the current token if its *attribute* equals *expected*.

        Returns the token on success.  Returns ``False`` and leaves
        ``self.pos`` untouched when the input is exhausted or the token does
        not match.
        """
        try:
            token = self.tokens[self.pos]
        except IndexError:
            return False
        if getattr(token, attribute) != expected:
            return False
        self.pos += 1
        return token

    def accept_text(self, value):
        """Consume and return the current token if its text is *value*, else ``False``."""
        return self._accept('text', value)

    def accept_tag(self, tag):
        """Consume and return the current token if its tag is *tag*, else ``False``."""
        return self._accept('tag', tag)

    def _accept_any_text(self, values):
        """Consume at most ONE token whose text is in *values*; ``False`` if none fits."""
        for value in values:
            token = self.accept_text(value)
            if token:
                return token
        return False

    def _name_then_tag(self, tag):
        """Match ``IDENTIFIERS`` followed by a token tagged *tag*.

        Returns ``(name_token, follower_token)``, or ``None`` after putting
        the identifier back when the follower is missing.
        """
        name = self.accept_tag('IDENTIFIERS')
        if not name:
            return None
        follower = self.accept_tag(tag)
        if not follower:
            self.pos -= 1  # give the identifier back to the next rule
            return None
        return name, follower

    def _binary_chain(self, operand, operators, error):
        """Parse ``operand (op operand)*`` into a left-nested ``BinOpNode`` tree.

        Exactly one operator is taken between two operands, so input such as
        ``a * / b`` is rejected with *error* instead of being read as
        ``a / b``.
        """
        left = operand()
        if not left:
            return None
        while True:
            op = self._accept_any_text(operators)
            if not op:
                return left
            right = operand()
            if not right:
                raise SyntaxError(error)
            left = BinOpNode(left, op.text, right)

    def _condition_and_block(self, error):
        """Parse ``( expression ) { ... }``; any missing piece raises ``SyntaxError(error)``."""
        if not self.accept_text('('):
            raise SyntaxError(error)
        cond = self.expression()
        if not cond:
            raise SyntaxError(error)
        if not self.accept_text(')'):
            raise SyntaxError(error)
        body = self.block()
        if not body:
            raise SyntaxError(error)
        return cond, body

    # ------------------------------------------------------------------
    # Program structure
    # ------------------------------------------------------------------

    def program(self):
        """Parse every statement of the file and return a ``ProgramNode``.

        The statements of an imported file are spliced in just before the
        ``ImportNode`` that pulled them in.

        :raises SyntaxError: if a token is left over that starts no statement.
        :raises ValueError: if there are no tokens to parse at all.
        """
        nodes = []
        while True:
            statement = self.statement()
            if not statement:
                break
            if isinstance(statement, ImportNode):
                nodes.extend(get_ast(statement.path))
            nodes.append(statement)
        # Stopping early must not silently drop the rest of the source.
        if self.pos < len(self.tokens):
            stray = self.tokens[self.pos]
            raise SyntaxError(f"Unexpected token {stray.text!r} (token number {self.pos})")
        if not nodes:
            raise ValueError("Your program seems to be empty")
        return ProgramNode(nodes, '__main__')  # a fixed name makes debugging easier

    def block(self):
        """Parse ``{ statement* }`` into a ``BlockNode``; ``None`` if there is no ``{``."""
        if not self.accept_text("{"):
            return None
        # statement() returns None as soon as nothing more can be parsed.
        nodes = list(iter(self.statement, None))
        if not self.accept_text('}'):
            raise SyntaxError("'}' missing at the end of the block")
        return BlockNode(nodes)

    def statement(self):
        """Try each statement rule in turn and return the first node produced."""
        rules = (
            self.short_statement,
            self.while_block,
            self.if_block,
            self.function_block,
            self.bf_block,
        )
        for rule in rules:
            node = rule()
            if node:
                return node
        return None

    def short_statement(self):
        """Parse one ``;``-terminated statement.

        :raises SyntaxError: if a statement was recognised but its ``;`` is missing.
        """
        rules = (
            self.declaration,
            self.assignement,
            self.inc_dec,
            self.expression,
            self.return_,
            self.import_,
        )
        for rule in rules:
            node = rule()
            if not node:
                continue
            if not self.accept_text(";"):
                raise SyntaxError(f"Missing ';' ({node})")
            return node
        return None

    # ------------------------------------------------------------------
    # Short statements
    # ------------------------------------------------------------------

    def import_(self):
        """Parse ``import "path"``; the node stores the path without its quotes."""
        if not self.accept_text('import'):
            return None
        path = self.accept_tag('STRING')
        if not path:
            raise SyntaxError('Bad import')
        return ImportNode(path.text[1:-1])

    def return_(self):
        """Parse ``return expression``."""
        if not self.accept_text('return'):
            return None
        expr = self.expression()
        if not expr:
            raise SyntaxError("Bad Return")
        return ReturnNode(expr)

    def declaration(self):
        """Parse ``var a, b, ...`` into a ``DeclarationNode``."""
        if not self.accept_text('var'):
            return None
        name = self.accept_tag('IDENTIFIERS')
        if not name:
            raise SyntaxError("You must specifies an identifiers after a 'var' keyword")
        names = [IdentifierNode(name.text)]
        while self.accept_text(','):
            name = self.accept_tag('IDENTIFIERS')
            if not name:
                raise SyntaxError("Expected an identifiers after a ',' in a declaration statement")
            names.append(IdentifierNode(name.text))
        return DeclarationNode(names)

    def assignement(self):
        """Parse ``name <assignment-operator> expression``."""
        matched = self._name_then_tag('ASSIGNEMENT_OP')
        if matched is None:
            return None
        name, sym = matched
        expr = self.expression()
        if not expr:
            raise SyntaxError("Missing expression in a assignement")
        return AssignementNode(IdentifierNode(name.text), sym.text, expr)

    def inc_dec(self):
        """Parse ``name++`` or ``name--``."""
        matched = self._name_then_tag('INC_DEC_OP')
        if matched is None:
            return None
        name, op = matched
        return IncDecNode(IdentifierNode(name.text), op.text)

    # ------------------------------------------------------------------
    # Expressions, from lowest to highest precedence
    # ------------------------------------------------------------------

    def expression(self):
        """Parse an expression; the entry point is the comparison level."""
        return self.compar()

    def call_function(self):
        """Parse ``name(arg, ...)``; backtracks when no ``(`` follows the name."""
        name = self.accept_tag('IDENTIFIERS')
        if not name:
            return None
        if not self.accept_text('('):
            self.pos -= 1  # plain identifier, not a call
            return None
        args = []
        first = self.expression()
        if first:
            args.append(first)
            while self.accept_text(','):
                following = self.expression()
                if not following:
                    raise SyntaxError("missing argument")
                args.append(following)
        if not self.accept_text(')'):
            raise SyntaxError("A function call must have a ')' at the end")
        return FunctionCallNode(name.text, args)

    def compar(self):
        """Parse ``calcul [comparison-operator calcul]`` (comparisons do not chain)."""
        left = self.calcul()
        if not left:
            return None
        op = self.accept_tag('COMPAR_OP')
        if not op:
            return left
        right = self.calcul()
        if not right:
            raise SyntaxError("missing second term in calcul")
        return BinOpNode(left, op.text, right)

    def calcul(self):
        """Parse a chain of ``+`` / ``-``.

        calcul, terme, factor and value are separate levels so that ``+``/``-``
        bind weaker than ``*``/``/``/``%``, which bind weaker than ``**``.
        """
        return self._binary_chain(self.terme, ('+', '-'), "missing second term in calcul")

    def terme(self):
        """Parse a chain of ``*`` / ``/`` / ``%``."""
        return self._binary_chain(self.factor, ('*', '/', '%'), "missing second factor in terme")

    def factor(self):
        """Parse a chain of ``**`` (nested to the left)."""
        return self._binary_chain(self.value, ('**',), "missing second val un factor")

    def value(self):
        """Parse a call, identifier, number, character literal or parenthesised expression."""
        # The call must be tried first: it also starts with an identifier.
        func = self.call_function()
        if func:
            return func
        name = self.accept_tag('IDENTIFIERS')
        if name:
            return IdentifierNode(name.text)
        number = self.accept_tag('NUMBERS')
        if number:
            # The lexer allows any run of digits and underscores after the
            # first digit; int() is stricter, so drop the separators first.
            return NumberNode(int(number.text.replace('_', '')))
        char = self.accept_tag('CHAR')
        if char:
            return NumberNode(ord(char.text[1]))
        if self.accept_text('('):
            expr = self.expression()
            if not (expr and self.accept_text(')')):
                raise SyntaxError("missing expression or matching ')'")
            return expr
        return None

    # ------------------------------------------------------------------
    # Compound statements
    # ------------------------------------------------------------------

    def while_block(self):
        """Parse ``while (condition) { ... }``."""
        if not self.accept_text('while'):
            return None
        cond, block = self._condition_and_block("Bad while block")
        return WhileNode(cond, block)

    def if_block(self):
        """Parse ``if (condition) { ... }`` with an optional ``else { ... }``."""
        if not self.accept_text('if'):
            return None
        cond, block = self._condition_and_block("Bad if block")
        else_block = None
        if self.accept_text('else'):
            else_block = self.block()
            if not else_block:
                raise SyntaxError("Bad else block")
        return IfNode(cond, block, else_block)

    def function_block(self):
        """Parse ``def name(arg, ...) { ... }``."""
        if not self.accept_text('def'):
            return None
        name = self.accept_tag('IDENTIFIERS')
        if not name:
            raise SyntaxError("A function need a name")
        if not self.accept_text('('):
            raise SyntaxError("Bad function block")
        args = []
        arg = self.accept_tag('IDENTIFIERS')
        if arg:
            args.append(IdentifierNode(arg.text))
            while self.accept_text(','):
                arg = self.accept_tag('IDENTIFIERS')
                if not arg:
                    raise SyntaxError("Bad function block")
                args.append(IdentifierNode(arg.text))
        if not self.accept_text(')'):
            raise SyntaxError("Bad function block")
        block = self.block()
        if not block:
            raise SyntaxError("Bad function block")
        return FunctionDefNode(name.text, args, block)

    def bf_block(self):
        """Parse ``bf name {{ raw brainfuck }}``; the code is cleaned with ``format_bf``."""
        if not self.accept_text('bf'):
            return None
        name = self.accept_tag('IDENTIFIERS')
        if not name:
            raise SyntaxError("A bf function need a name")
        code = self.accept_tag('BRAINFUCK_CODE')
        if not code:
            raise SyntaxError("Bad bf block")
        # Drop the surrounding ``{{`` and ``}}`` before cleaning.
        return BfDefNode(name.text, format_bf(code.text[2:-2]))
def parse(prog):
    """Lex the source text *prog* and return the parsed ``ProgramNode``."""
    meaningful_tokens = clean_tokens(lex(prog))
    return Parser(meaningful_tokens).program()
def get_ast(path):
    """Read the source file at *path* and return the list of its parsed statements."""
    with open(path) as source:
        tokens = lex(source.read())
    program = Parser(tokens).parse()
    return program.list_of_statement
#***************
class Ast_to_IR:
    """Lower an AST root into a flat list of IR tuples.

    The root must be a ``ProgramNode`` or a ``FunctionDefNode``.  Every IR
    entry is a tuple whose first item is an upper-case opcode and whose
    remaining items are that opcode's operands.

    ``self.pos`` is a counter raised for each value the generated code
    pushes and lowered for each value it consumes; ``self.vars`` maps every
    ``IdentifierNode`` to a slot index.  Their sum is the distance operand of
    ``ASSIGN``, ``INC``, ``DEC`` and ``PUSH_VAR``.
    """

    def __init__(self, ast, functions=None):
        """Remember the root node and the mapping of already-known functions.

        :param ast: ``ProgramNode`` or ``FunctionDefNode`` to convert.
        :param functions: ``ChainMap`` of function name -> IR list; a fresh
            one is created when omitted.
        """
        if functions is None:
            functions = ChainMap({})
        self.ast = ast
        self.functions = functions

    def add(self, val):
        """Append one IR tuple to the output."""
        self.ir.append(val)

    def _slot(self, identifier):
        """Return the slot index of *identifier*.

        :raises SyntaxError: if the variable was never declared (the same
            error type an unknown function produces).
        """
        try:
            return self.vars[identifier]
        except KeyError:
            raise SyntaxError(f"The variable {identifier.name} doesn't exist")

    def convert(self):
        """Generate and return the IR list for the root node.

        :raises TypeError: if the root is neither a program nor a function.
        """
        root = self.ast
        if isinstance(root, ProgramNode):
            statements, args, block = root.list_of_statement, [], root
        elif isinstance(root, FunctionDefNode):
            statements, args, block = root.block.list_of_statement, root.args, root.block
        else:
            raise TypeError(f"Cannot convert a {type(root).__name__} to IR")

        self.ir = []
        # Definitions made here go in a child scope layered over the caller's.
        self.functions = self.functions.new_child()
        for statement in statements:
            if isinstance(statement, FunctionDefNode):
                self.functions[statement.name] = Ast_to_IR(statement, self.functions).convert()
            elif isinstance(statement, BfDefNode):
                self.functions[statement.name] = [('BF', statement.code, statement.name)]

        # Only declarations that are direct children of the root are collected.
        vars_ = [
            name
            for statement in statements
            if isinstance(statement, DeclarationNode)
            for name in statement.names
        ]
        # Slots are numbered over arguments followed by variables, in reverse.
        all_vars = {val: index for index, val in enumerate(reversed(args + vars_))}
        self.vars = all_vars
        self.pos = 0

        nb_decalage = len(vars_) - (1 if isinstance(root, ProgramNode) else 0)
        self.add(('INIT_FUNCTION', nb_decalage, root.name))
        self.push_block(block)
        if isinstance(root, FunctionDefNode):
            self.add(('END_FUNCTION', len(all_vars), root.name))
        return self.ir

    def push_block(self, node):
        """Emit IR for every statement of *node*, dispatching on the statement type."""
        for statement in node.list_of_statement:
            # ``methods`` / ``default`` are the module-level dispatch table.
            handler = methods.get(type(statement), default)
            handler(self, statement)

    def assignement_to_ir(self, node):
        """Emit the value expression, then an ``ASSIGN`` to the target variable."""
        if node.operator == '=':
            expression = node.expression
        else:
            # Compound form: '+=' becomes ``target + expression``.
            expression = BinOpNode(node.target, node.operator[0], node.expression)
        self.push_expr(expression)
        assign_distance = self.pos + self._slot(node.target)
        self.add(('ASSIGN', assign_distance, node.target.name))
        self.pos -= 1

    def return_to_ir(self, node):
        """Emit the returned expression; its value is left pushed."""
        self.push_expr(node.expr)

    def expression_statement_to_ir(self, node):
        """Emit an expression used as a statement and discard its value."""
        self.push_expr(node)
        self.pos -= 1
        self.add(('POP',))

    def inc_dec_to_ir(self, node):
        """Emit ``INC`` for ``++`` and ``DEC`` for any other operator."""
        assign_distance = self.pos + self._slot(node.name)
        opcode = 'INC' if node.operator == "++" else 'DEC'
        self.add((opcode, assign_distance, node.name.name))

    def push_expr(self, node):
        """Emit the IR that evaluates *node* and pushes its value.

        :raises SyntaxError: for a call to an unknown function or a read of
            an undeclared variable.
        :raises Exception: if *node* is not an expression node.
        """
        if isinstance(node, FunctionCallNode):
            for arg in node.args:
                self.push_expr(arg)
            try:
                # Calls are expanded in place: the callee's IR is copied here.
                self.ir.extend(self.functions[node.name])
            except KeyError:
                raise SyntaxError(f"The function {node.name} doesn't exist")
            # The arguments are consumed and one result is pushed.
            self.pos -= len(node.args)
            self.pos += 1
            return
        if isinstance(node, NumberNode):
            self.pos += 1
            self.add(('PUSH_NUMBER', node.value))
            return
        if isinstance(node, IdentifierNode):
            self.pos += 1
            distance = self.pos + self._slot(node)
            self.add(('PUSH_VAR', distance, node.name))
            return
        if isinstance(node, BinOpNode):
            self.push_expr(node.left)
            self.push_expr(node.right)
            self.pos -= 1
            self.add(('BIN_OP', node.op))
            return
        raise Exception(f"cette expression n'est pas bonne : {node}")

    def while_to_ir(self, node):
        """Emit a loop: condition, ``WHILE_ENTER``, body, condition again, ``WHILE_END``."""
        self.push_expr(node.condition)
        self.add(('WHILE_ENTER',))
        self.pos -= 1
        self.push_block(node.block)
        self.push_expr(node.condition)
        self.add(('WHILE_END',))
        self.pos -= 1

    def if_to_ir(self, node):
        """Emit a conditional, with ``ELSE_ENTER``/``ELSE_END`` when an else block exists."""
        self.push_expr(node.condition)
        self.add(('IF_ENTER',))
        self.pos += 1
        self.push_block(node.block)
        if node.else_block:
            self.add(('ELSE_ENTER',))
            self.pos -= 2
            self.push_block(node.else_block)
            self.add(('ELSE_END',))
        else:
            self.add(('IF_END',))
            self.pos -= 2
def _ignore_statement(self, node):
    """Handler for statements that emit no IR where they appear."""
    return None


# Statement type -> handler called as ``handler(converter, statement)``.
methods = {
    DeclarationNode: _ignore_statement,
    FunctionDefNode: _ignore_statement,
    AssignementNode: Ast_to_IR.assignement_to_ir,
    ReturnNode: Ast_to_IR.return_to_ir,
    WhileNode: Ast_to_IR.while_to_ir,
    IfNode: Ast_to_IR.if_to_ir,
    IncDecNode: Ast_to_IR.inc_dec_to_ir,
    BfDefNode: _ignore_statement,
    ImportNode: _ignore_statement,
}

# Any other statement type is handled as an expression whose value is dropped.
default = Ast_to_IR.expression_statement_to_ir
#************
def init_function(nb, name):
    """Emit *nb* moves to the right, followed by an ``INIT_FUNCTION`` label."""
    return f"{'>' * nb} INIT_FUNCTION {name}"
def end_function(nb, name):
    """Emit the code that closes a function using *nb* cells.

    The code zeroes the *nb* cells to the left, comes back, moves the current
    cell's value *nb* cells to the left and leaves the pointer there.
    """
    left, right = '<' * nb, '>' * nb
    move_value = f"[-{left}+{right}]" if nb else ""
    pieces = ('<[-]' * nb, right, move_value, left, f' END_FUNCTION {name}')
    return ''.join(pieces)
def bf(code, name):
    """Wrap raw Brainfuck *code* between a ``BF <name>`` line and an ``END_BF`` line."""
    return '\n'.join((f"BF {name}", f"{code}", "END_BF"))
def assign(val, name):
    """Emit code that moves the current cell into the cell *val* places to the left.

    The target is zeroed first; the pointer ends one cell left of where it started.
    """
    to_target, back = '<' * val, '>' * val
    return f"{to_target}[-]{back}[-{to_target}+{back}]< ASSIGN {name}"
def inc(val, name):
    """Emit code that increments the cell *val* places to the left and comes back."""
    there, back = '<' * val, '>' * val
    return f"{there}+{back} INC {name}"
def dec(val, name):
    """Emit code that decrements the cell *val* places to the left and comes back."""
    there, back = '<' * val, '>' * val
    return f"{there}-{back} DEC {name}"
def pop():
    """Emit code that zeroes the current cell and steps one cell to the left."""
    code, label = '[-]<', 'POP'
    return f"{code} {label}"
def push_number(val):
    """Emit code that steps one cell right and adds *val* to it, one ``+`` per unit."""
    return f">{'+' * val} PUSH_NUMBER {val}"
def push_var(val, name):
    """Emit code that copies a variable's cell to the cell right of the pointer.

    The code walks ``val - 1`` cells left to the variable, moves its value
    into two cells ``val`` and ``val + 1`` places to the right, then moves
    the second copy back into the variable.  The pointer ends one cell right
    of where it started.
    """
    down, up = '<' * val, '>' * val
    seek = '<' * (val - 1)
    duplicate = f"[-{up}+>+<{down}]"
    restore = f"{up}>[-<{down}+>{up}]<"
    return f"{seek}{duplicate}{restore} PUSH_VAR {name}"
def bin_op(op):
    """Return the Brainfuck code for the binary operator *op*, plus a label.

    Operators whose spelling contains Brainfuck command characters are
    labelled with a word (``plus``, ``minus``, ``gt``, ``st``, ``gt_eq``,
    ``st_eq``) so that the label adds no commands to the output.

    :raises Exception: if *op* has no implementation.
    """
    # Shared first part of the six comparison operators.
    compare = "<[>[->+>+<<]>>[-<<+>>]+<[>-<[-]]>[->+>+<<]>>[-<<+>>]<[<<<<[-]+>+>>>-]<<<-<-]"
    programs = {
        '+': "[-<+>]<",
        '-': "[-<->]<",
        '*': "<[->>+<<]>[->[-<<+>>>+<]>[-<+>]<<]>[-]<<",
        '/': (
            "[->+>>+<<<]<[->+>>+<<<]>>>[>[->+>+<<]>>[-<<+>>]+<[>-<[-]]>[->+>+<<]>>[-<<+>>]<[<<<<[-]+>+>>>-]<<<-<-]+>[<->[-]]>>[-]<<<"
            "[-<[-<->>+<]>[-<+>]<<<+>[->>+>+<<<]>>>[-<<<+>>>]<<[->>+>+<<<]>>>[-<<<+>>>]<<[>[->+>+<<]>>[-<<+>>]+<[>-<[-]]>[->+>+<<]>"
            ">[-<<+>>]<[<<<<[-]+>+>>>-]<<<-<-]+>[<->[-]]>>[-]<<<]<[-]<[-]<"
        ),
        '%': (
            "[->+>>+<<<]<[->+>>+<<<]>>>[>[->+>+<<]>>[-<<+>>]+<[>-<[-]]>[->+>+<<]>>[-<<+>>]<[<<<<[-]+>+>>>-]<<<-<-]+>[<->[-]]>>[-]<<<"
            "[-<[-<->>+<]>[-<+>]<<[->>+>+<<<]>>>[-<<<+>>>]<<[->>+>+<<<]>>>[-<<<+>>>]<<[>[->+>+<<]>>[-<<+>>]+<[>-<[-]]>[->+>+<<]>>"
            "[-<<+>>]<[<<<<[-]+>+>>>-]<<<-<-]+>[<->[-]]>>[-]<<<]<[-]<[-<+>]<"
        ),
        '==': compare + "+>[<->[-]]>>[<<<->>>-]<<<",
        '!=': compare + ">[<+>[-]]>>[<<<+>>>-]<<<",
        '>': compare + ">[-]>>[<<<+>>>-]<<<",
        '>=': compare + "+>[<->[-]]>>[-]<<<",
        '<': compare + ">[<+>[-]]>>[-]<<<",
        '<=': compare + "+>[-]>>[<<<->>>-]<<<",
    }
    if op not in programs:
        raise Exception(f"bin_op {op} not implemented yet")
    words = {
        '+': 'plus',
        '-': 'minus',
        '>': 'gt',
        '<': 'st',
        '>=': 'gt_eq',
        '<=': 'st_eq',
    }
    label = words.get(op, op)
    return f"{programs[op]} BIN_OP {label}"
def while_enter():
    """Emit the loop opening: enter while the current cell is non-zero, zero it, step left."""
    code, label = '[[-]<', 'WHILE_ENTER'
    return f"{code} {label}"
def while_end():
    """Emit the loop closing bracket followed by one step to the left."""
    code, label = ']<', 'WHILE_END'
    return f"{code} {label}"
def if_enter():
    """Emit the opening of an ``if``.

    The cell to the right is incremented; then, if the current cell is
    non-zero, it is zeroed and the right-hand cell is decremented again,
    leaving the pointer on that right-hand cell.
    """
    code, label = '>+<[[-]>-', 'IF_ENTER'
    return f"{code} {label}"
def else_enter():
    """Emit the switch from the ``if`` body to the ``else`` body.

    Closes the ``if`` bracket, then opens a bracket on the cell to the right,
    decrements it and steps two cells left.
    """
    code, label = '<]>[-<<', 'ELSE_ENTER'
    return f"{code} {label}"
def else_end():
    """Emit the closing of an ``else`` body: two steps right, close bracket, two steps left."""
    code, label = '>>]<<', 'ELSE_END'
    return f"{code} {label}"
def if_end():
    """Emit the closing of an ``if`` without ``else``.

    Closes the bracket one cell to the left, zeroes the cell to its right
    and steps two cells left.
    """
    code, label = '<]>[-]<<', 'IF_END'
    return f"{code} {label}"
def compile_ir(ir):
    """Translate a list of IR tuples into Brainfuck text, one line per instruction.

    Each opcode is lower-cased and looked up among this module's globals;
    the function found is called with the instruction's operands.
    """
    namespace = globals()
    lines = []
    for instruction in ir:
        emitter = namespace[instruction[0].lower()]
        lines.append(emitter(*instruction[1:]))
    return '\n'.join(lines)
#**************
def compile(prog, level=4):
    """Run the compilation pipeline on the source text *prog*.

    *level* selects the stage whose result is returned: 1 for the tokens,
    2 for the AST, 3 for the IR; any other value yields the final Brainfuck
    text.
    """
    stages = (
        lex,
        lambda tokens: Parser(tokens).parse(),
        lambda ast: Ast_to_IR(ast).convert(),
        compile_ir,
    )
    result = prog
    for depth, stage in enumerate(stages, start=1):
        result = stage(result)
        if depth == level:
            break
    return result
def main():
    """Compile the file named by the first command-line argument.

    The Brainfuck output is written next to the source, with the source's
    extension replaced by ``.bf``.

    :raises Exception: if no file was given on the command line.
    """
    import sys
    try:
        file = sys.argv[1]
    except IndexError:
        raise Exception("You must set a file")
    # Read through a context manager so the handle is closed right away.
    with open(file) as source:
        prog = source.read()
    bf_prog = compile(prog)
    with open(os.path.splitext(file)[0] + '.bf', 'w') as f:
        f.write(bf_prog)


if __name__ == '__main__':
    main()