brainfuck/to_bf_compiler.py

523 lines
12 KiB
Python

import re
from collections import namedtuple
import os
from termcolor import cprint
os.system('color')
REGEXS = [(re.compile(i),j) for i,j in (
(r'\s+','WHITESPACE'),
(r'//[^\n]*|/\*(?s:.)*\*/','COMMENTS'),
(r'if|for|while|else|var','KEYWORD'),
(r'>=|<=|==|>|<','COMPAR_OP'),
(r'\+=|-=|\*=|/=|=','ASSIGNEMENT_OP'),
(r'\+\+|--','INC_DEC_OP'),
(r'\+|\*\*|\*|-|/','BIN_OP'),
(r'\(|\)|\{|\}|;|,','CONTROL_CHAR'),
(r'[a-zA-Z]\w*','IDENTIFIERS'),
(r'\d[\d_]*','NUMBERS')
)]
USELESS_TAGS = ('WHITESPACE','COMMENTS')
Token = namedtuple('Token',('text','tag'))
def lex(program,keep_whitespace=False):
pos = 0
tokens = []
while pos < len(program):
for regex, tag in REGEXS:
match = regex.match(program,pos)
if match is not None:
token = match.group(0)
tokens.append(Token(token,tag))
pos = match.end(0)
break
else:
raise SyntaxError(f"No match find during the lexing for {program[pos:pos+20]}...")
if keep_whitespace:
return tokens
return clean_tokens(tokens)
def clean_tokens(tokens):
return [token for token in tokens if token.tag not in USELESS_TAGS]
#****************************
ProgramNode = namedtuple('ProgramNode',('list_of_statement'))
BlockNode = namedtuple('BlockNode',('list_of_statement'))
IfNode = namedtuple('IfNode',('condition','block','else_block'))
WhileNode = namedtuple('WhileNode',('condition','block'))
AssignementNode = namedtuple('AssignementNode',('target','operator','expression'))
IncDecNode = namedtuple('IncDecNode',('name','operator'))
DeclarationNode = namedtuple('DeclarationNode',('names'))
BinOpNode = namedtuple('BinOpNode',('left','op','right'))
IdentifierNode = namedtuple('IdentifierNode',('name'))
NumberNode = namedtuple('NumberNode',('value'))
class Parser:
def __init__(self,tokens):
self.tokens = tokens
self.pos = 0
def parse(self):
return self.program()
def accept_text(self,value):
try:
token = self.tokens[self.pos]
except IndexError:
#print("reach the end")
return False
if token.text == value:
self.pos += 1
return token
return False
def accept_tag(self,tag):
try:
token = self.tokens[self.pos]
except IndexError:
return False
if token.tag == tag:
self.pos += 1
return token
return False
def program(self):
nodes = []
while 1:
statement = self.statement()
if statement:
nodes.append(statement)
else:
break
if not len(nodes):
raise ValueError("Your program seems to be empty")
return ProgramNode(nodes)
def block(self):
#print(f"enter block ({self.pos})")
if not self.accept_text("{"):
return None
nodes = []
while 1:
statement = self.statement()
if statement:
nodes.append(statement)
else:
break
if not self.accept_text('}'):
raise SyntaxError("'}' missing at the end of the block")
#cprint("block",'green')
return BlockNode(nodes)
def statement(self):
#print(f"enter statement ({self.pos})")
for func in (self.short_statement,self.while_block, self.if_block):
node = func()
if node:
#cprint("statement",'green')
return node
#cprint("no statement",'red')
return None
def short_statement(self):
#print(f"enter short_statement ({self.pos})")
for func in (self.declaration, self.assignement,self.inc_dec,self.expression,):
node = func()
if node:
if self.accept_text(";"):
#cprint('short_statement','green')
return node
else:
raise SyntaxError(f"Missing ';' ({node})")
#cprint("no short_statement",'red')
return None
def declaration(self):
if not self.accept_text('var'):
return None
name = self.accept_tag('IDENTIFIERS')
if not name:
raise SyntaxError("You must specifies an identifiers after a 'var' keyword")
names = [IdentifierNode(name.text)]
while 1:
if self.accept_text(','):
name = self.accept_tag('IDENTIFIERS')
if name:
names.append(IdentifierNode(name.text))
else:
raise SyntaxError("Expected an identifiers after a ',' in a declaration statemement")
else:
break
return DeclarationNode(names)
def assignement(self):
#print(f"enter assignement ({self.pos})")
name = self.accept_tag('IDENTIFIERS')
if not name:
#cprint("no assignement (no name)",'red')
return None
sym = self.accept_tag('ASSIGNEMENT_OP')
if not sym:
self.pos -= 1 #for the name
#cprint("no assignement (no op)",'red')
return None
expr = self.expression()
if not expr:
raise SyntaxError("Missing expression in a assignement")
#cprint("assignement",'green')
return AssignementNode(IdentifierNode(name.text),sym.text,expr)
def inc_dec(self):
#print(f"enter inc_dec ({self.pos})")
name = self.accept_tag('IDENTIFIERS')
if not name:
#cprint("no inc_dec (no name)",'red')
return None
op = self.accept_tag('INC_DEC_OP')
if not op:
self.pos -= 1 #for the name
#cprint("no inc_dec (no op)",'red')
return None
#cprint("inc_dec",'green')
return IncDecNode(IdentifierNode(name.text),op.text)
def expression(self):
#print(f"enter expression ({self.pos})")
compar = self.compar()
if compar:
#cprint("expression",'green')
return compar
#cprint("no expression",'red')
return None
def compar(self):
calcul1 = self.calcul()
if not calcul1:
return None
op = self.accept_tag('COMPAR_OP')
if op:
calcul2 = self.calcul()
if not calcul2:
raise SyntaxError("missing second term in calcul")
return BinOpNode(calcul1,op.text,calcul2)
return calcul1
def calcul(self):
#print(f"enter calcul ({self.pos})")
"""There is calcul, terme, factor and val for differentiate between +/-, * or /, and **"""
terme1 = self.terme()
if not terme1:
return None
op = None
sym = self.accept_text('+')
if sym:
op = sym
sym = self.accept_text('-')
if sym:
op = sym
if op:
terme2 = self.terme()
if not terme2:
raise SyntaxError("missing second term in calcul")
return BinOpNode(terme1,op.text,terme2)
return terme1
def terme(self):
#print(f"enter terme ({self.pos})")
factor1 = self.factor()
if not factor1:
return None
op = None
sym = self.accept_text('*')
if sym:
op = sym
sym = self.accept_text('/')
if sym:
op = sym
if op:
factor2 = self.factor()
if not factor2:
raise SyntaxError("missing second factor in terme")
return BinOpNode(factor1,op.text,factor2)
return factor1
def factor(self):
#print(f"enter factor ({self.pos})")
val = self.value()
if not val:
return None
op = self.accept_text('**')
if op:
val2 = self.value()
if not val2:
raise SyntaxError("missing second val un factor")
return BinOpNode(val,op.text,val2)
return val
def value(self):
#print(f"enter value ({self.pos})")
name = self.accept_tag('IDENTIFIERS')
if name:
return IdentifierNode(name.text)
number = self.accept_tag('NUMBERS')
if number:
return NumberNode(int(number.text))
if self.accept_text('('):
expr = self.expression()
if not (expr and self.accept_text(')')):
raise SyntaxError("missing expression or matching ')'")
return expr
return None
def while_block(self):
#print(f"enter while_block ({self.pos})")
if not self.accept_text('while'):
#cprint("no while_block (keyword)",'red')
return None
if not self.accept_text('('):
raise SyntaxError("Bad while block")
cond = self.expression()
if not cond:
raise SyntaxError("Bad while block")
if not self.accept_text(')'):
raise SyntaxError("Bad while block")
block = self.block()
if not block:
raise SyntaxError("Bad while block")
#cprint("while_block",'green')
return WhileNode(cond,block)
def if_block(self):
#print(f"enter if_block ({self.pos})")
if not self.accept_text('if'):
#cprint("no if_block (keyword)",'red')
return None
if not self.accept_text('('):
raise SyntaxError("Bad if block")
cond = self.expression()
if not cond:
raise SyntaxError("Bad if block")
if not self.accept_text(')'):
raise SyntaxError("Bad if block")
block = self.block()
if not block:
raise SyntaxError("Bad if block")
if self.accept_text('else'):
else_block = self.block()
if not else_block:
raise SyntaxError("Bad else block")
return IfNode(cond,block,else_block)
#cprint("if_block",'green')
return IfNode(cond,block,None)
def parse(prog):
tokens = clean_tokens(lex(prog))
parser = Parser(tokens)
return parser.program()
#***************
class Ast_to_IR:
def __init__(self,ast):
self.ast = ast
def add(self,val):
self.ir.append(val)
def convert(self):
self.ir = []
vars_ = []
for statemement in self.ast.list_of_statement:
if isinstance(statemement,DeclarationNode):
vars_.extend(statemement.names)
vars_ = {val:index for index,val in enumerate(vars_)}
self.vars = vars_
self.pos = 0
self.add(('INIT',len(vars_)))
self.push_block(self.ast)
return self.ir
def push_block(self,node):
for statement in node.list_of_statement:
func = methods.get(type(statement),default) #switch-case
func(self,statement)
def assignement_to_ir(self,node):
if node.operator == '=':
expression = node.expression
else:
operator = node.operator[0] #ex : '+=' -> '+'
expression = BinOpNode(node.target,operator,node.expression)
self.push_expr(expression)
assign_distance = self.pos + self.vars[node.target]
self.add(('ASSIGN',assign_distance,node.target.name))
self.pos -= 1
def expression_statement_to_ir(self,node):
self.push_expr(node)
self.pos -= 1
self.add(('POP',))
def inc_dec_to_ir(self,node):
assign_distance = self.pos + self.vars[node.name]
if node.operator == "++":
self.add(('INC',assign_distance,node.name.name))
else:
self.add(('DEC',assign_distance,node.name.name))
def push_expr(self,node):
if isinstance(node,NumberNode):
self.pos += 1
self.add(('PUSH_NUMBER',node.value))
return
if isinstance(node,IdentifierNode):
self.pos += 1
distance = self.pos + self.vars[node]
self.add(('PUSH_VAR',distance,node.name))
return
if isinstance(node,BinOpNode):
self.push_expr(node.left)
self.push_expr(node.right)
self.pos -= 1
self.add(('BIN_OP',node.op))
return
raise Exception(f"cette expression n'est pas bonne : {node}")
def while_to_ir(self,node):
self.push_expr(node.condition)
self.add(('WHILE_ENTER',))
self.pos -= 1
self.push_block(node.block)
self.push_expr(node.condition)
self.add(('WHILE_END',))
self.pos -= 1
def if_to_ir(self,node):
self.push_expr(node.condition)
self.add(('IF_ENTER',))
self.pos += 1
self.push_block(node.block)
if node.else_block:
self.add(('ELSE_ENTER',))
self.pos -= 2
self.push_block(node.else_block)
self.add(('ELSE_END',))
else:
self.add(('IF_END',))
self.pos -= 2
methods = {
DeclarationNode: lambda self,node:None,
AssignementNode: Ast_to_IR.assignement_to_ir,
WhileNode: Ast_to_IR.while_to_ir,
IfNode: Ast_to_IR.if_to_ir,
IncDecNode: Ast_to_IR.inc_dec_to_ir,
}
default = Ast_to_IR.expression_statement_to_ir
#************
def init(nb):
return '>' * (nb-1) + f' INIT {nb}'
def assign(val,name):
return f"{'<'*val}[-]{'>'*val}[-{'<'*val}+{'>'*val}]< ASSIGN {name}"
def inc(val,name):
return f"{'<'*val}+{'>'*val} INC {name}"
def dec(val,name):
return f"{'<'*val}-{'>'*val} DEC {name}"
def pop():
return '[-]< POP'
def push_number(val):
return ">" + "+"*val + f" PUSH_NUMBER {val}"
def push_var(val,name):
return '<' * (val-1) + f"[-{'>'*val}+>+<{'<'*val}]" + f"{'>'*val}>[-<{'<'*val}+>{'>'*val}]<" + f" PUSH_VAR {name}"
def bin_op(op):
if op == "+":
bf = "[-<+>]<"
elif op == "-":
bf = "[-<->]<"
else:
raise Exception(f"bin_op {op} not implemented yet")
return bf + f" BIN_OP {op if op not in ('+','-') else {'+':'plus','-':'minus'}[op]}"
def while_enter():
return "[[-]< WHILE_ENTER"
def while_end():
return "]< WHILE_END"
def if_enter():
return ">+<[[-]>- IF_ENTER"
def else_enter():
return "<]>[-<< ELSE_ENTER"
def else_end():
return ">>]<< ELSE_END"
def if_end():
return "<]>[-]<< IF_END"
def compile_ir(ir):
return '\n'.join(globals()[i[0].lower()](*i[1:]) for i in ir)
#**************
def compile(prog):
tokens = lex(prog)
ast = Parser(tokens).parse()
ir = Ast_to_IR(ast).convert()
brainfuck = compile_ir(ir)
return brainfuck