Py3 #13

24 changes: 16 additions & 8 deletions codetalker/cgrammar.pyx
@@ -1,5 +1,6 @@
# cython: profile=True
from libc.stdlib cimport malloc, free
from cpython.version cimport PY_MAJOR_VERSION

from codetalker.pgm.tokens import INDENT, DEDENT, EOF, Token as PyToken, ReToken
from codetalker.pgm.errors import ParseError, TokenError, AstError
@@ -295,7 +296,7 @@ def get_tokens(gid, text):

cdef Token* tokens

try_get_tokens(gid, text, &tokens)
try_get_tokens(gid, text.encode('utf-8'), &tokens)

pytokens = convert_back_tokens(gid, tokens)
kill_tokens(tokens)
@@ -325,6 +326,7 @@ def get_parse_tree(gid, text, start_i):
'''
cdef Token* tokens

text = text.encode('latin1')
try_get_tokens(gid, text, &tokens)

cdef TokenStream tstream = tokens_to_stream(tokens)
@@ -402,6 +404,8 @@ def get_ast(gid, text, start_i, ast_classes, ast_tokens):
cdef TokenStream tstream
cdef cParseNode* ptree

text = text.encode('latin1')

try:
try_get_tokens(gid, text, &tokens)

@@ -478,6 +482,7 @@ cdef Rule convert_rule(object rule, unsigned int i):
crule.dont_ignore = rule.dont_ignore
crule.num = len(rule.options)
crule.options = <RuleOption*>malloc(sizeof(RuleOption)*crule.num)
rule.name = rule.name.encode('latin1')
crule.name = rule.name
crule.keep_tree = rule.keep_tree
for i from 0<=i<crule.num:
@@ -562,15 +567,18 @@ cdef object convert_ast_attrs(object ast_attrs, object rules, object tokens, Ast
continue
else:
result[i].pass_single = 0
keys = ast_attrs[i]['attrs'].keys()
keys = list(ast_attrs[i]['attrs'].keys())
result[i].num = len(keys)
if len(keys):
result[i].attrs = <AstAttr*>malloc(sizeof(AstAttr)*result[i].num);
else:
result[i].attrs = NULL

for m from 0<=m<result[i].num:
convert_ast_attr(keys[m], ast_attrs[i]['attrs'][keys[m]], rules, tokens, &result[i].attrs[m])
key = keys[m]
if PY_MAJOR_VERSION >= 3 and isinstance(keys[m], str):
key = keys[m].encode('latin1')
convert_ast_attr(key, ast_attrs[i]['attrs'][keys[m]], rules, tokens, &result[i].attrs[m])

cdef object which_rt(object it, object rules, object tokens):
'''convert an ast type (rule or token object) into the appropriate ID, ready for AST construction.
@@ -848,14 +856,14 @@ cdef Token* _get_tokens(int gid, char* text, cTokenError* error, char* idchars):
elif tokens[i]._type == RETOKEN:
res = tokens[i].check(state.text[state.at:])
else:
print 'Unknown token type', tokens[i]._type, tokens[i]
print('Unknown token type', tokens[i]._type, tokens[i])
# should this raise an error?

if res:
tmp = <Token*>malloc(sizeof(Token))
tmp.value = <char*>malloc(sizeof(char)*(res+1))
strncpy(tmp.value, state.text + state.at, res)
tmp.value[res] = '\0'
tmp.value[res] = b'\0'
tmp.allocated = 1
# print 'got token!', res, state.at, [tmp.value], state.lineno, state.charno
tmp.which = i
@@ -897,7 +905,7 @@ cdef Token* advance(int res, Token* current, bint indent, TokenState* state, int
int ind = 0
Token* tmp
for i from state.at <= i < state.at + res:
if state.text[i] == '\n':
if state.text[i] == b'\n':
numlines+=1
last = i
state.lineno += numlines
if not indent:
return current
# if we just consumed a newline, check & update the indents
if indent and res == 1 and state.text[state.at] == <char>'\n':
if indent and res == 1 and state.text[state.at] == <char>b'\n':
ind = t_white(state.at + 1, state.text, state.ln)
if ind < 0:
return current
@@ -938,7 +946,7 @@ cdef Token* advance(int res, Token* current, bint indent, TokenState* state, int
current = tmp
cindent = state.indents[state.num_indents - 1]
if ind != cindent:
etxt = 'invalid indentation -- %d (expected %d)' % (ind, cindent)
etxt = 'invalid indentation -- {} (expected {})'.format(ind, cindent).encode('latin1')
error.text = etxt
error.lineno = state.lineno
error.charno = state.charno
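Note: the hunks above push the str-to-bytes conversion to the Cython boundary: get_tokens encodes as UTF-8, while get_parse_tree, get_ast, and convert_rule use Latin-1, and convert_ast_attrs guards its encode with a PY_MAJOR_VERSION check. A minimal sketch of that pattern, assuming the same idea of "encode once at the C boundary"; the helper name to_bytes is illustrative and not part of codetalker:

    import sys

    def to_bytes(text, encoding='latin1'):
        # Python 3 str must be encoded before it can back a C char* buffer;
        # on Python 2 the value is usually already a byte string.
        if sys.version_info[0] >= 3 and isinstance(text, str):
            return text.encode(encoding)
        return text

    # Usage mirroring get_parse_tree/get_ast above:
    raw = to_bytes("start: NAME '=' NUMBER\n")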
6 changes: 4 additions & 2 deletions codetalker/contrib/configparser.py
@@ -1,5 +1,7 @@
#!/usr/bin/env python

from future.utils import lrange

from codetalker.pgm import Grammar, Translator
from codetalker.pgm.special import star, plus, _or
from codetalker.pgm.tokens import *
@@ -52,10 +54,10 @@ def get_item(self, section, name, check=()):
if '%' not in value: # no need to interpolate
return value
vbls = {}
for i in xrange(1000): # just in case something goes wrong...
for i in lrange(1000): # just in case something goes wrong...
try:
return value % vbls
except KeyError, e:
except KeyError as e:
vbls[e.args[0]] = self.get_item(section, e.args[0], check + (name,))
raise RecursionError('resursive interpolation...')

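Note: the port here swaps the Python 2-only "except KeyError, e" syntax for "except KeyError as e" and replaces xrange with lrange from future.utils so the loop bound works on both interpreters. A minimal sketch of the resulting interpolation loop, using an illustrative lookup callback rather than the contrib class itself:

    def interpolate(value, lookup, max_depth=1000):
        vbls = {}
        for _ in range(max_depth):        # plain range also iterates fine on 2 and 3
            try:
                return value % vbls       # e.g. 'path = %(root)s/bin'
            except KeyError as e:         # 'as' form works on Python 2.6+ and 3
                vbls[e.args[0]] = lookup(e.args[0])
        raise RuntimeError('recursive interpolation')

    print(interpolate('%(root)s/bin', {'root': '/usr'}.get))   # -> /usr/bin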
2 changes: 1 addition & 1 deletion codetalker/contrib/math.py
@@ -41,7 +41,7 @@ class SYMBOL(CharToken):
ast = grammar.ast_classes

import operator
ops = {'**':operator.pow, '*':operator.mul, '/':operator.div, '%':operator.mod, '+':operator.add, '-':operator.sub}
ops = {'**':operator.pow, '*':operator.mul, '/':operator.truediv, '%':operator.mod, '+':operator.add, '-':operator.sub}

@m.translates(ast.BinOp)
def binop(node):
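Note: operator.div no longer exists on Python 3, so the operator table switches to truediv, which matches the Python 3 meaning of '/'. If the old integer-division behaviour were ever wanted instead, operator.floordiv would be the substitute; a quick check:

    import operator

    assert operator.truediv(7, 2) == 3.5   # Python 3 '/' semantics
    assert operator.floordiv(7, 2) == 3    # old Python 2 integer '/' on ints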
10 changes: 5 additions & 5 deletions codetalker/pgm/__init__.py
@@ -1,9 +1,9 @@
#!/usr/bin/env python

import token
from grammar import Grammar
from translator import Translator
import special
from tokens import *
from . import token
from .grammar import Grammar
from .translator import Translator
from . import special
from .tokens import *

# vim: et sw=4 sts=4
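Note: Python 3 removes implicit relative imports, so a bare "import token" inside this package would now resolve to the standard-library token module rather than codetalker.pgm.token; the dotted form pins the lookup to the package. A two-line illustration of the difference, as it would read inside codetalker/pgm:

    from . import token          # codetalker.pgm.token, resolved relative to the package
    import token as std_token    # the CPython standard-library 'token' module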
5 changes: 3 additions & 2 deletions codetalker/pgm/errors.py
@@ -14,11 +14,12 @@ class ParseError(LineError):

class TokenError(LineError):
def __init__(self, msg, text, lineno, charno):
tease = ''
tease = b''
lines = text.splitlines()
if lineno-1 < len(lines):
tease = lines[lineno-1][charno-1:charno+30]
Exception.__init__(self, msg + ' at (%d, %d) \'%s\'' % (lineno, charno, tease.encode('string_escape')))
tease = str(tease)
Exception.__init__(self, str(msg) + ' at (%d, %d) \'%s\'' % (lineno, charno, tease.encode('unicode_escape')))
self.lineno = lineno
self.charno = charno
pass
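Note: the 'string_escape' codec is Python 2-only; on Python 3 the closest equivalent is encoding a str with 'unicode_escape'. That encode returns bytes, so decoding back to ASCII keeps the error message printable. A minimal sketch of the round trip, standalone rather than the TokenError class itself:

    tease = 'bad\ttoken\n'
    printable = tease.encode('unicode_escape').decode('ascii')
    print(printable)   # -> bad\ttoken\n  (escapes shown literally, nothing rendered as a tab or newline)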
36 changes: 22 additions & 14 deletions codetalker/pgm/grammar.py
@@ -1,11 +1,15 @@
from rules import RuleLoader
from tokens import EOF, INDENT, DEDENT, Token
from errors import *
from __future__ import print_function
from future.utils import iteritems

from nodes import AstNode, ParseTree, TokenStream
from logger import logger
import inspect

from .rules import RuleLoader
from .tokens import EOF, INDENT, DEDENT, Token
from .errors import *

from .nodes import AstNode, ParseTree, TokenStream
from .logger import logger

# from codetalker.pgm.cgrammar.tokenize import tokenize
# from codetalker.pgm.cgrammar import main
# from text import Text, IndentText
@@ -48,7 +52,8 @@ def __init__(self, start, tokens=(), ignore=(), idchars='', indent=False, ast_to
self.tokens.append(i)
self.ast_tokens = tuple(self.tokens.index(tok) for tok in ast_tokens)
self.indent = indent
self.idchars = idchars
# Note this needs to be reviewed, should we be converting str to bytes or the other way around
self.idchars = idchars.encode('latin1')

self.token_rules = []
self.token_names = []
@@ -96,10 +101,9 @@ def start(rule):
name = getattr(builder, 'astName', None)
if name is None:
name = camelCase(builder.__name__)

rule = RuleLoader(self)
rule.name = name

self.rule_dict[builder] = num
self.rules.append(rule)
self.rule_names.append(name)
if not rule.options:
raise Exception('no rule options specified in %r' % builder)
attrs = []
for attr, dct in rule.astAttrs.iteritems():
for attr, dct in iteritems(rule.astAttrs):
if type(dct) != dict:
dct = {'type':dct}
if type(dct['type']) not in (tuple, list):
@@ -261,17 +265,20 @@ def to_ast(self, tree):
def parse_rule(self, rule, tokens, error):
if rule < 0 or rule >= len(self.rules):
raise ParseError('invalid rule: %d' % rule)
if logger.output:print>>logger, 'parsing for rule', self.rule_names[rule]
if logger.output:
print('parsing for rule', self.rule_names[rule], file=logger)
logger.indent += 1
node = ParseTree(rule, self.rule_names[rule])
for option in self.rules[rule]:
res = self.parse_children(rule, option, tokens, error)
if res is not None:
if logger.output:print>>logger, 'yes!',self.rule_names[rule], res
if logger.output:
print('yes!', self.rule_names[rule], res, file=logger)
logger.indent -= 1
node.children = res
return node
if logger.output:print>>logger, 'failed', self.rule_names[rule]
if logger.output:
print('failed', self.rule_names[rule], file=logger)
logger.indent -= 1
return None

res.append(tokens.current())
tokens.advance()
current = children[i]
if logger.output:print>>logger, 'parsing child',current,i
if logger.output:
print('parsing child', current, i, file=logger)
if type(current) == int:
if current < 0:
ctoken = tokens.current()
@@ -321,7 +329,7 @@ def parse_children(self, rule, children, tokens, error):
continue
if tokens.at > error[0]:
error[0] = tokens.at
error[1] = 'Unexpected token %s; expected \'%s\' (while parsing %s)' % (repr(ctoken), current.encode('string_escape'), self.rule_names[rule])
error[1] = 'Unexpected token %s; expected \'%s\' (while parsing %s)' % (repr(ctoken), str(current).encode('unicode_escape'), self.rule_names[rule])
if logger.output:print>>logger, 'FAIL string compare:', [current, tokens.current().value]
return None
elif type(current) == tuple:
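Note: besides the explicit relative imports and iteritems, the main mechanical change here is rewriting the "print >> logger, ..." statements as print(..., file=logger), which only requires the target to expose a write() method, so the logger object imported above from .logger keeps working. A minimal stand-in with that interface, illustrative rather than codetalker's own class:

    from __future__ import print_function
    import sys

    class Logger(object):          # the minimal interface print(..., file=...) needs
        output = True
        indent = 0
        def write(self, text):
            sys.stderr.write(text)

    logger = Logger()
    if logger.output:
        print('parsing for rule', 'start', file=logger)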
2 changes: 1 addition & 1 deletion codetalker/pgm/nodes.py
@@ -1,7 +1,7 @@
#!/usr/bin/env python

# from tokens import EOF, Token
from errors import ParseError
from .errors import ParseError

class TokenStream:
def __init__(self, tokens):
8 changes: 4 additions & 4 deletions codetalker/pgm/rules.py
@@ -1,10 +1,10 @@
#!/usr/bin/env python

from errors import *
import tokens
from tokens import Token
from .errors import *
from . import tokens
from .tokens import Token
import types
from special import Special
from .special import Special
import inspect

class RuleLoader(object):
4 changes: 2 additions & 2 deletions codetalker/pgm/text.py
@@ -1,6 +1,6 @@
#!/usr/bin/env python
from tokens import INDENT, DEDENT
from errors import *
from .tokens import INDENT, DEDENT
from .errors import *

class Text:
'''a small utility class in charge of serving up
8 changes: 5 additions & 3 deletions codetalker/pgm/token.py
@@ -10,10 +10,12 @@ def __init__(self, value, lineno=-1, charno=-1):

def __repr__(self):
return u'<%s token "%s" at (%d, %d)>' % (self.__class__.__name__,
self.value.encode('string_escape'), self.lineno, self.charno)
str(self.value).encode('unicode_escape'),
self.lineno,
self.charno)

def __str__(self):
return self.value
return str(self.value.decode('latin1'))

def __eq__(self, other):
if type(other) in (tuple, list):
@@ -30,7 +32,7 @@ class ReToken(Token):

@classmethod
def check(cls, text):
m = cls.rx.match(text)
m = cls.rx.match(text.decode('latin1'))
if m:
return len(m.group())
return 0
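Note: both changes in token.py deal with the same mismatch: on Python 3 the stored token value is bytes, but __str__ must return str, and a regular expression compiled from a str pattern cannot match a bytes argument, hence the decode('latin1') calls. A small demonstration of the regex half:

    import re

    rx = re.compile(r'[a-zA-Z_][a-zA-Z_0-9]*')
    raw = b'name = 3'
    m = rx.match(raw.decode('latin1'))   # matching raw directly raises TypeError on Python 3
    print(m.group())                     # -> name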
6 changes: 3 additions & 3 deletions codetalker/pgm/tokenize.py
@@ -1,7 +1,7 @@
#!/usr/bin/env python

from tokens import Token, EOF
from errors import TokenError
from .tokens import Token, EOF
from .errors import TokenError

def tokenize(tokens, text):
'''a generator to split some text into tokens'''
@@ -18,7 +18,7 @@ def tokenize(tokens, text):
break
else:
raise TokenError('no token matches the text at (%d, %d): "%s"' % (text.lineno,
text.charno, text.text[text.at:text.at+10].encode('string_escape')))
text.charno, str(text.text[text.at:text.at+10]).encode('unicode_escape')))
text.advance(len(one.value))

# vim: et sw=4 sts=4
2 changes: 1 addition & 1 deletion codetalker/pgm/tokens.py
@@ -1,6 +1,6 @@
#!/usr/bin/env python

from token import Token, ReToken
from .token import Token, ReToken

import re

11 changes: 6 additions & 5 deletions codetalker/pgm/translator.py
@@ -1,12 +1,13 @@
#!/usr/bin/env python

from tokens import Token
import types
import inspect
import copy
from nodes import AstNode
from future.utils import iteritems

from .tokens import Token
from .nodes import AstNode

from errors import CodeTalkerException
from .errors import CodeTalkerException

class TranslatorException(CodeTalkerException):
pass
@@ -68,7 +69,7 @@ def from_ast(self, tree, **args):
stuff.update(args)
Scope = type('Scope', (), {})
scope = Scope()
for k,v in stuff.iteritems():
for k, v in iteritems(stuff):
setattr(scope, k, v)
return self.translate(tree, scope)
elif args:
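Note: iteritems here comes from the future package and yields key/value pairs lazily on both Python 2 and 3; plain dict.items() would behave the same for the small scope dicts built in from_ast. A sketch of a local fallback if the future dependency were ever unavailable; the shim below is illustrative, not part of codetalker:

    try:
        from future.utils import iteritems
    except ImportError:
        def iteritems(d):
            return iter(d.items())

    stuff = {'tree': None, 'scope': 1}
    for k, v in iteritems(stuff):
        print(k, v)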
2 changes: 1 addition & 1 deletion codetalker/testing.py
@@ -14,7 +14,7 @@ def _fail(string):
def meta():
try:
res = grammar.get_parse_tree(string, start=rule)
except (ParseError, TokenError), e:
except (ParseError, TokenError) as e:
pass
else:
raise AssertionError('parsing was supposed to fail for', string, res)
1 change: 1 addition & 0 deletions requirements.txt
@@ -1,2 +1,3 @@
cython
pytest
future