Py3 #13

24 changes: 16 additions & 8 deletions codetalker/cgrammar.pyx
@@ -1,5 +1,6 @@
# cython: profile=True
from libc.stdlib cimport malloc, free
from cpython.version cimport PY_MAJOR_VERSION

from codetalker.pgm.tokens import INDENT, DEDENT, EOF, Token as PyToken, ReToken
from codetalker.pgm.errors import ParseError, TokenError, AstError
@@ -295,7 +296,7 @@ def get_tokens(gid, text):

cdef Token* tokens

try_get_tokens(gid, text, &tokens)
try_get_tokens(gid, text.encode('utf-8'), &tokens)

pytokens = convert_back_tokens(gid, tokens)
kill_tokens(tokens)
@@ -325,6 +326,7 @@ def get_parse_tree(gid, text, start_i):
'''
cdef Token* tokens

text = text.encode('latin1')
try_get_tokens(gid, text, &tokens)

cdef TokenStream tstream = tokens_to_stream(tokens)
@@ -402,6 +404,8 @@ def get_ast(gid, text, start_i, ast_classes, ast_tokens):
cdef TokenStream tstream
cdef cParseNode* ptree

text = text.encode('latin1')

try:
try_get_tokens(gid, text, &tokens)

@@ -478,6 +482,7 @@ cdef Rule convert_rule(object rule, unsigned int i):
crule.dont_ignore = rule.dont_ignore
crule.num = len(rule.options)
crule.options = <RuleOption*>malloc(sizeof(RuleOption)*crule.num)
rule.name = rule.name.encode('latin1')
crule.name = rule.name
crule.keep_tree = rule.keep_tree
for i from 0<=i<crule.num:
@@ -562,15 +567,18 @@ cdef object convert_ast_attrs(object ast_attrs, object rules, object tokens, Ast
continue
else:
result[i].pass_single = 0
keys = ast_attrs[i]['attrs'].keys()
keys = list(ast_attrs[i]['attrs'].keys())
result[i].num = len(keys)
if len(keys):
result[i].attrs = <AstAttr*>malloc(sizeof(AstAttr)*result[i].num);
else:
result[i].attrs = NULL

for m from 0<=m<result[i].num:
convert_ast_attr(keys[m], ast_attrs[i]['attrs'][keys[m]], rules, tokens, &result[i].attrs[m])
key = keys[m]
if PY_MAJOR_VERSION >= 3 and isinstance(keys[m], str):
key = keys[m].encode('latin1')
convert_ast_attr(key, ast_attrs[i]['attrs'][keys[m]], rules, tokens, &result[i].attrs[m])

cdef object which_rt(object it, object rules, object tokens):
'''convert an ast type (rule or token object) into the appropriate ID, ready for AST construction.
@@ -848,14 +856,14 @@ cdef Token* _get_tokens(int gid, char* text, cTokenError* error, char* idchars):
elif tokens[i]._type == RETOKEN:
res = tokens[i].check(state.text[state.at:])
else:
print 'Unknown token type', tokens[i]._type, tokens[i]
print('Unknown token type', tokens[i]._type, tokens[i])
# should this raise an error?

if res:
tmp = <Token*>malloc(sizeof(Token))
tmp.value = <char*>malloc(sizeof(char)*(res+1))
strncpy(tmp.value, state.text + state.at, res)
tmp.value[res] = '\0'
tmp.value[res] = b'\0'
tmp.allocated = 1
# print 'got token!', res, state.at, [tmp.value], state.lineno, state.charno
tmp.which = i
@@ -897,7 +905,7 @@ cdef Token* advance(int res, Token* current, bint indent, TokenState* state, int
int ind = 0
Token* tmp
for i from state.at <= i < state.at + res:
if state.text[i] == '\n':
if state.text[i] == b'\n':
numlines+=1
last = i
state.lineno += numlines
if not indent:
return current
# if we just consumed a newline, check & update the indents
if indent and res == 1 and state.text[state.at] == <char>'\n':
if indent and res == 1 and state.text[state.at] == <char>b'\n':
ind = t_white(state.at + 1, state.text, state.ln)
if ind < 0:
return current
@@ -938,7 +946,7 @@ cdef Token* advance(int res, Token* current, bint indent, TokenState* state, int
current = tmp
cindent = state.indents[state.num_indents - 1]
if ind != cindent:
etxt = 'invalid indentation -- %d (expected %d)' % (ind, cindent)
etxt = 'invalid indentation -- {} (expected {})'.format(ind, cindent).encode('latin1')
error.text = etxt
error.lineno = state.lineno
error.charno = state.charno
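Note: the hunks above push the str-to-bytes conversion to the Cython boundary: get_tokens encodes as UTF-8, while get_parse_tree, get_ast, and convert_rule use Latin-1, and convert_ast_attrs guards its encode with a PY_MAJOR_VERSION check. A minimal sketch of that pattern, assuming the same idea of "encode once at the C boundary"; the helper name to_bytes is illustrative and not part of codetalker:

    import sys

    def to_bytes(text, encoding='latin1'):
        # Python 3 str must be encoded before it can back a C char* buffer;
        # on Python 2 the value is usually already a byte string.
        if sys.version_info[0] >= 3 and isinstance(text, str):
            return text.encode(encoding)
        return text

    # Usage mirroring get_parse_tree/get_ast above:
    raw = to_bytes("start: NAME '=' NUMBER\n")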
6 changes: 4 additions & 2 deletions codetalker/contrib/configparser.py
@@ -1,5 +1,7 @@
#!/usr/bin/env python

from future.utils import lrange

from codetalker.pgm import Grammar, Translator
from codetalker.pgm.special import star, plus, _or
from codetalker.pgm.tokens import *
@@ -52,10 +54,10 @@ def get_item(self, section, name, check=()):
if '%' not in value: # no need to interpolate
return value
vbls = {}
for i in xrange(1000): # just in case something goes wrong...
for i in lrange(1000): # just in case something goes wrong...
try:
return value % vbls
except KeyError, e:
except KeyError as e:
vbls[e.args[0]] = self.get_item(section, e.args[0], check + (name,))
raise RecursionError('resursive interpolation...')

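Note: the port here swaps the Python 2-only "except KeyError, e" syntax for "except KeyError as e" and replaces xrange with lrange from future.utils so the loop bound works on both interpreters. A minimal sketch of the resulting interpolation loop, using an illustrative lookup callback rather than the contrib class itself:

    def interpolate(value, lookup, max_depth=1000):
        vbls = {}
        for _ in range(max_depth):        # plain range also iterates fine on 2 and 3
            try:
                return value % vbls       # e.g. 'path = %(root)s/bin'
            except KeyError as e:         # 'as' form works on Python 2.6+ and 3
                vbls[e.args[0]] = lookup(e.args[0])
        raise RuntimeError('recursive interpolation')

    print(interpolate('%(root)s/bin', {'root': '/usr'}.get))   # -> /usr/bin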
2 changes: 1 addition & 1 deletion codetalker/contrib/math.py
@@ -41,7 +41,7 @@ class SYMBOL(CharToken):
ast = grammar.ast_classes

import operator
ops = {'**':operator.pow, '*':operator.mul, '/':operator.div, '%':operator.mod, '+':operator.add, '-':operator.sub}
ops = {'**':operator.pow, '*':operator.mul, '/':operator.truediv, '%':operator.mod, '+':operator.add, '-':operator.sub}

@m.translates(ast.BinOp)
def binop(node):
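Note: operator.div no longer exists on Python 3, so the operator table switches to truediv, which matches the Python 3 meaning of '/'. If the old integer-division behaviour were ever wanted instead, operator.floordiv would be the substitute; a quick check:

    import operator

    assert operator.truediv(7, 2) == 3.5   # Python 3 '/' semantics
    assert operator.floordiv(7, 2) == 3    # old Python 2 integer '/' on ints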
10 changes: 5 additions & 5 deletions codetalker/pgm/__init__.py
@@ -1,9 +1,9 @@
#!/usr/bin/env python

import token
from grammar import Grammar
from translator import Translator
import special
from tokens import *
from . import token
from .grammar import Grammar
from .translator import Translator
from . import special
from .tokens import *

# vim: et sw=4 sts=4
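Note: Python 3 removes implicit relative imports, so a bare "import token" inside this package would now resolve to the standard-library token module rather than codetalker.pgm.token; the dotted form pins the lookup to the package. A two-line illustration of the difference, as it would read inside codetalker/pgm:

    from . import token          # codetalker.pgm.token, resolved relative to the package
    import token as std_token    # the CPython standard-library 'token' module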
5 changes: 3 additions & 2 deletions codetalker/pgm/errors.py
@@ -14,11 +14,12 @@ class ParseError(LineError):

class TokenError(LineError):
def __init__(self, msg, text, lineno, charno):
tease = ''
tease = b''
lines = text.splitlines()
if lineno-1 < len(lines):
tease = lines[lineno-1][charno-1:charno+30]
Exception.__init__(self, msg + ' at (%d, %d) \'%s\'' % (lineno, charno, tease.encode('string_escape')))
tease = str(tease)
Exception.__init__(self, str(msg) + ' at (%d, %d) \'%s\'' % (lineno, charno, tease.encode('unicode_escape')))
self.lineno = lineno
self.charno = charno
pass
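Note: the 'string_escape' codec is Python 2-only; on Python 3 the closest equivalent is encoding a str with 'unicode_escape'. That encode returns bytes, so decoding back to ASCII keeps the error message printable. A minimal sketch of the round trip, standalone rather than the TokenError class itself:

    tease = 'bad\ttoken\n'
    printable = tease.encode('unicode_escape').decode('ascii')
    print(printable)   # -> bad\ttoken\n  (escapes shown literally, nothing rendered as a tab or newline)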
36 changes: 22 additions & 14 deletions codetalker/pgm/grammar.py
@@ -1,11 +1,15 @@
from rules import RuleLoader
from tokens import EOF, INDENT, DEDENT, Token
from errors import *
from __future__ import print_function
from future.utils import iteritems

from nodes import AstNode, ParseTree, TokenStream
from logger import logger
import inspect

from .rules import RuleLoader
from .tokens import EOF, INDENT, DEDENT, Token
from .errors import *

from .nodes import AstNode, ParseTree, TokenStream
from .logger import logger

# from codetalker.pgm.cgrammar.tokenize import tokenize
# from codetalker.pgm.cgrammar import main
# from text import Text, IndentText
@@ -48,7 +52,8 @@ def __init__(self, start, tokens=(), ignore=(), idchars='', indent=False, ast_to
self.tokens.append(i)
self.ast_tokens = tuple(self.tokens.index(tok) for tok in ast_tokens)
self.indent = indent
self.idchars = idchars
# Note this needs to be reviewed, should we be converting str to bytes or the other way around
self.idchars = idchars.encode('latin1')

self.token_rules = []
self.token_names = []
@@ -96,10 +101,9 @@ def start(rule):
name = getattr(builder, 'astName', None)
if name is None:
name = camelCase(builder.__name__)

rule = RuleLoader(self)
rule.name = name

self.rule_dict[builder] = num
self.rules.append(rule)
self.rule_names.append(name)
if not rule.options:
raise Exception('no rule options specified in %r' % builder)
attrs = []
for attr, dct in rule.astAttrs.iteritems():
for attr, dct in iteritems(rule.astAttrs):
if type(dct) != dict:
dct = {'type':dct}
if type(dct['type']) not in (tuple, list):
@@ -261,17 +265,20 @@ def to_ast(self, tree):
def parse_rule(self, rule, tokens, error):
if rule < 0 or rule >= len(self.rules):
raise ParseError('invalid rule: %d' % rule)
if logger.output:print>>logger, 'parsing for rule', self.rule_names[rule]
if logger.output:
print('parsing for rule', self.rule_names[rule], file=logger)
logger.indent += 1
node = ParseTree(rule, self.rule_names[rule])
for option in self.rules[rule]:
res = self.parse_children(rule, option, tokens, error)
if res is not None:
if logger.output:print>>logger, 'yes!',self.rule_names[rule], res
if logger.output:
print('yes!', self.rule_names[rule], res, file=logger)
logger.indent -= 1
node.children = res
return node
if logger.output:print>>logger, 'failed', self.rule_names[rule]
if logger.output:
print('failed', self.rule_names[rule], file=logger)
logger.indent -= 1
return None

res.append(tokens.current())
tokens.advance()
current = children[i]
if logger.output:print>>logger, 'parsing child',current,i
if logger.output:
print('parsing child', current, i, file=logger)
if type(current) == int:
if current < 0:
ctoken = tokens.current()
@@ -321,7 +329,7 @@ def parse_children(self, rule, children, tokens, error):
continue
if tokens.at > error[0]:
error[0] = tokens.at
error[1] = 'Unexpected token %s; expected \'%s\' (while parsing %s)' % (repr(ctoken), current.encode('string_escape'), self.rule_names[rule])
error[1] = 'Unexpected token %s; expected \'%s\' (while parsing %s)' % (repr(ctoken), str(current).encode('unicode_escape'), self.rule_names[rule])
if logger.output:print>>logger, 'FAIL string compare:', [current, tokens.current().value]
return None
elif type(current) == tuple:
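Note: besides the explicit relative imports and iteritems, the main mechanical change here is rewriting the "print >> logger, ..." statements as print(..., file=logger), which only requires the target to expose a write() method, so the logger object imported above from .logger keeps working. A minimal stand-in with that interface, illustrative rather than codetalker's own class:

    from __future__ import print_function
    import sys

    class Logger(object):          # the minimal interface print(..., file=...) needs
        output = True
        indent = 0
        def write(self, text):
            sys.stderr.write(text)

    logger = Logger()
    if logger.output:
        print('parsing for rule', 'start', file=logger)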
2 changes: 1 addition & 1 deletion codetalker/pgm/nodes.py
@@ -1,7 +1,7 @@
#!/usr/bin/env python

# from tokens import EOF, Token
from errors import ParseError
from .errors import ParseError

class TokenStream:
def __init__(self, tokens):
8 changes: 4 additions & 4 deletions codetalker/pgm/rules.py
@@ -1,10 +1,10 @@
#!/usr/bin/env python

from errors import *
import tokens
from tokens import Token
from .errors import *
from . import tokens
from .tokens import Token
import types
from special import Special
from .special import Special
import inspect

class RuleLoader(object):
4 changes: 2 additions & 2 deletions codetalker/pgm/text.py
@@ -1,6 +1,6 @@
#!/usr/bin/env python
from tokens import INDENT, DEDENT
from errors import *
from .tokens import INDENT, DEDENT
from .errors import *

class Text:
'''a small utility class in charge of serving up
8 changes: 5 additions & 3 deletions codetalker/pgm/token.py
@@ -10,10 +10,12 @@ def __init__(self, value, lineno=-1, charno=-1):

def __repr__(self):
return u'<%s token "%s" at (%d, %d)>' % (self.__class__.__name__,
self.value.encode('string_escape'), self.lineno, self.charno)
str(self.value).encode('unicode_escape'),
self.lineno,
self.charno)

def __str__(self):
return self.value
return str(self.value.decode('latin1'))

def __eq__(self, other):
if type(other) in (tuple, list):
@@ -30,7 +32,7 @@ class ReToken(Token):

@classmethod
def check(cls, text):
m = cls.rx.match(text)
m = cls.rx.match(text.decode('latin1'))
if m:
return len(m.group())
return 0
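Note: both changes in token.py deal with the same mismatch: on Python 3 the stored token value is bytes, but __str__ must return str, and a regular expression compiled from a str pattern cannot match a bytes argument, hence the decode('latin1') calls. A small demonstration of the regex half:

    import re

    rx = re.compile(r'[a-zA-Z_][a-zA-Z_0-9]*')
    raw = b'name = 3'
    m = rx.match(raw.decode('latin1'))   # matching raw directly raises TypeError on Python 3
    print(m.group())                     # -> name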
6 changes: 3 additions & 3 deletions codetalker/pgm/tokenize.py
@@ -1,7 +1,7 @@
#!/usr/bin/env python

from tokens import Token, EOF
from errors import TokenError
from .tokens import Token, EOF
from .errors import TokenError

def tokenize(tokens, text):
'''a generator to split some text into tokens'''
@@ -18,7 +18,7 @@ def tokenize(tokens, text):
break
else:
raise TokenError('no token matches the text at (%d, %d): "%s"' % (text.lineno,
text.charno, text.text[text.at:text.at+10].encode('string_escape')))
text.charno, str(text.text[text.at:text.at+10]).encode('unicode_escape')))
text.advance(len(one.value))

# vim: et sw=4 sts=4
2 changes: 1 addition & 1 deletion codetalker/pgm/tokens.py
@@ -1,6 +1,6 @@
#!/usr/bin/env python

from token import Token, ReToken
from .token import Token, ReToken

import re

11 changes: 6 additions & 5 deletions codetalker/pgm/translator.py
@@ -1,12 +1,13 @@
#!/usr/bin/env python

from tokens import Token
import types
import inspect
import copy
from nodes import AstNode
from future.utils import iteritems

from .tokens import Token
from .nodes import AstNode

from errors import CodeTalkerException
from .errors import CodeTalkerException

class TranslatorException(CodeTalkerException):
pass
@@ -68,7 +69,7 @@ def from_ast(self, tree, **args):
stuff.update(args)
Scope = type('Scope', (), {})
scope = Scope()
for k,v in stuff.iteritems():
for k, v in iteritems(stuff):
setattr(scope, k, v)
return self.translate(tree, scope)
elif args:
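Note: iteritems here comes from the future package and yields key/value pairs lazily on both Python 2 and 3; plain dict.items() would behave the same for the small scope dicts built in from_ast. A sketch of a local fallback if the future dependency were ever unavailable; the shim below is illustrative, not part of codetalker:

    try:
        from future.utils import iteritems
    except ImportError:
        def iteritems(d):
            return iter(d.items())

    stuff = {'tree': None, 'scope': 1}
    for k, v in iteritems(stuff):
        print(k, v)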
2 changes: 1 addition & 1 deletion codetalker/testing.py
@@ -14,7 +14,7 @@ def _fail(string):
def meta():
try:
res = grammar.get_parse_tree(string, start=rule)
except (ParseError, TokenError), e:
except (ParseError, TokenError) as e:
pass
else:
raise AssertionError('parsing was supposed to fail for', string, res)
1 change: 1 addition & 0 deletions requirements.txt
@@ -1,2 +1,3 @@
cython
pytest
future