From 550254aede01254bd286da0f73c5b27118a57b0b Mon Sep 17 00:00:00 2001
From: Andrei Aaron
Date: Wed, 3 Jul 2019 17:29:25 +0300
Subject: [PATCH 01/15] Convert test execution command to work with both Python2 and Python3

---
 requirements.txt |  1 +
 setup.py         |  4 ++--
 test_cmd.py      | 14 +++++++-------
 3 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/requirements.txt b/requirements.txt
index c9fe344..01d3bf2 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,2 +1,3 @@
 cython
 pytest
+future
\ No newline at end of file
diff --git a/setup.py b/setup.py
index 42f6c23..7c76bb0 100755
--- a/setup.py
+++ b/setup.py
@@ -4,13 +4,13 @@
     from distutils.core import setup
     from distutils.extension import Extension
 except ImportError:
-    print 'distutils is required to install this module. If you have pip installed, run: pip instal distutils'
+    print('distutils is required to install this module. If you have pip installed, run: pip install distutils')
     raise

 try:
     from Cython.Distutils import build_ext
 except ImportError:
-    print 'Cython is required to install this module'
+    print('Cython is required to install this module')
     raise

 import os
diff --git a/test_cmd.py b/test_cmd.py
index fbb3f3a..6408c2c 100644
--- a/test_cmd.py
+++ b/test_cmd.py
@@ -1,9 +1,9 @@
 from distutils.core import Command
 from distutils.errors import DistutilsOptionError
-from distutils.fancy_getopt import longopt_xlate
-import string
 import sys
 from unittest import TestLoader, main
+from future.utils import itervalues
+from future.types.newstr import newstr

 uninitialized = object()

@@ -20,9 +20,9 @@ class test(Command):

     def initialize_options(self):
         self.test_type = 'py.test'
-        for (_,_,_,_,options) in self.test_commands.values():
+        for (_, _, _, _, options) in list(itervalues(self.test_commands)):
             for option in options:
-                name = string.translate(option[0], longopt_xlate).rstrip('=')
+                name = newstr(option[0]).translate(newstr.maketrans('-', '_')).rstrip('=')
                 setattr(self, name, uninitialized)

     @classmethod
@@ -42,7 +42,7 @@ def finalize_options(self):
             validate(self)
         else:
             for option in options:
-                name = string.translate(option[0], longopt_xlate).rstrip('=')
+                name = newstr(option[0]).translate(newstr.maketrans('-', '_')).rstrip('=')
                 value = getattr(self, name,)
                 if value is uninitialized:
                     if name in defaults:
@@ -119,13 +119,13 @@ def add_dir(dr):
     if py:
         py.test.cmdline.main(test_files)
     else:
-        print 'WARNING: py.test not found. falling back to unittest. For more informative errors, install py.test'
+        print('WARNING: py.test not found. falling back to unittest. For more informative errors, install py.test')
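A quick aside on the newstr.maketrans construction patch 01 settles on: it keeps a single code path working under both interpreters. If Python 2 support were ever dropped, the same option-name mangling needs no python-future at all; a sketch of the plain Python 3 equivalent (the option string here is hypothetical, just to show the shape of what initialize_options iterates over):

    # Python 3 spelling of the same mangling, without python-future:
    option = 'dry-run='
    name = option.translate(str.maketrans('-', '_')).rstrip('=')
    assert name == 'dry_run'
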
        import unittest
         suite = unittest.TestSuite()
         for filen in test_files:
             mod = get_pyfile(filen)
             suite.addTest(make_testcase(filen,
-                (fn for fn in mod.__dict__.values() if getattr(fn, '__name__', '').startswith('test_'))
+                (fn for fn in list(itervalues(mod.__dict__)) if getattr(fn, '__name__', '').startswith('test_'))
                 ))
         t = unittest.TextTestRunner()
         t.run(suite)

From 3b9bd96817125ebacb5f3543c78336428c97125d Mon Sep 17 00:00:00 2001
From: Andrei Aaron
Date: Thu, 4 Jul 2019 18:46:11 +0300
Subject: [PATCH 02/15] Fix relative imports for Python 3

---
 codetalker/pgm/__init__.py   | 10 +++++-----
 codetalker/pgm/grammar.py    | 10 +++++-----
 codetalker/pgm/nodes.py      |  2 +-
 codetalker/pgm/rules.py      |  8 ++++----
 codetalker/pgm/text.py       |  4 ++--
 codetalker/pgm/tokenize.py   |  4 ++--
 codetalker/pgm/tokens.py     |  2 +-
 codetalker/pgm/translator.py |  6 +++---
 tests/tokenize/ctokens.py    |  5 +++++
 9 files changed, 28 insertions(+), 23 deletions(-)

diff --git a/codetalker/pgm/__init__.py b/codetalker/pgm/__init__.py
index 26fa60a..dc9db6f 100644
--- a/codetalker/pgm/__init__.py
+++ b/codetalker/pgm/__init__.py
@@ -1,9 +1,9 @@
 #!/usr/bin/env python
-import token
-from grammar import Grammar
-from translator import Translator
-import special
-from tokens import *
+from . import token
+from .grammar import Grammar
+from .translator import Translator
+from . import special
+from .tokens import *

 # vim: et sw=4 sts=4
diff --git a/codetalker/pgm/grammar.py b/codetalker/pgm/grammar.py
index 4a6ad07..eff3733 100644
--- a/codetalker/pgm/grammar.py
+++ b/codetalker/pgm/grammar.py
@@ -1,9 +1,9 @@
-from rules import RuleLoader
-from tokens import EOF, INDENT, DEDENT, Token
-from errors import *
+from .rules import RuleLoader
+from .tokens import EOF, INDENT, DEDENT, Token
+from .errors import *

-from nodes import AstNode, ParseTree, TokenStream
-from logger import logger
+from .nodes import AstNode, ParseTree, TokenStream
+from .logger import logger
 import inspect

 # from codetalker.pgm.cgrammar.tokenize import tokenize
diff --git a/codetalker/pgm/nodes.py b/codetalker/pgm/nodes.py
index 1a63577..94a0f4b 100644
--- a/codetalker/pgm/nodes.py
+++ b/codetalker/pgm/nodes.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python

 # from tokens import EOF, Token
-from errors import ParseError
+from .errors import ParseError

 class TokenStream:
     def __init__(self, tokens):
diff --git a/codetalker/pgm/rules.py b/codetalker/pgm/rules.py
index 011ea1d..b24d970 100644
--- a/codetalker/pgm/rules.py
+++ b/codetalker/pgm/rules.py
@@ -1,10 +1,10 @@
 #!/usr/bin/env python
-from errors import *
-import tokens
-from tokens import Token
+from .errors import *
+from . import tokens
+from .tokens import Token
 import types
-from special import Special
+from .special import Special
 import inspect

 class RuleLoader(object):
diff --git a/codetalker/pgm/text.py b/codetalker/pgm/text.py
index fc3a4b2..45e66d5 100644
--- a/codetalker/pgm/text.py
+++ b/codetalker/pgm/text.py
@@ -1,6 +1,6 @@
 #!/usr/bin/env python
-from tokens import INDENT, DEDENT
-from errors import *
+from .tokens import INDENT, DEDENT
+from .errors import *

 class Text:
     '''a small utility class in charge of serving up
diff --git a/codetalker/pgm/tokenize.py b/codetalker/pgm/tokenize.py
index 307ea3a..1d4d095 100644
--- a/codetalker/pgm/tokenize.py
+++ b/codetalker/pgm/tokenize.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python

-from tokens import Token, EOF
-from errors import TokenError
+from .tokens import Token, EOF
+from .errors import TokenError

 def tokenize(tokens, text):
     '''a generator to split some text into tokens'''
diff --git a/codetalker/pgm/tokens.py b/codetalker/pgm/tokens.py
index ed4a020..8761364 100644
--- a/codetalker/pgm/tokens.py
+++ b/codetalker/pgm/tokens.py
@@ -1,6 +1,6 @@
 #!/usr/bin/env python

-from token import Token, ReToken
+from .token import Token, ReToken

 import re

diff --git a/codetalker/pgm/translator.py b/codetalker/pgm/translator.py
index fba2c0f..4a6deca 100644
--- a/codetalker/pgm/translator.py
+++ b/codetalker/pgm/translator.py
@@ -1,12 +1,12 @@
 #!/usr/bin/env python

-from tokens import Token
+from .tokens import Token
 import types
 import inspect
 import copy

-from nodes import AstNode
+from .nodes import AstNode

-from errors import CodeTalkerException
+from .errors import CodeTalkerException

 class TranslatorException(CodeTalkerException):
     pass
diff --git a/tests/tokenize/ctokens.py b/tests/tokenize/ctokens.py
index 97354d1..ccca8d7 100644
--- a/tests/tokenize/ctokens.py
+++ b/tests/tokenize/ctokens.py
@@ -1,5 +1,10 @@
 #!/usr/bin/env python

+# Workaround for relative imports not working outside package in Python 3
+import sys
+import os
+sys.path.append(os.path.dirname(os.path.abspath(__file__)))
+
 from util import just_tokenize, make_tests, make_fails, TSTRING, STRING, SSTRING, ID, WHITE, NUMBER, INT, HEX, CCOMMENT, CMCOMMENT, PYCOMMENT, NEWLINE, ANY

 def make_single(tok, *tests):
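The dotted forms above are required because Python 3 removed implicit relative imports (PEP 328): inside the codetalker.pgm package, a bare import token now binds the standard library's token module rather than the sibling file. A short illustration, assuming the package is importable:

    import token                                    # Python 3: always the stdlib module
    from codetalker.pgm import token as pgm_token   # the package's own token.py, explicit
    # On Python 2, a bare 'import token' inside the package would silently
    # have picked the sibling file instead.
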
From 4115b797c8d33d2e6aec0bb324c8f9f48df39cbb Mon Sep 17 00:00:00 2001
From: Andrei Aaron
Date: Thu, 4 Jul 2019 19:00:05 +0300
Subject: [PATCH 03/15] Python 3 compatibility for tests

---
 codetalker/testing.py      |  2 +-
 tests/data/getcexamples.py |  6 +++---
 tests/data/test.py         | 20 ++++++++++----------
 tests/parse/indent.py      |  6 +++---
 tests/parse/maths.py       |  6 +++---
 tests/parse/multi_ast.py   |  6 +++---
 tests/parse/noignore.py    |  6 +++---
 tests/parse/not.py         |  6 +++---
 tests/parse/segfixes.py    |  6 +++---
 tests/parse/small.py       |  4 ++--
 tests/tokenize/util.py     |  2 +-
 11 files changed, 35 insertions(+), 35 deletions(-)

diff --git a/codetalker/testing.py b/codetalker/testing.py
index 52da75e..c4635bb 100644
--- a/codetalker/testing.py
+++ b/codetalker/testing.py
@@ -14,7 +14,7 @@ def _fail(string):
     def meta():
         try:
             res = grammar.get_parse_tree(string, start=rule)
-        except (ParseError, TokenError), e:
+        except (ParseError, TokenError) as e:
             pass
         else:
             raise AssertionError('parsing was supposed to fail for', string, res)
diff --git a/tests/data/getcexamples.py b/tests/data/getcexamples.py
index eafe863..d14d588 100644
--- a/tests/data/getcexamples.py
+++ b/tests/data/getcexamples.py
@@ -1,12 +1,12 @@
 #!/usr/bin/env python
-from urllib import urlopen as upen
+from future.moves.urllib.request import urlopen as upen
 import re

 def get_code(num):
-    print 'get page...'
+    print('get page...')
     url = 'http://www.c.happycodings.com/code_snippets/code%d.html' % num
     text = upen(url).read()
-    print 'got'
+    print('got')
     code = re.findall('<pre[^>]*>(.+?)</pre>', text, re.S)
     return code[0]
diff --git a/tests/data/test.py b/tests/data/test.py
index 4ac7196..23845d1 100644
--- a/tests/data/test.py
+++ b/tests/data/test.py
@@ -7,27 +7,27 @@ def main():
     small = '1 2 manhatten; ()'
     nodes, i, const = parser.parse(small, c.tokens)
     if len(nodes)!=1:
-        print 'bad node length',nodes
+        print('bad node length', nodes)
         sys.exit(1)
     if i!=len(small):
-        print 'not everything was parsed'
-        print str(nodes[0])
-        print const['error'],const['pos']
+        print('not everything was parsed')
+        print(str(nodes[0]))
+        print(const['error'], const['pos'])
         sys.exit(1)
     if str(nodes[0]) != small:
-        print 'parsed badly:\ninput:\t"%s"\nparsed:\t"%s"' % (small, nodes[o])
+        print('parsed badly:\ninput:\t"%s"\nparsed:\t"%s"' % (small, nodes[0]))
         sys.exit(1)
     nodes, i, const = parser.parse(text, c.tokens)
     if len(nodes) != 1:
-        print 'bad node length',nodes
+        print('bad node length', nodes)
         sys.exit(1)
     if i != len(text):
-        print 'not everything was parsed'
-        print str(nodes[0])
-        print const['error'],const['pos']
+        print('not everything was parsed')
+        print(str(nodes[0]))
+        print(const['error'], const['pos'])
         sys.exit(1)
-    print 'all test were successful'
+    print('all tests were successful')

 if __name__=='__main__':
diff --git a/tests/parse/indent.py b/tests/parse/indent.py
index bf7cf98..ea2372a 100644
--- a/tests/parse/indent.py
+++ b/tests/parse/indent.py
@@ -22,9 +22,9 @@ def test_dedent():
 if __name__ == '__main__':
     for name, fn in globals().items():
         if name.startswith('test_'):
-            print 'testing', fn
+            print('testing', fn)
             fn()
-            print 'test passed'
-    print 'Finished!'
+            print('test passed')
+    print('Finished!')

 # vim: et sw=4 sts=4
diff --git a/tests/parse/maths.py b/tests/parse/maths.py
index 3f8075a..13e4a2b 100644
--- a/tests/parse/maths.py
+++ b/tests/parse/maths.py
@@ -37,9 +37,9 @@ def test_8():
 if __name__ == '__main__':
     for name, fn in sorted(globals().items()):
         if name.startswith('test_'):
-            print 'testing', name
+            print('testing', name)
             fn()
-            print 'test passed'
-    print 'Finished!'
+            print('test passed')
+    print('Finished!')

 # vim: et sw=4 sts=4
diff --git a/tests/parse/multi_ast.py b/tests/parse/multi_ast.py
index 2917c13..e1a2914 100644
--- a/tests/parse/multi_ast.py
+++ b/tests/parse/multi_ast.py
@@ -34,7 +34,7 @@ def start3(rule):
 def test_three():
     try:
         g3 = pgm.Grammar(start=start3, tokens=[], ignore=[])
-    except AstError, e:
+    except AstError as e:
         pass
     else:
         raise AssertionError('was supposed to fail -- invalid ast type')
@@ -43,8 +43,8 @@ def test_three():
     for name, fn in globals().items():
         if name.startswith('test_'):
             fn()
-            print 'test passed'
-    print 'Finished!'
+            print('test passed')
+    print('Finished!')

 # vim: et sw=4 sts=4
diff --git a/tests/parse/noignore.py b/tests/parse/noignore.py
index 3778a02..30022b2 100644
--- a/tests/parse/noignore.py
+++ b/tests/parse/noignore.py
@@ -29,10 +29,10 @@ def at(rule):
 if __name__ == '__main__':
     for name, fn in sorted(globals().items()):
         if name.startswith('test_'):
-            print 'testing', name
+            print('testing', name)
             fn()
-            print 'test passed'
-    print 'Finished!'
+            print('test passed')
+    print('Finished!')
diff --git a/tests/parse/not.py b/tests/parse/not.py
index 519631c..feeaf4e 100644
--- a/tests/parse/not.py
+++ b/tests/parse/not.py
@@ -31,9 +31,9 @@ def at(rule):
 if __name__ == '__main__':
     for name, fn in sorted(globals().items()):
         if name.startswith('test_'):
-            print 'testing', name
+            print('testing', name)
             fn()
-            print 'test passed'
-    print 'Finished!'
+            print('test passed')
+    print('Finished!')

 # vim: et sw=4 sts=4
diff --git a/tests/parse/segfixes.py b/tests/parse/segfixes.py
index 6e79ba6..6fdee0c 100644
--- a/tests/parse/segfixes.py
+++ b/tests/parse/segfixes.py
@@ -34,10 +34,10 @@ def at(rule):
 if __name__ == '__main__':
     for name, fn in sorted(globals().items()):
         if name.startswith('test_'):
-            print 'testing', name
+            print('testing', name)
             fn()
-            print 'test passed'
-    print 'Finished!'
+            print('test passed')
+    print('Finished!')

 # vim: et sw=4 sts=4
diff --git a/tests/parse/small.py b/tests/parse/small.py
index 38d1546..cfc07ed 100644
--- a/tests/parse/small.py
+++ b/tests/parse/small.py
@@ -22,7 +22,7 @@ def test_one():
     for name, fn in globals().items():
         if name.startswith('test_'):
             fn()
-            print 'test passed'
-    print 'Finished!'
+            print('test passed')
+    print('Finished!')

 # vim: et sw=4 sts=4
diff --git a/tests/tokenize/util.py b/tests/tokenize/util.py
index 832997e..6469bb0 100644
--- a/tests/tokenize/util.py
+++ b/tests/tokenize/util.py
@@ -41,7 +41,7 @@ def meta():
     return meta

 def make_tests(globs, name, tokenize, tests):
-    print 'hi'
+    print('hi')
     for i, (string, expected) in enumerate(tests):
         globs['test %s #%d' % (name, i)] = make_test(tokenize, string, expected)
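Most of patch 03 is mechanical print() conversion, but the except clauses matter for correctness: the "except ExcType, e" spelling is a syntax error on Python 3, while the "as" form is accepted from Python 2.6 onward, so one source file runs everywhere. A minimal illustration:

    try:
        raise ValueError('boom')
    except (ValueError, TypeError) as e:   # valid on Python 2.6+ and Python 3
        print('caught:', e)
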
From 6ecfd6e3d000d11002d84512e39b38da984a64db Mon Sep 17 00:00:00 2001
From: Andrei Aaron
Date: Thu, 4 Jul 2019 19:29:43 +0300
Subject: [PATCH 04/15] Python 3 compatibility for configparser.py

---
 codetalker/contrib/configparser.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/codetalker/contrib/configparser.py b/codetalker/contrib/configparser.py
index 2e0296d..3d27b92 100644
--- a/codetalker/contrib/configparser.py
+++ b/codetalker/contrib/configparser.py
@@ -1,5 +1,7 @@
 #!/usr/bin/env python

+from future.utils import lrange
+
 from codetalker.pgm import Grammar, Translator
 from codetalker.pgm.special import star, plus, _or
 from codetalker.pgm.tokens import *
@@ -52,10 +54,10 @@ def get_item(self, section, name, check=()):
         if '%' not in value: # no need to interpolate
             return value
         vbls = {}
-        for i in xrange(1000): # just in case something goes wrong...
+        for i in lrange(1000): # just in case something goes wrong...
             try:
                 return value % vbls
-            except KeyError, e:
+            except KeyError as e:
                 vbls[e.args[0]] = self.get_item(section, e.args[0], check + (name,))
         raise RecursionError('resursive interpolation...')
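On the lrange substitution above: future.utils.lrange mimics Python 2's range() by returning a real list on either interpreter. In this loop the result is only iterated, so the lazy builtin range() would arguably have been enough; a sketch of the difference:

    from future.utils import lrange

    assert lrange(3) == [0, 1, 2]        # always a concrete list
    assert list(range(3)) == [0, 1, 2]   # Python 3 range is lazy until listed
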
From 743c0bd211deccf3d11cf30c0ede127bad888f88 Mon Sep 17 00:00:00 2001
From: Andrei Aaron
Date: Thu, 4 Jul 2019 20:23:48 +0300
Subject: [PATCH 05/15] Fix iteritems

---
 codetalker/pgm/grammar.py    | 6 ++++--
 codetalker/pgm/translator.py | 7 ++++---
 2 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/codetalker/pgm/grammar.py b/codetalker/pgm/grammar.py
index eff3733..a0a905d 100644
--- a/codetalker/pgm/grammar.py
+++ b/codetalker/pgm/grammar.py
@@ -1,10 +1,12 @@
+import inspect
+from future.utils import iteritems
+
 from .rules import RuleLoader
 from .tokens import EOF, INDENT, DEDENT, Token
 from .errors import *

 from .nodes import AstNode, ParseTree, TokenStream
 from .logger import logger
-import inspect

 # from codetalker.pgm.cgrammar.tokenize import tokenize
@@ -110,7 +112,7 @@ def start(rule):
         if not rule.options:
             raise Exception('no rule options specified in %r' % builder)
         attrs = []
-        for attr, dct in rule.astAttrs.iteritems():
+        for attr, dct in iteritems(rule.astAttrs):
             if type(dct) != dict:
                 dct = {'type':dct}
             if type(dct['type']) not in (tuple, list):
diff --git a/codetalker/pgm/translator.py b/codetalker/pgm/translator.py
index 4a6deca..9c1afa0 100644
--- a/codetalker/pgm/translator.py
+++ b/codetalker/pgm/translator.py
@@ -1,9 +1,10 @@
 #!/usr/bin/env python
-
-from .tokens import Token
 import types
 import inspect
 import copy
+from future.utils import iteritems
+
+from .tokens import Token

 from .nodes import AstNode

 from .errors import CodeTalkerException
@@ -68,7 +69,7 @@ def from_ast(self, tree, **args):
             stuff.update(args)
             Scope = type('Scope', (), {})
             scope = Scope()
-            for k,v in stuff.iteritems():
+            for k, v in iteritems(stuff):
                 setattr(scope, k, v)
             return self.translate(tree, scope)
         elif args:
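The iteritems swap works because future.utils.iteritems calls dict.iteritems() on Python 2 and dict.items() on Python 3, returning a lazy iterator either way, so call sites like the astAttrs loop above stay identical on both interpreters. A minimal sketch (the dict contents are illustrative, not from the codebase):

    from future.utils import iteritems

    ast_attrs = {'left': 'Expression', 'right': 'Expression'}
    for attr, dct in iteritems(ast_attrs):
        print(attr, dct)
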
From 24c26c9d68579d5eb9ef7b0d6003d6cf8925f964 Mon Sep 17 00:00:00 2001
From: Andrei Aaron
Date: Thu, 4 Jul 2019 23:07:09 +0300
Subject: [PATCH 06/15] Partial work on converting cython code, attempts to solve str to bytes conversions

---
 codetalker/cgrammar.pyx   |  2 +-
 codetalker/pgm/grammar.py | 23 +++++++++++++++--------
 codetalker/pgm/rules.py   |  2 +-
 3 files changed, 17 insertions(+), 10 deletions(-)

diff --git a/codetalker/cgrammar.pyx b/codetalker/cgrammar.pyx
index d03c13e..36315af 100644
--- a/codetalker/cgrammar.pyx
+++ b/codetalker/cgrammar.pyx
@@ -562,7 +562,7 @@ cdef object convert_ast_attrs(object ast_attrs, object rules, object tokens, Ast
             continue
         else:
             result[i].pass_single = 0
-        keys = ast_attrs[i]['attrs'].keys()
+        keys = list(ast_attrs[i]['attrs'].keys())
         result[i].num = len(keys)
         if len(keys):
             result[i].attrs = <AstAttr*>malloc(sizeof(AstAttr)*result[i].num);
diff --git a/codetalker/pgm/grammar.py b/codetalker/pgm/grammar.py
index a0a905d..0b60072 100644
--- a/codetalker/pgm/grammar.py
+++ b/codetalker/pgm/grammar.py
@@ -1,6 +1,8 @@
-import inspect
+from __future__ import print_function
 from future.utils import iteritems

+import inspect
+
 from .rules import RuleLoader
 from .tokens import EOF, INDENT, DEDENT, Token
 from .errors import *
@@ -50,7 +52,8 @@ def __init__(self, start, tokens=(), ignore=(), idchars='', indent=False, ast_to
                 self.tokens.append(i)
         self.ast_tokens = tuple(self.tokens.index(tok) for tok in ast_tokens)
         self.indent = indent
-        self.idchars = idchars
+        # Note this needs to be reviewed, should we be converting str to bytes or the other way around
+        self.idchars = idchars.encode()
@@ -98,9 +101,9 @@ def start(rule):
         name = getattr(builder, 'astName', None)
         if name is None:
             name = camelCase(builder.__name__)
-
+
         rule = RuleLoader(self)
-        rule.name = name
+        rule.name = name.encode()
         self.rule_dict[builder] = num
         self.rules.append(rule)
         self.rule_names.append(name)
@@ -263,17 +266,20 @@ def to_ast(self, tree):
     def parse_rule(self, rule, tokens, error):
         if rule < 0 or rule >= len(self.rules):
             raise ParseError('invalid rule: %d' % rule)
-        if logger.output:print>>logger, 'parsing for rule', self.rule_names[rule]
+        if logger.output:
+            print('parsing for rule', self.rule_names[rule], file=logger)
         logger.indent += 1
         node = ParseTree(rule, self.rule_names[rule])
         for option in self.rules[rule]:
             res = self.parse_children(rule, option, tokens, error)
             if res is not None:
-                if logger.output:print>>logger, 'yes!',self.rule_names[rule], res
+                if logger.output:
+                    print('yes!', self.rule_names[rule], res, file=logger)
                 logger.indent -= 1
                 node.children = res
                 return node
-        if logger.output:print>>logger, 'failed', self.rule_names[rule]
+        if logger.output:
+            print('failed', self.rule_names[rule], file=logger)
         logger.indent -= 1
         return None

@@ -286,7 +292,8 @@ def parse_children(self, rule, children, tokens, error):
                 res.append(tokens.current())
                 tokens.advance()
             current = children[i]
-            if logger.output:print>>logger, 'parsing child',current,i
+            if logger.output:
+                print('parsing child', current, i, file=logger)
             if type(current) == int:
                 if current < 0:
                     ctoken = tokens.current()
diff --git a/codetalker/pgm/rules.py b/codetalker/pgm/rules.py
index b24d970..4790f19 100644
--- a/codetalker/pgm/rules.py
+++ b/codetalker/pgm/rules.py
@@ -27,7 +27,7 @@ def add_option(self, other):

     def process(self, what):
         if type(what) == str:
-            return [what]
+            return [what.encode()]
         elif inspect.isclass(what) and issubclass(what, Token):
             if what not in self.grammar.tokens and what not in self.grammar.special_tokens:
                 # print 'adding', what
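The encode() calls that start appearing here address the core porting problem in this series: under Python 3, str no longer coerces implicitly to the bytes that the Cython layer's char* parameters require. A tiny illustration of the distinction the review comment in the patch is worrying about (values illustrative):

    idchars = '_-'
    as_bytes = idchars.encode()    # b'_-', acceptable where C expects char*
    assert isinstance(as_bytes, bytes)
    assert as_bytes != idchars     # Python 3 keeps the two types distinct

On Python 2 the two types were one and the same, which is why the original code never needed the conversion.
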
From a8e2526071aea9a58b6fe280c22331552f68a098 Mon Sep 17 00:00:00 2001
From: Andrei Aaron
Date: Tue, 9 Jul 2019 18:00:27 +0300
Subject: [PATCH 07/15] Partially fix conversion from str to bytes for python3

---
 codetalker/cgrammar.pyx   | 7 ++++++-
 codetalker/pgm/grammar.py | 3 +--
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/codetalker/cgrammar.pyx b/codetalker/cgrammar.pyx
index 36315af..4c917a2 100644
--- a/codetalker/cgrammar.pyx
+++ b/codetalker/cgrammar.pyx
@@ -1,5 +1,6 @@
 # cython: profile=True
 from libc.stdlib cimport malloc, free
+from cpython.version cimport PY_MAJOR_VERSION

 from codetalker.pgm.tokens import INDENT, DEDENT, EOF, Token as PyToken, ReToken
 from codetalker.pgm.errors import ParseError, TokenError, AstError

 cdef Rule convert_rule(object rule, unsigned int i):
     crule.dont_ignore = rule.dont_ignore
     crule.num = len(rule.options)
     crule.options = <RuleOption*>malloc(sizeof(RuleOption)*crule.num)
+    rule.name = rule.name.encode()
     crule.name = rule.name
     crule.keep_tree = rule.keep_tree
     for i from 0<=i<crule.num:

-            convert_ast_attr(keys[m], ast_attrs[i]['attrs'][keys[m]], rules, tokens, &result[i].attrs[m])
+            key = keys[m]
+            if PY_MAJOR_VERSION >= 3 and isinstance(keys[m], str):
+                key = keys[m].encode()
+            convert_ast_attr(key, ast_attrs[i]['attrs'][keys[m]], rules, tokens, &result[i].attrs[m])

 cdef object which_rt(object it, object rules, object tokens):
     '''convert an ast type (rule or token object) into the appropriate ID, ready for AST construction.
diff --git a/codetalker/pgm/grammar.py b/codetalker/pgm/grammar.py
index 0b60072..7cdaf4a 100644
--- a/codetalker/pgm/grammar.py
+++ b/codetalker/pgm/grammar.py
@@ -103,8 +103,7 @@ def start(rule):
             name = camelCase(builder.__name__)

         rule = RuleLoader(self)
-        rule.name = name.encode()
-
+        rule.name = name
         self.rule_dict[builder] = num
         self.rules.append(rule)
         self.rule_names.append(name)
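Patch 07's PY_MAJOR_VERSION guard is the compile-once-run-anywhere variant of a runtime version check. The same key normalization, expressed in pure Python for clarity (the helper name is illustrative, not part of the codebase):

    import sys

    def normalize_key(key):
        # encode str keys to bytes only when running under Python 3,
        # mirroring the intent of the guarded block in convert_ast_attrs
        if sys.version_info[0] >= 3 and isinstance(key, str):
            return key.encode()
        return key
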
From 49cce1e8bc3c6f23064e43269a02b248fbdba778 Mon Sep 17 00:00:00 2001
From: Andrei Aaron
Date: Tue, 9 Jul 2019 18:39:06 +0300
Subject: [PATCH 08/15] Fix operator.div

---
 codetalker/contrib/math.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/codetalker/contrib/math.py b/codetalker/contrib/math.py
index 2fe578c..ec279a4 100644
--- a/codetalker/contrib/math.py
+++ b/codetalker/contrib/math.py
@@ -41,7 +41,7 @@ class SYMBOL(CharToken):
 ast = grammar.ast_classes

 import operator
-ops = {'**':operator.pow, '*':operator.mul, '/':operator.div, '%':operator.mod, '+':operator.add, '-':operator.sub}
+ops = {'**':operator.pow, '*':operator.mul, '/':operator.truediv, '%':operator.mod, '+':operator.add, '-':operator.sub}

 @m.translates(ast.BinOp)
 def binop(node):

From 09db17e3decdc197928cf50e0625cd04a512c53f Mon Sep 17 00:00:00 2001
From: Andrei Aaron
Date: Thu, 11 Jul 2019 19:17:44 +0300
Subject: [PATCH 09/15] Update setup.py with debug and language level settings

---
 setup.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 7c76bb0..3b2c465 100755
--- a/setup.py
+++ b/setup.py
@@ -9,11 +9,13 @@

 try:
     from Cython.Distutils import build_ext
+    from Cython.Build import cythonize
 except ImportError:
     print('Cython is required to install this module')
     raise

 import os
+import sys
 import glob

 try:
@@ -50,7 +52,8 @@
     requires=['cython'],
     cmdclass = {'build_ext': build_ext
         , 'test':test},
-    ext_modules = pyx_mods,
+    #ext_modules = pyx_mods,
+    ext_modules = cythonize(pyx_mods, gdb_debug=True, compiler_directives={'language_level': sys.version_info[0]}),
     include_dirs = 'codetalker',
     packages = ['codetalker', 'codetalker.pgm', 'codetalker.contrib'],
 )
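The significant part of patch 09 is the language_level directive: it tells Cython whether to compile the .pyx with Python 2 or Python 3 semantics (print, str/unicode, division), and tying it to sys.version_info keeps the extension consistent with whichever interpreter runs the build. A standalone sketch of the same configuration, assuming setuptools in place of the project's raw distutils:

    import sys
    from setuptools import Extension, setup
    from Cython.Build import cythonize

    ext = Extension('codetalker.cgrammar', ['codetalker/cgrammar.pyx'])
    setup(
        name='codetalker',
        ext_modules=cythonize(
            [ext],
            # match the interpreter running the build, as patch 09 does
            compiler_directives={'language_level': sys.version_info[0]},
        ),
    )
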
From 03f3a7be6f09be98fc54eb3f0dedbdbc4b08e511 Mon Sep 17 00:00:00 2001
From: Andrei Aaron
Date: Thu, 11 Jul 2019 20:39:10 +0300
Subject: [PATCH 10/15] Fix segmentation faults

---
 codetalker/cgrammar.pyx   | 17 ++++++++++-------
 codetalker/pgm/grammar.py |  2 +-
 codetalker/pgm/rules.py   |  2 +-
 3 files changed, 12 insertions(+), 9 deletions(-)

diff --git a/codetalker/cgrammar.pyx b/codetalker/cgrammar.pyx
index 4c917a2..b5733bf 100644
--- a/codetalker/cgrammar.pyx
+++ b/codetalker/cgrammar.pyx
@@ -326,6 +326,7 @@ def get_parse_tree(gid, text, start_i):
     '''
     cdef Token* tokens

+    text = text.encode('latin1')
     try_get_tokens(gid, text, &tokens)

     cdef TokenStream tstream = tokens_to_stream(tokens)
@@ -403,6 +404,8 @@ def get_ast(gid, text, start_i, ast_classes, ast_tokens):
     cdef TokenStream tstream
     cdef cParseNode* ptree

+    text = text.encode('latin1')
+
     try:
         try_get_tokens(gid, text, &tokens)
@@ -479,7 +482,7 @@ cdef Rule convert_rule(object rule, unsigned int i):
     crule.dont_ignore = rule.dont_ignore
     crule.num = len(rule.options)
     crule.options = <RuleOption*>malloc(sizeof(RuleOption)*crule.num)
-    rule.name = rule.name.encode()
+    rule.name = rule.name.encode('latin1')
     crule.name = rule.name
     crule.keep_tree = rule.keep_tree
     for i from 0<=i<crule.num:

             key = keys[m]
             if PY_MAJOR_VERSION >= 3 and isinstance(keys[m], str):
-                key = keys[m].encode()
+                key = keys[m].encode('latin1')
             convert_ast_attr(key, ast_attrs[i]['attrs'][keys[m]], rules, tokens, &result[i].attrs[m])

 cdef object which_rt(object it, object rules, object tokens):
@@ -853,14 +856,14 @@ cdef Token* _get_tokens(int gid, char* text, cTokenError* error, char* idchars):
         elif tokens[i]._type == RETOKEN:
             res = tokens[i].check(state.text[state.at:])
         else:
-            print 'Unknown token type', tokens[i]._type, tokens[i]
+            print('Unknown token type', tokens[i]._type, tokens[i])
             # should this raise an error?
         if res:
             tmp = <Token*>malloc(sizeof(Token))
             tmp.value = <char*>malloc(sizeof(char)*(res+1))
             strncpy(tmp.value, state.text + state.at, res)
-            tmp.value[res] = '\0'
+            tmp.value[res] = b'\0'
             tmp.allocated = 1
             # print 'got token!', res, state.at, [tmp.value], state.lineno, state.charno
             tmp.which = i
@@ -902,7 +905,7 @@ cdef Token* advance(int res, Token* current, bint indent, TokenState* state, int
         numlines = 0
         int ind = 0
         Token* tmp
     for i from state.at <= i < state.at + res:
-        if state.text[i] == '\n':
+        if state.text[i] == b'\n':
             numlines+=1
             last = i
     state.lineno += numlines
@@ -913,7 +916,7 @@
     if not indent:
         return current
     # if we just consumed a newline, check & update the indents
-    if indent and res == 1 and state.text[state.at] == '\n':
+    if indent and res == 1 and state.text[state.at] == b'\n':
         ind = t_white(state.at + 1, state.text, state.ln)
         if ind < 0:
             return current
@@ -943,7 +946,7 @@
             current = tmp
         cindent = state.indents[state.num_indents - 1]
         if ind != cindent:
-            etxt = 'invalid indentation -- %d (expected %d)' % (ind, cindent)
+            etxt = 'invalid indentation -- {} (expected {})'.format(ind, cindent).encode('latin1')
             error.text = etxt
             error.lineno = state.lineno
             error.charno = state.charno
diff --git a/codetalker/pgm/grammar.py b/codetalker/pgm/grammar.py
index 7cdaf4a..bf1c7e0 100644
--- a/codetalker/pgm/grammar.py
+++ b/codetalker/pgm/grammar.py
@@ -53,7 +53,7 @@ def __init__(self, start, tokens=(), ignore=(), idchars='', indent=False, ast_to
         self.ast_tokens = tuple(self.tokens.index(tok) for tok in ast_tokens)
         self.indent = indent
         # Note this needs to be reviewed, should we be converting str to bytes or the other way around
-        self.idchars = idchars.encode()
+        self.idchars = idchars.encode('latin1')
diff --git a/codetalker/pgm/rules.py b/codetalker/pgm/rules.py
index 4790f19..b24d970 100644
--- a/codetalker/pgm/rules.py
+++ b/codetalker/pgm/rules.py
@@ -27,7 +27,7 @@ def add_option(self, other):

     def process(self, what):
         if type(what) == str:
-            return [what.encode()]
+            return [what]
         elif inspect.isclass(what) and issubclass(what, Token):
             if what not in self.grammar.tokens and what not in self.grammar.special_tokens:
                 # print 'adding', what

From 9482f247d798f33281ba3c3fde0c5be8a8b1fd65 Mon Sep 17 00:00:00 2001
From: Andrei Aaron
Date: Thu, 11 Jul 2019 20:52:11 +0300
Subject: [PATCH 11/15] Fix reading json files

---
 tests/contrib/json.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/tests/contrib/json.py b/tests/contrib/json.py
index a22fb30..5bb578b 100644
--- a/tests/contrib/json.py
+++ b/tests/contrib/json.py
@@ -2,6 +2,8 @@
 import os
 import glob

+import io
+
 HERE = os.path.dirname(__file__)
 files = glob.glob(os.path.join(HERE, '../data/json/*.json'))
@@ -12,7 +14,9 @@ parse_rule = testing.parse_rule(__name__, json.grammar)

 def make_parse(fname):
-    text = open(fname).read()
+    with io.open(fname, encoding='utf-8') as f:
+        text = f.read()
+
     def meta():
         if os.path.basename(fname).startswith('fail'):
             try:
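io.open, adopted in patch 11, is the same function as Python 3's builtin open and behaves identically on Python 2, so the JSON fixtures are decoded as UTF-8 text on both interpreters rather than read with the platform default encoding. The reusable pattern, file name illustrative:

    import io

    def read_text(fname):
        with io.open(fname, encoding='utf-8') as f:
            return f.read()   # text (unicode) on both Python 2 and 3
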
From 6070580b6fd4e0df272bef2fbdac21754f4c3d2a Mon Sep 17 00:00:00 2001
From: Andrei Aaron
Date: Thu, 11 Jul 2019 21:27:35 +0300
Subject: [PATCH 12/15] Fix TokenError raised in tests

---
 codetalker/pgm/errors.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/codetalker/pgm/errors.py b/codetalker/pgm/errors.py
index 2ecedae..92adeaa 100644
--- a/codetalker/pgm/errors.py
+++ b/codetalker/pgm/errors.py
@@ -14,11 +14,12 @@ class ParseError(LineError):

 class TokenError(LineError):
     def __init__(self, msg, text, lineno, charno):
-        tease = ''
+        tease = b''
         lines = text.splitlines()
         if lineno-1 < len(lines):
             tease = lines[lineno-1][charno-1:charno+30]
-        Exception.__init__(self, msg + ' at (%d, %d) \'%s\'' % (lineno, charno, tease.encode('string_escape')))
+        tease = str(tease)
+        Exception.__init__(self, str(msg) + ' at (%d, %d) \'%s\'' % (lineno, charno, tease.encode('unicode_escape')))
         self.lineno = lineno
         self.charno = charno
         pass

From 69f53d7bb54c37bc6b498b0ec7c210f9fc0b924f Mon Sep 17 00:00:00 2001
From: Andrei Aaron
Date: Fri, 12 Jul 2019 17:16:01 +0300
Subject: [PATCH 13/15] More conversion from unicode to str, fixing some of the tests

---
 codetalker/pgm/grammar.py  | 2 +-
 codetalker/pgm/token.py    | 8 +++++---
 codetalker/pgm/tokenize.py | 2 +-
 3 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/codetalker/pgm/grammar.py b/codetalker/pgm/grammar.py
index bf1c7e0..fb0714e 100644
--- a/codetalker/pgm/grammar.py
+++ b/codetalker/pgm/grammar.py
@@ -329,7 +329,7 @@ def parse_children(self, rule, children, tokens, error):
                         continue
                     if tokens.at > error[0]:
                         error[0] = tokens.at
-                        error[1] = 'Unexpected token %s; expected \'%s\' (while parsing %s)' % (repr(ctoken), current.encode('string_escape'), self.rule_names[rule])
+                        error[1] = 'Unexpected token %s; expected \'%s\' (while parsing %s)' % (repr(ctoken), str(current).encode('unicode_escape'), self.rule_names[rule])
                     if logger.output:print>>logger, 'FAIL string compare:', [current, tokens.current().value]
                     return None
             elif type(current) == tuple:
diff --git a/codetalker/pgm/token.py b/codetalker/pgm/token.py
index 6a871f9..7b959d1 100644
--- a/codetalker/pgm/token.py
+++ b/codetalker/pgm/token.py
@@ -10,10 +10,12 @@ def __init__(self, value, lineno=-1, charno=-1):

     def __repr__(self):
         return u'<%s token "%s" at (%d, %d)>' % (self.__class__.__name__,
-                self.value.encode('string_escape'), self.lineno, self.charno)
+                str(self.value).encode('unicode_escape'),
+                self.lineno,
+                self.charno)

     def __str__(self):
-        return self.value
+        return str(self.value.decode('latin1'))

     def __eq__(self, other):
         if type(other) in (tuple, list):

 class ReToken(Token):

     @classmethod
     def check(cls, text):
-        m = cls.rx.match(text)
+        m = cls.rx.match(text.decode('latin1'))
         if m:
             return len(m.group())
         return 0
diff --git a/codetalker/pgm/tokenize.py b/codetalker/pgm/tokenize.py
index 1d4d095..e982a52 100644
--- a/codetalker/pgm/tokenize.py
+++ b/codetalker/pgm/tokenize.py
@@ -18,7 +18,7 @@ def tokenize(tokens, text):
                 break
         else:
             raise TokenError('no token matches the text at (%d, %d): "%s"' % (text.lineno,
-                text.charno, text.text[text.at:text.at+10].encode('string_escape')))
+                text.charno, str(text.text[text.at:text.at+10]).encode('unicode_escape')))
         text.advance(len(one.value))

 # vim: et sw=4 sts=4
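The codec swap running through patches 12 and 13 is forced: Python 3 removed the Python 2-only 'string_escape' codec, and the nearest substitute, 'unicode_escape', maps str to bytes. One subtlety the patches inherit: the escaped value is bytes, so interpolating it into a message shows a b'...' repr unless it is decoded first. A sketch:

    snippet = 'line one\nline two'
    escaped = snippet.encode('unicode_escape')   # b'line one\\nline two'
    print(escaped.decode('ascii'))               # prints: line one\nline two
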
From 06508796edf5388c708624f5b1611ed63482afc0 Mon Sep 17 00:00:00 2001
From: Usman Sohail
Date: Fri, 19 Jul 2019 16:17:40 -0700
Subject: [PATCH 14/15] 53 tests passing

The segmentation fault caused by config_parser is being ignored by
removing the contrib directory for now. By ignoring this for now, we can
at least see how many tests are failing, and work on those. I got the
number of passing tests to increase by fixing an encoding error.
---
 codetalker/cgrammar.pyx | 2 +-
 setup.py                | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/codetalker/cgrammar.pyx b/codetalker/cgrammar.pyx
index b5733bf..646e46e 100644
--- a/codetalker/cgrammar.pyx
+++ b/codetalker/cgrammar.pyx
@@ -296,7 +296,7 @@ def get_tokens(gid, text):

     cdef Token* tokens

-    try_get_tokens(gid, text, &tokens)
+    try_get_tokens(gid, text.encode('utf-8'), &tokens)

     pytokens = convert_back_tokens(gid, tokens)
     kill_tokens(tokens)
diff --git a/setup.py b/setup.py
index 3b2c465..07594e5 100755
--- a/setup.py
+++ b/setup.py
@@ -46,7 +46,7 @@
     ],
     options={
         'test':{
-            'test_dir':['tests/parse', 'tests/tokenize', 'tests/contrib']
+            'test_dir':['tests/parse', 'tests/tokenize']
         },
     },
     requires=['cython'],

From 9d2c04de54fdd261fcaeed102319e4af0bb5dbfb Mon Sep 17 00:00:00 2001
From: Usman Sohail
Date: Fri, 19 Jul 2019 16:50:55 -0700
Subject: [PATCH 15/15] Fixed more encoding issues; down to 55 tests failing

---
 tests/tokenize/util.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/tokenize/util.py b/tests/tokenize/util.py
index 6469bb0..8b36d3e 100644
--- a/tests/tokenize/util.py
+++ b/tests/tokenize/util.py
@@ -11,7 +11,7 @@ def just_tokenize(*tokens):
     g = pgm.Grammar(noop, tokens)
     def meta(text):
         _tokens = g.get_tokens(text)
-        assert ''.join(tok.value for tok in _tokens) == text
+        assert ''.join(tok.value.decode('utf-8') for tok in _tokens) == text
         return _tokens
     return meta
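Taken together, patches 10, 14 and 15 converge on a single convention for the Python/C boundary: encode str to bytes before text reaches the C tokenizer, and decode token values on the way back out. A minimal sketch of that round trip; the helper names are illustrative rather than part of the codetalker API, and note the series itself still mixes codecs (latin-1 in patch 10, UTF-8 in patches 14 and 15), which remains a loose end:

    ENCODING = 'utf-8'

    def to_c_text(text):
        # what get_tokens() does on entry after patch 14
        return text if isinstance(text, bytes) else text.encode(ENCODING)

    def from_c_value(value):
        # what the tests do with token values after patch 15
        return value if isinstance(value, str) else value.decode(ENCODING)

    sample = 'if x: pass'
    assert from_c_value(to_c_text(sample)) == sample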