add byte literal and make a bfvm

douyixuan · douyixuan · commit 4f68973aa8c9 · 2024-04-19T01:18:51.000+08:00
diff --git a/.gitignore b/.gitignore
@@ -1,2 +1,3 @@
 internal/grammar/.antlr
-output
+output
+.idea
diff --git a/Makefile b/Makefile
@@ -124,6 +124,10 @@ test/example:
 
 	${CELL} -t riscv tests/examples/multi-files && ckb-debugger --bin multi-files
 	${CELL} -t riscv tests/examples/import-package && ckb-debugger --bin import-package
+	
+	${CELL} -t riscv tests/examples/brainfuck-vm.cell && ckb-debugger --bin brainfuck-vm
+	${CELL} -t riscv tests/examples/byte.cell && ckb-debugger --bin byte
+
 test/cross:
 	@echo " >>> test cross compiling"
 	@echo cross hi.ll with linking dummy.c
diff --git a/README.md b/README.md
@@ -59,7 +59,8 @@ brew install openjdk@21
 brew install llvm@16
 brew tap riscv-software-src/riscv
 brew install riscv-tools
-export PATH=/opt/homebrew/bin:$PATH
+export PATH="/opt/homebrew/bin:$PATH"
+export PATH="/opt/homebrew/opt/llvm@16/bin:$PATH"
 brew install --cask spike
 make build
 source install.sh
diff --git a/compiler/compiler/array.go b/compiler/compiler/array.go
@@ -1,9 +1,9 @@
 package compiler
 
 import (
-	"github.com/llir/llvm/ir"
+	// "github.com/llir/llvm/ir"
 	"github.com/llir/llvm/ir/constant"
-	"github.com/llir/llvm/ir/enum"
+	// "github.com/llir/llvm/ir/enum"
 	llvmTypes "github.com/llir/llvm/ir/types"
 	llvmValue "github.com/llir/llvm/ir/value"
 
@@ -143,7 +143,7 @@ func (c *Compiler) compileLoadArrayElement(v *parser.LoadArrayElement) value.Val
 		panic("unable to LoadArrayElement: could not calculate max length")
 	}
 
-	isCheckedAtCompileTime := false
+	// isCheckedAtCompileTime := false
 
 	if lengthKnownAtCompileTime {
 		if compileTimeLength < 0 {
@@ -152,7 +152,7 @@ func (c *Compiler) compileLoadArrayElement(v *parser.LoadArrayElement) value.Val
 
 		if intType, ok := index.Value.(*constant.Int); ok {
 			if intType.X.IsInt64() {
-				isCheckedAtCompileTime = true
+				// isCheckedAtCompileTime = true
 
 				if intType.X.Uint64() > compileTimeLength {
 					compilePanic("index out of range")
@@ -161,29 +161,30 @@ func (c *Compiler) compileLoadArrayElement(v *parser.LoadArrayElement) value.Val
 		}
 	}
 
-	if !isCheckedAtCompileTime {
-		outsideOfLengthBlock := c.contextBlock.Parent.NewBlock(name.Block() + "-array-index-out-of-range")
-		c.panic(outsideOfLengthBlock, "index out of range")
-		outsideOfLengthBlock.NewUnreachable()
+	// if !isCheckedAtCompileTime {
+	// 	outsideOfLengthBlock := c.contextBlock.Parent.NewBlock(name.Block() + "-array-index-out-of-range")
+	// 	c.panic(outsideOfLengthBlock, "index out of range")
+	// 	outsideOfLengthBlock.NewUnreachable()
 
-		safeBlock := c.contextBlock.Parent.NewBlock(name.Block() + "-after-array-index-check")
+	// 	safeBlock := c.contextBlock.Parent.NewBlock(name.Block() + "-after-array-index-check")
+	// 	safeBlock.Term = ir.NewUnreachable()
 
-		var runtimeOrCompiletimeCmp *ir.InstICmp
-		if lengthKnownAtCompileTime {
-			runtimeOrCompiletimeCmp = c.contextBlock.NewICmp(enum.IPredSGE, indexVal, constant.NewInt(llvmTypes.I32, int64(compileTimeLength)))
-		} else {
-			runtimeOrCompiletimeCmp = c.contextBlock.NewICmp(enum.IPredSGE, indexVal, runtimeLength)
-		}
+	// 	var runtimeOrCompiletimeCmp *ir.InstICmp
+	// 	if lengthKnownAtCompileTime {
+	// 		runtimeOrCompiletimeCmp = c.contextBlock.NewICmp(enum.IPredSGE, indexVal, constant.NewInt(llvmTypes.I32, int64(compileTimeLength)))
+	// 	} else {
+	// 		runtimeOrCompiletimeCmp = c.contextBlock.NewICmp(enum.IPredSGE, indexVal, runtimeLength)
+	// 	}
 
-		outOfRangeCmp := c.contextBlock.NewOr(
-			c.contextBlock.NewICmp(enum.IPredSLT, indexVal, constant.NewInt(llvmTypes.I64, 0)),
-			runtimeOrCompiletimeCmp,
-		)
+	// 	outOfRangeCmp := c.contextBlock.NewOr(
+	// 		c.contextBlock.NewICmp(enum.IPredSLT, indexVal, constant.NewInt(llvmTypes.I64, 0)),
+	// 		runtimeOrCompiletimeCmp,
+	// 	)
 
-		c.contextBlock.NewCondBr(outOfRangeCmp, outsideOfLengthBlock, safeBlock)
+	// 	c.contextBlock.NewCondBr(outOfRangeCmp, outsideOfLengthBlock, safeBlock)
 
-		c.contextBlock = safeBlock
-	}
+	// 	c.contextBlock = safeBlock
+	// }
 
 	var indicies []llvmValue.Value
 	if isLlvmArrayBased {
diff --git a/compiler/compiler/constants.go b/compiler/compiler/constants.go
@@ -14,6 +14,12 @@ import (
 
 func (c *Compiler) compileConstantNode(v *parser.ConstantNode) value.Value {
 	switch v.Type {
+	case parser.BYTE:
+		// A byte literal becomes a non-variable constant of the compiler's i8 type,
+		// built from the numeric value stored on the node.
+		return value.Value{
+			Value:      constant.NewInt(i8.Type, v.Value),
+			Type:       i8,
+			IsVariable: false,
+		}
 	case parser.NUMBER:
 		var intType *types.Int = i64
 
diff --git a/compiler/lexer/lexer.go b/compiler/lexer/lexer.go
@@ -12,6 +12,7 @@ const (
 	KEYWORD
 	NUMBER
 	STRING
+	BYTE
 	OPERATOR
 	EOF
 	EOL
@@ -34,6 +35,8 @@ func (i Item) String() string {
 		t = "NUMBER"
 	case STRING:
 		t = "STRING"
+	case BYTE:
+		t = "BYTE"
 	case OPERATOR:
 		t = "OPERATOR"
 	case EOF:
@@ -99,6 +102,8 @@ var operations = map[string]struct{}{
 	"..": {}, // is not a real operation. Is there so that ... can be found.
 }
 
+// escapedChar maps the character that follows a backslash inside a byte
+// literal to the byte it denotes. The backslash maps to itself so that '\\'
+// can express a literal backslash.
+var escapedChar = map[byte]byte{'t': '\t', 'n': '\n', 'r': '\r', '\'': '\'', '\\': '\\'}
+
 func Lex(inputFullSource string) []Item {
 	var res []Item
 
@@ -174,6 +179,40 @@ func Lex(inputFullSource string) []Item {
 				continue
 			}
 
+			if input[i] == '\'' {
+				// BYTE literal: a single, possibly backslash-escaped, character
+				// between single quotes. The item's Val holds the decoded byte.
+				var str string
+				charCount := 0
+				terminated := false
+
+				i++
+
+				for i < len(input) {
+					if input[i] == '\'' {
+						terminated = true
+						break
+					}
+					// parse escape char
+					if input[i] == '\\' {
+						i++
+						// A backslash at the very end of the input has nothing to
+						// escape; report it instead of indexing out of range.
+						if i >= len(input) {
+							panic("Unsupported escaped character")
+						}
+						va, exist := escapedChar[input[i]]
+						if !exist {
+							panic("Unsupported escaped character")
+						}
+						str = string(va)
+						charCount++
+						i++
+						continue
+					}
+
+					str = string(input[i])
+					charCount++
+					i++
+				}
+
+				i++
+				// Reject '', 'ab' and a missing closing quote rather than
+				// silently keeping only the last character seen.
+				if !terminated || charCount != 1 {
+					panic("Ilegal byte")
+				}
+				res = append(res, Item{Type: BYTE, Val: str, Line: line})
+				continue
+			}
+
 			// NAME
 			// Consists of a-z, parse until the last allowed char
 			if (input[i] >= 'a' && input[i] <= 'z') || (input[i] >= 'A' && input[i] <= 'Z') || input[i] == '_' {
diff --git a/compiler/lexer/lexer_test.go b/compiler/lexer/lexer_test.go
@@ -112,6 +112,30 @@ func TestEscapedString(t *testing.T) {
 	assert.Equal(t, expected, r)
 }
 
+// TestChar verifies that a single-quoted character is lexed as one BYTE item
+// followed by the EOL and EOF items.
+func TestChar(t *testing.T) {
+	r := Lex(`'a'`)
+
+	expected := []Item{
+		{Type: BYTE, Val: "a", Line: 1},
+		{Type: EOL},
+		{Type: EOF},
+	}
+
+	assert.Equal(t, expected, r)
+}
+
+// TestEscapedChar verifies that a backslash-escaped single quote inside a byte
+// literal is lexed as a BYTE item whose value is the quote character itself.
+func TestEscapedChar(t *testing.T) {
+	r := Lex(`'\''`)
+
+	expected := []Item{
+		{Type: BYTE, Val: "'", Line: 1},
+		{Type: EOL},
+		{Type: EOF},
+	}
+
+	assert.Equal(t, expected, r)
+}
+
 func TestLexerSimpleCallWithTwoStrings(t *testing.T) {
 	r := Lex(`foo("bar", "baz")`)
 
diff --git a/compiler/parser/node.go b/compiler/parser/node.go
@@ -132,6 +132,7 @@ type DataType uint8
 
 const (
 	STRING DataType = iota
+	BYTE
 	NUMBER
 	BOOL
 )
diff --git a/compiler/parser/parser.go b/compiler/parser/parser.go
@@ -80,8 +80,8 @@ func (p *parser) parseOneWithOptions(withAheadParse, withArithAhead, withIdentif
 		}
 		return
 
-		// NUMBER always returns a ConstantNode
-		// Convert string representation to int64
+	// NUMBER always returns a ConstantNode
+	// Convert string representation to int64
 	case lexer.NUMBER:
 		val, err := strconv.ParseInt(current.Val, 10, 64)
 		if err != nil {
@@ -97,7 +97,7 @@ func (p *parser) parseOneWithOptions(withAheadParse, withArithAhead, withIdentif
 		}
 		return
 
-		// STRING is always a ConstantNode, the value is not modified
+	// STRING is always a ConstantNode, the value is not modified
 	case lexer.STRING:
 		res = &ConstantNode{
 			Type:     STRING,
@@ -107,6 +107,15 @@ func (p *parser) parseOneWithOptions(withAheadParse, withArithAhead, withIdentif
 			res = p.aheadParse(res)
 		}
 		return
+	// BYTE is always a ConstantNode, the value is the numeric value of the literal's first byte
+	case lexer.BYTE:
+		res = &ConstantNode{
+			Type:  BYTE,
+			Value: int64(current.Val[0]),
+		}
+		if withAheadParse {
+			res = p.aheadParse(res)
+		}
+		return
 
 	case lexer.OPERATOR:
 		if current.Val == "&" {
diff --git a/compiler/parser/var_alloc_test.go b/compiler/parser/var_alloc_test.go
@@ -120,6 +120,7 @@ func TestAllocConstGroup(t *testing.T) {
 	a = 10
 	b, c = "bbb", "ccc"
 	d = 20
+	e, f = '\t', '\''
 )`)
 
 	expected := &FileNode{
@@ -141,6 +142,11 @@ func TestAllocConstGroup(t *testing.T) {
 						Val:     []Node{&ConstantNode{Type: NUMBER, Value: 20}},
 						IsConst: true,
 					},
+					{
+						// BYTE constants carry their numeric value in Value (not ValueStr),
+						// matching what the parser produces for lexer.BYTE items.
+						Name:    []string{"e", "f"},
+						Val:     []Node{&ConstantNode{Type: BYTE, Value: '\t'}, &ConstantNode{Type: BYTE, Value: '\''}},
+						IsConst: true,
+					},
 				},
 			},
 		},
diff --git a/tests/examples/brainfuck-vm.cell b/tests/examples/brainfuck-vm.cell
@@ -0,0 +1,53 @@
+// Test simulating a Turing machine.
+package main
+
+import "debug"
+
+// brainfuck
+var p, pc int64 = 0, 0 // p for position
+var a [30000]uint8
+var r string
+
+var prog = "++++++++[>++++[>++>+++>+++>+<<<<-]>+>+>->>+[<]<-]>>.>---.+++++++..+++.>>.<-.<.+++.------.--------.>>+.>++."
+
+// scan advances the global pc in direction dir (main calls it with 1 or -1)
+// until the bracket nesting is balanced again. nest starts at dir; each ']'
+// seen at prog[pc+dir] decrements it and each '[' increments it, and the loop
+// ends once nest reaches zero.
+function scan(dir int64) {
+	for nest := dir;  (nest * dir) != 0 ; pc += dir {
+		if prog[pc+dir] == ']' {
+			nest--
+		} else if prog[pc+dir] == '[' {
+			nest++
+		}
+	}
+}
+ 
+// main interprets the brainfuck program stored in prog, one character per
+// iteration: '>' and '<' move the data position p, '+' and '-' change the
+// cell a[p], '.' records a[p] in r and prints it, '[' skips forward via
+// scan(1) when a[p] is zero and ']' jumps back via scan(-1) when it is not.
+// Any other character prints r and returns 0.
+function main() {
+	j := 0
+	for pc = 0; pc < len(prog); pc++ { 
+		if prog[pc] == '>' {
+			p++
+        } else if prog[pc] == '<' {
+			p--
+		} else if prog[pc] == '+' {
+			a[p]++
+		} else if prog[pc] == '-' {
+			a[p]--
+		} else if prog[pc] == '.' {
+			// NOTE(review): r is declared without an initial value, so this
+			// indexed write relies on r having room for j+1 bytes — confirm.
+			r[j] = a[p]
+			j++
+			debug.Printf("%c", a[p])
+		} else if prog[pc] == '[' {
+			if a[p] == uint8(0) {
+				scan(1)
+			}
+		} else if prog[pc] == ']' {
+			if a[p] != uint8(0) {
+				scan(-1)
+			}
+        } else {
+			debug.Printf("%s", r)
+			return 0
+		}
+	}
+	debug.Printf("%s", r)
+	return 0
+}
diff --git a/tests/examples/byte.cell b/tests/examples/byte.cell
@@ -0,0 +1,30 @@
+import "debug"
+
+// main exercises byte literals: an escaped quote literal, increment and
+// decrement of a byte variable, comparison of a byte with a literal and with
+// a string element, iteration over a string by index, and increment or
+// decrement of a string element.
+function main() {
+    var a = '\''
+    debug.Printf("%c\n", a)
+    // var b = a + 3
+    // debug.Printf("%c\n", b)
+    b := '*'
+    b++
+    b--
+    if b == '*' {
+        debug.Printf("b == '*'") 
+    }
+    var s = "asd"
+    if s[0] == b {
+        debug.Printf("s[0] == b")
+    }
+    
+    for i := 0; i < len(s); i++ {
+        debug.Printf("%c", s[i])
+    }
+
+    if s[0] == '+' { // '+'
+        s[1]++
+    } else if s[0] == '-' { // '-'
+        s[1]--
+    }
+
+    return 0
+}

-Original file line number
+Diff line change
@@ @@ -1,2 +1,3 @@ @@
 internal/grammar/.antlr
 -output
 +output
 +.idea
Original file line number	Diff line number	Diff line change
`@@ -132,6 +132,7 @@ type DataType uint8`
`132`	`132`
`133`	`133`	`const (`
`134`	`134`	`STRING DataType = iota`
	`135`	`+ BYTE`
`135`	`136`	`NUMBER`
`136`	`137`	`BOOL`
`137`	`138`	`)`