Skip to content

Commit 09a701b

Browse files
authored
[AIG][ImportAIGER] Fix incorrect tokenization and simplify parser (#8588)
- Fix lexer tokenization by removing premature whitespace skipping. - Add location-aware error reporting with SMLoc tracking throughout the parser. The tokenization fix resolves issues where whitespace was consumed prematurely, causing parsing errors.
1 parent 5abe8c7 commit 09a701b

File tree

3 files changed

+141
-45
lines changed

3 files changed

+141
-45
lines changed

lib/Conversion/ImportAIGER/ImportAIGER.cpp

Lines changed: 47 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
#include "llvm/ADT/StringRef.h"
3030
#include "llvm/Support/Debug.h"
3131
#include "llvm/Support/ErrorHandling.h"
32+
#include "llvm/Support/SMLoc.h"
3233
#include "llvm/Support/SourceMgr.h"
3334
#include "llvm/Support/raw_ostream.h"
3435
#include <cctype>
@@ -258,7 +259,7 @@ class AIGERParser {
258259
ParseResult expectToken(AIGERTokenKind kind, const Twine &message);
259260

260261
/// Parse a number token into result
261-
ParseResult parseNumber(unsigned &result);
262+
ParseResult parseNumber(unsigned &result, SMLoc *loc = nullptr);
262263

263264
/// Parse a binary encoded number (variable-length encoding)
264265
ParseResult parseBinaryNumber(unsigned &result);
@@ -350,7 +351,6 @@ AIGERToken AIGERLexer::nextToken() {
350351
};
351352

352353
auto token = impl();
353-
skipWhitespace();
354354
return token;
355355
}
356356

@@ -390,15 +390,16 @@ ParseResult AIGERParser::expectToken(AIGERTokenKind kind,
390390
return success();
391391
}
392392

393-
ParseResult AIGERParser::parseNumber(unsigned &result) {
394-
auto startLoc = lexer.getCurrentLoc();
393+
ParseResult AIGERParser::parseNumber(unsigned &result, SMLoc *loc) {
395394
auto token = lexer.nextToken();
395+
if (loc)
396+
*loc = token.location;
396397

397398
if (token.kind != AIGERTokenKind::Number)
398-
return emitError(startLoc, "expected number");
399+
return emitError(token.location, "expected number");
399400

400401
if (token.spelling.getAsInteger(10, result))
401-
return emitError(startLoc, "invalid number format");
402+
return emitError(token.location, "invalid number format");
402403

403404
return success();
404405
}
@@ -441,7 +442,6 @@ ParseResult AIGERParser::parseHeader() {
441442
while (lexer.peekToken().kind != AIGERTokenKind::Identifier)
442443
lexer.nextToken();
443444

444-
auto startLoc = lexer.getCurrentLoc();
445445
auto formatToken = lexer.nextToken();
446446
if (formatToken.spelling == "aag") {
447447
isBinaryFormat = false;
@@ -450,24 +450,26 @@ ParseResult AIGERParser::parseHeader() {
450450
isBinaryFormat = true;
451451
LLVM_DEBUG(llvm::dbgs() << "Format: aig (binary)\n");
452452
} else {
453-
return emitError(startLoc, "expected 'aag' or 'aig' format identifier");
453+
return emitError(formatToken.location,
454+
"expected 'aag' or 'aig' format identifier");
454455
}
455456

456457
// Parse M I L O A (numbers separated by spaces)
457-
if (parseNumber(maxVarIndex))
458-
return emitError("failed to parse M (max variable index)");
458+
SMLoc loc;
459+
if (parseNumber(maxVarIndex, &loc))
460+
return emitError(loc, "failed to parse M (max variable index)");
459461

460-
if (parseNumber(numInputs))
461-
return emitError("failed to parse I (number of inputs)");
462+
if (parseNumber(numInputs, &loc))
463+
return emitError(loc, "failed to parse I (number of inputs)");
462464

463-
if (parseNumber(numLatches))
464-
return emitError("failed to parse L (number of latches)");
465+
if (parseNumber(numLatches, &loc))
466+
return emitError(loc, "failed to parse L (number of latches)");
465467

466-
if (parseNumber(numOutputs))
467-
return emitError("failed to parse O (number of outputs)");
468+
if (parseNumber(numOutputs, &loc))
469+
return emitError(loc, "failed to parse O (number of outputs)");
468470

469-
if (parseNumber(numAnds))
470-
return emitError("failed to parse A (number of AND gates)");
471+
if (parseNumber(numAnds, &loc))
472+
return emitError(loc, "failed to parse A (number of AND gates)");
471473

472474
LLVM_DEBUG(llvm::dbgs() << "Header: M=" << maxVarIndex << " I=" << numInputs
473475
<< " L=" << numLatches << " O=" << numOutputs
@@ -480,7 +482,7 @@ ParseResult AIGERParser::parseHeader() {
480482
ParseResult AIGERParser::parseNewLine() {
481483
auto token = lexer.nextToken();
482484
if (token.kind != AIGERTokenKind::Newline)
483-
return emitError("expected newline");
485+
return emitError(token.location, "expected newline");
484486

485487
return success();
486488
}
@@ -496,9 +498,9 @@ ParseResult AIGERParser::parseInputs() {
496498

497499
for (unsigned i = 0; i < numInputs; ++i) {
498500
unsigned literal;
499-
auto startLoc = lexer.getCurrentLoc();
500-
if (parseNumber(literal) || parseNewLine())
501-
return emitError(startLoc, "failed to parse input literal");
501+
SMLoc loc;
502+
if (parseNumber(literal, &loc) || parseNewLine())
503+
return emitError(loc, "failed to parse input literal");
502504
inputLiterals.push_back(literal);
503505
}
504506

@@ -507,15 +509,15 @@ ParseResult AIGERParser::parseInputs() {
507509

508510
ParseResult AIGERParser::parseLatches() {
509511
LLVM_DEBUG(llvm::dbgs() << "Parsing " << numLatches << " latches\n");
510-
auto startLoc = lexer.getCurrentLoc();
511512
if (isBinaryFormat) {
512513
// In binary format, latches are implicit (literals 2, 4, 6, ...)
513514
for (unsigned i = 0; i < numLatches; ++i) {
514515
unsigned literal;
515-
if (parseNumber(literal))
516-
return emitError(startLoc, "failed to parse latch next state literal");
516+
SMLoc loc;
517+
if (parseNumber(literal, &loc))
518+
return emitError(loc, "failed to parse latch next state literal");
517519

518-
latchDefs.push_back({2 * (i + 1 + numInputs), literal, startLoc});
520+
latchDefs.push_back({2 * (i + 1 + numInputs), literal, loc});
519521

520522
// Expect newline after each latch next state
521523
if (parseNewLine())
@@ -527,17 +529,19 @@ ParseResult AIGERParser::parseLatches() {
527529
// Parse latch definitions: current_state next_state
528530
for (unsigned i = 0; i < numLatches; ++i) {
529531
unsigned currentState, nextState;
530-
if (parseNumber(currentState) || parseNumber(nextState) || parseNewLine())
531-
return emitError(startLoc, "failed to parse latch definition");
532+
SMLoc loc;
533+
if (parseNumber(currentState, &loc) || parseNumber(nextState) ||
534+
parseNewLine())
535+
return emitError(loc, "failed to parse latch definition");
532536

533537
LLVM_DEBUG(llvm::dbgs() << "Latch " << i << ": " << currentState << " -> "
534538
<< nextState << "\n");
535539

536540
// Validate current state literal (should be even and positive)
537541
if (currentState % 2 != 0 || currentState == 0)
538-
return emitError(startLoc, "invalid latch current state literal");
542+
return emitError(loc, "invalid latch current state literal");
539543

540-
latchDefs.push_back({currentState, nextState, startLoc});
544+
latchDefs.push_back({currentState, nextState, loc});
541545
}
542546

543547
return success();
@@ -546,18 +550,17 @@ ParseResult AIGERParser::parseLatches() {
546550
ParseResult AIGERParser::parseOutputs() {
547551
LLVM_DEBUG(llvm::dbgs() << "Parsing " << numOutputs << " outputs\n");
548552
// NOTE: Parsing is same for binary and ASCII formats
549-
auto startLoc = lexer.getCurrentLoc();
550-
551553
// Parse output literals
552554
for (unsigned i = 0; i < numOutputs; ++i) {
553555
unsigned literal;
554-
if (parseNumber(literal) || parseNewLine())
555-
return emitError(startLoc, "failed to parse output literal");
556+
SMLoc loc;
557+
if (parseNumber(literal, &loc) || parseNewLine())
558+
return emitError(loc, "failed to parse output literal");
556559

557560
LLVM_DEBUG(llvm::dbgs() << "Output " << i << ": " << literal << "\n");
558561

559562
// Output literals can be any valid literal (including inverted)
560-
outputLiterals.push_back({literal, startLoc});
563+
outputLiterals.push_back({literal, loc});
561564
}
562565

563566
return success();
@@ -573,28 +576,27 @@ ParseResult AIGERParser::parseAndGates() {
573576
}
574577

575578
ParseResult AIGERParser::parseAndGatesASCII() {
576-
auto startLoc = lexer.getCurrentLoc();
577579
// Parse AND gate definitions: lhs rhs0 rhs1
578580
for (unsigned i = 0; i < numAnds; ++i) {
579581
unsigned lhs, rhs0, rhs1;
580-
if (parseNumber(lhs) || parseNumber(rhs0) || parseNumber(rhs1) ||
582+
SMLoc loc;
583+
if (parseNumber(lhs, &loc) || parseNumber(rhs0) || parseNumber(rhs1) ||
581584
parseNewLine())
582-
return emitError(startLoc, "failed to parse AND gate definition");
585+
return emitError(loc, "failed to parse AND gate definition");
583586

584587
LLVM_DEBUG(llvm::dbgs() << "AND Gate " << i << ": " << lhs << " = " << rhs0
585588
<< " & " << rhs1 << "\n");
586589

587590
// Validate LHS (should be even and positive)
588591
if (lhs % 2 != 0 || lhs == 0)
589-
return emitError(startLoc, "invalid AND gate LHS literal");
592+
return emitError(loc, "invalid AND gate LHS literal");
590593

591594
// Validate literal bounds
592595
if (lhs / 2 > maxVarIndex || rhs0 / 2 > maxVarIndex ||
593596
rhs1 / 2 > maxVarIndex)
594-
return emitError(startLoc,
595-
"AND gate literal exceeds maximum variable index");
597+
return emitError(loc, "AND gate literal exceeds maximum variable index");
596598

597-
andGateDefs.push_back({lhs, rhs0, rhs1, startLoc});
599+
andGateDefs.push_back({lhs, rhs0, rhs1, loc});
598600
}
599601

600602
return success();
@@ -604,7 +606,6 @@ ParseResult AIGERParser::parseAndGatesBinary() {
604606
// In binary format, AND gates are encoded with delta compression
605607
// Each AND gate is encoded as: delta0 delta1
606608
// where: rhs0 = lhs - delta0, rhs1 = rhs0 - delta1
607-
auto startLoc = lexer.getCurrentLoc();
608609

609610
LLVM_DEBUG(llvm::dbgs() << "Starting binary AND gate parsing\n");
610611

@@ -621,8 +622,9 @@ ParseResult AIGERParser::parseAndGatesBinary() {
621622

622623
for (unsigned i = 0; i < numAnds; ++i) {
623624
unsigned delta0, delta1;
625+
SMLoc loc = lexer.getCurrentLoc();
624626
if (parseBinaryNumber(delta0) || parseBinaryNumber(delta1))
625-
return emitError("failed to parse binary AND gate deltas");
627+
return emitError(loc, "failed to parse binary AND gate deltas");
626628

627629
auto lhs = static_cast<int64_t>(currentLHS);
628630

@@ -650,7 +652,7 @@ ParseResult AIGERParser::parseAndGatesBinary() {
650652

651653
andGateDefs.push_back({static_cast<unsigned>(lhs),
652654
static_cast<unsigned>(rhs0),
653-
static_cast<unsigned>(rhs1), startLoc});
655+
static_cast<unsigned>(rhs1), loc});
654656
currentLHS += 2; // Next AND gate LHS
655657
}
656658

test/Conversion/ImportAIGER/invalid-index.aig

Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
aig 1026 16 16 16 994
2+
66
3+
342
4+
862
5+
1194
6+
1406
7+
1542
8+
1642
9+
1696
10+
1838
11+
1842
12+
1848
13+
1854
14+
1860
15+
1866
16+
1872
17+
2052
18+
34
19+
36
20+
38
21+
40
22+
42
23+
44
24+
46
25+
48
26+
50
27+
52
28+
54
29+
56
30+
58
31+
60
32+
62
33+
64
34+
" $ & ( * , . 0 2 4 6 8 : < >@@ @"@$@&@(@*@,@.@0@2@4@6@8@:^^^ ^"^$^&^(^*^,^.^0^2^4^6zzzz z"z$z&z(z*z,z.z0z2����� �"�$�&�(�*�,�.������ �"�$�&�(�*������� �"�$�&�������� �"���������������� ������
35+
� �����
36+
� ����
37+
���������������������������������������������������������������������������������������������������������������������������������� � � � ��� � ��� � ��� � ������������ ���� ���� ���� ���� ���� ���� ���� ���� ���� ���� ���� �������w�u�u�s���u�s���u�s���u�s���u�s���u�s���u�s������ ���� ���� ������ �� ���� ���� ���� ���� ���� ���� ���� ���� ���� �����������?�=�=�;���=�;���=�;�������� �� ���� ���� jj�� rr�� zz�� ||�� ||�� ||����}�}�wzor gj _b ��_b ��_b ��_b ��_b \\Y\Y\Y\Y\SVKN CF ;> 36 +.+.+.+.+.+.+.+.&(#&(ld6��� ��!6216216216212F1&R $'   & '6�`b!`0Q1S2O3Q(Y)[*W+Y*W+Y�����
38+
��
39+
�������� � � �� �����**� � ����� � �������������e�g����**ZZi0 a[0]
40+
i1 a[1]
41+
i2 a[2]
42+
i3 a[3]
43+
i4 a[4]
44+
i5 a[5]
45+
i6 a[6]
46+
i7 a[7]
47+
i8 a[8]
48+
i9 a[9]
49+
i10 a[10]
50+
i11 a[11]
51+
i12 a[12]
52+
i13 a[13]
53+
i14 a[14]
54+
i15 a[15]
55+
l0 reg[0]
56+
l1 reg[1]
57+
l2 reg[2]
58+
l3 reg[3]
59+
l4 reg[4]
60+
l5 reg[5]
61+
l6 reg[6]
62+
l7 reg[7]
63+
l8 reg[8]
64+
l9 reg[9]
65+
l10 reg[10]
66+
l11 reg[11]
67+
l12 reg[12]
68+
l13 reg[13]
69+
l14 reg[14]
70+
l15 reg[15]
71+
o0 result[0]
72+
o1 result[1]
73+
o2 result[2]
74+
o3 result[3]
75+
o4 result[4]
76+
o5 result[5]
77+
o6 result[6]
78+
o7 result[7]
79+
o8 result[8]
80+
o9 result[9]
81+
o10 result[10]
82+
o11 result[11]
83+
o12 result[12]
84+
o13 result[13]
85+
o14 result[14]
86+
o15 result[15]
87+
c
88+
Generated by CIRCT unknown git version
89+
module: counter
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
// RUN: circt-translate --import-aiger %S/invalid-index.aig | FileCheck %s
2+
// This AIGER file has leading whitespace in an AND gate definition, which
3+
// the parser previously skipped incorrectly.
4+
5+
// CHECK: @aiger_top

0 commit comments

Comments
 (0)