From e90335754cb0f1df51e7c24c348b0ce783868b53 Mon Sep 17 00:00:00 2001 From: David 'Digit' Turner Date: Tue, 4 Feb 2025 14:25:26 +0100 Subject: [PATCH] Fix depfile parser to support ? The `?` is actually valid on Unix filesystems, as well as on Windows when it appears at the start of the path, as in `\\?\C:\foo` or `//?/C:/foo`. Before this patch, the depfile parser considered it as whitespace and would mistakenly decompose `//?/C:/foo` into two separate paths (`//` and `/C:/foo`) which broke Ninja incremental builds. The major change is in src/depfile_parser.in.cc. Note that re2c version 3.1 was used to regenerate depfile_parser.cc from that file. Fixes #2568 --- src/depfile_parser.cc | 154 ++++++++++++++++++------------------- src/depfile_parser.in.cc | 2 +- src/depfile_parser_test.cc | 10 +++ 3 files changed, 88 insertions(+), 78 deletions(-) diff --git a/src/depfile_parser.cc b/src/depfile_parser.cc index 7ce7290614..9f7b393b4b 100644 --- a/src/depfile_parser.cc +++ b/src/depfile_parser.cc @@ -77,7 +77,7 @@ bool DepfileParser::Parse(string* content, string* err) { 0, 128, 0, 0, 0, 128, 0, 0, 128, 128, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, - 128, 128, 128, 0, 0, 128, 0, 0, + 128, 128, 128, 0, 0, 128, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, @@ -105,55 +105,55 @@ bool DepfileParser::Parse(string* content, string* err) { }; yych = *in; if (yybm[0+yych] & 128) { - goto yy9; + goto yy5; } if (yych <= '\r') { if (yych <= '\t') { - if (yych >= 0x01) goto yy4; + if (yych >= 0x01) goto yy1; } else { - if (yych <= '\n') goto yy6; - if (yych <= '\f') goto yy4; - goto yy8; + if (yych <= '\n') goto yy3; + if (yych <= '\f') goto yy1; + goto yy4; } } else { if (yych <= '$') { - if (yych <= '#') goto yy4; - goto yy12; + if (yych <= '#') goto yy1; + goto yy7; } else { - if (yych <= '?') goto yy4; - if (yych <= '\\') goto yy13; - goto yy4; + if (yych <= '>') goto 
yy1; + if (yych <= '\\') goto yy8; + goto yy1; } } ++in; { break; } -yy4: +yy1: ++in; -yy5: +yy2: { // For any other character (e.g. whitespace), swallow it here, // allowing the outer logic to loop around again. break; } -yy6: +yy3: ++in; { // A newline ends the current file name and the current rule. have_newline = true; break; } -yy8: +yy4: yych = *++in; - if (yych == '\n') goto yy6; - goto yy5; -yy9: + if (yych == '\n') goto yy3; + goto yy2; +yy5: yych = *++in; if (yybm[0+yych] & 128) { - goto yy9; + goto yy5; } -yy11: +yy6: { // Got a span of plain text. int len = (int)(in - start); @@ -163,54 +163,54 @@ bool DepfileParser::Parse(string* content, string* err) { out += len; continue; } -yy12: +yy7: yych = *++in; - if (yych == '$') goto yy14; - goto yy5; -yy13: + if (yych == '$') goto yy9; + goto yy2; +yy8: yych = *(yymarker = ++in); if (yych <= ' ') { if (yych <= '\n') { - if (yych <= 0x00) goto yy5; - if (yych <= '\t') goto yy16; - goto yy17; + if (yych <= 0x00) goto yy2; + if (yych <= '\t') goto yy10; + goto yy11; } else { - if (yych == '\r') goto yy19; - if (yych <= 0x1F) goto yy16; - goto yy21; + if (yych == '\r') goto yy12; + if (yych <= 0x1F) goto yy10; + goto yy13; } } else { if (yych <= '9') { - if (yych == '#') goto yy23; - goto yy16; + if (yych == '#') goto yy14; + goto yy10; } else { - if (yych <= ':') goto yy25; - if (yych == '\\') goto yy27; - goto yy16; + if (yych <= ':') goto yy15; + if (yych == '\\') goto yy17; + goto yy10; } } -yy14: +yy9: ++in; { // De-escape dollar character. *out++ = '$'; continue; } -yy16: +yy10: ++in; - goto yy11; -yy17: + goto yy6; +yy11: ++in; { // A line continuation ends the current file name. break; } -yy19: +yy12: yych = *++in; - if (yych == '\n') goto yy17; + if (yych == '\n') goto yy11; in = yymarker; - goto yy5; -yy21: + goto yy2; +yy13: ++in; { // 2N+1 backslashes plus space -> N backslashes plus space. 
@@ -222,7 +222,7 @@ bool DepfileParser::Parse(string* content, string* err) { *out++ = ' '; continue; } -yy23: +yy14: ++in; { // De-escape hash sign, but preserve other leading backslashes. @@ -233,17 +233,17 @@ bool DepfileParser::Parse(string* content, string* err) { *out++ = '#'; continue; } -yy25: +yy15: yych = *++in; if (yych <= '\f') { - if (yych <= 0x00) goto yy28; - if (yych <= 0x08) goto yy26; - if (yych <= '\n') goto yy28; + if (yych <= 0x00) goto yy18; + if (yych <= 0x08) goto yy16; + if (yych <= '\n') goto yy18; } else { - if (yych <= '\r') goto yy28; - if (yych == ' ') goto yy28; + if (yych <= '\r') goto yy18; + if (yych == ' ') goto yy18; } -yy26: +yy16: { // De-escape colon sign, but preserve other leading backslashes. // Regular expression uses lookahead to make sure that no whitespace @@ -255,29 +255,29 @@ bool DepfileParser::Parse(string* content, string* err) { *out++ = ':'; continue; } -yy27: +yy17: yych = *++in; if (yych <= ' ') { if (yych <= '\n') { - if (yych <= 0x00) goto yy11; - if (yych <= '\t') goto yy16; - goto yy11; + if (yych <= 0x00) goto yy6; + if (yych <= '\t') goto yy10; + goto yy6; } else { - if (yych == '\r') goto yy11; - if (yych <= 0x1F) goto yy16; - goto yy30; + if (yych == '\r') goto yy6; + if (yych <= 0x1F) goto yy10; + goto yy19; } } else { if (yych <= '9') { - if (yych == '#') goto yy23; - goto yy16; + if (yych == '#') goto yy14; + goto yy10; } else { - if (yych <= ':') goto yy25; - if (yych == '\\') goto yy32; - goto yy16; + if (yych <= ':') goto yy15; + if (yych == '\\') goto yy20; + goto yy10; } } -yy28: +yy18: ++in; { // Backslash followed by : and whitespace. @@ -291,7 +291,7 @@ bool DepfileParser::Parse(string* content, string* err) { have_newline = true; break; } -yy30: +yy19: ++in; { // 2N backslashes plus space -> 2N backslashes, end of filename. 
@@ -301,26 +301,26 @@ bool DepfileParser::Parse(string* content, string* err) { out += len - 1; break; } -yy32: +yy20: yych = *++in; if (yych <= ' ') { if (yych <= '\n') { - if (yych <= 0x00) goto yy11; - if (yych <= '\t') goto yy16; - goto yy11; + if (yych <= 0x00) goto yy6; + if (yych <= '\t') goto yy10; + goto yy6; } else { - if (yych == '\r') goto yy11; - if (yych <= 0x1F) goto yy16; - goto yy21; + if (yych == '\r') goto yy6; + if (yych <= 0x1F) goto yy10; + goto yy13; } } else { if (yych <= '9') { - if (yych == '#') goto yy23; - goto yy16; + if (yych == '#') goto yy14; + goto yy10; } else { - if (yych <= ':') goto yy25; - if (yych == '\\') goto yy27; - goto yy16; + if (yych <= ':') goto yy15; + if (yych == '\\') goto yy17; + goto yy10; } } } diff --git a/src/depfile_parser.in.cc b/src/depfile_parser.in.cc index 4b5f5fe4c1..5bef2585bf 100644 --- a/src/depfile_parser.in.cc +++ b/src/depfile_parser.in.cc @@ -134,7 +134,7 @@ bool DepfileParser::Parse(string* content, string* err) { *out++ = '$'; continue; } - '\\'+ [^\000\r\n] | [a-zA-Z0-9+,/_:.~()}{%=@\x5B\x5D!\x80-\xFF-]+ { + '\\'+ [^\000\r\n] | [a-zA-Z0-9+?,/_:.~()}{%=@\x5B\x5D!\x80-\xFF-]+ { // Got a span of plain text. int len = (int)(in - start); // Need to shift it over if we're overwriting backslashes. diff --git a/src/depfile_parser_test.cc b/src/depfile_parser_test.cc index 947ae764bc..4c379fdcf3 100644 --- a/src/depfile_parser_test.cc +++ b/src/depfile_parser_test.cc @@ -62,6 +62,16 @@ TEST_F(DepfileParserTest, Continuation) { EXPECT_EQ(2u, parser_.ins_.size()); } +TEST_F(DepfileParserTest, WindowsDrivePaths) { + string err; + EXPECT_TRUE(Parse("foo.o: //?/c:/bar.h\n", &err)); + ASSERT_EQ("", err); + ASSERT_EQ(1u, parser_.outs_.size()); + EXPECT_EQ("foo.o", parser_.outs_[0].AsString()); + EXPECT_EQ(1u, parser_.ins_.size()); + EXPECT_EQ("//?/c:/bar.h", parser_.ins_[0].AsString()); +} + TEST_F(DepfileParserTest, CarriageReturnContinuation) { string err; EXPECT_TRUE(Parse(