|
1 | | -namespace RobotsTxt |
| 1 | +using System.Runtime.CompilerServices; |
| 2 | + |
| 3 | +namespace RobotsTxt; |
| 4 | + |
/// <summary>
/// Longest-match strategy for matching a URL path against a single robots.txt
/// rule pattern. Each Match* method returns a match priority, interpreted as:
/// <list type="bullet">
/// <item><description>priority &lt; 0: no match.</description></item>
/// <item><description>priority == 0: match, but treat it as if it matched an empty pattern.</description></item>
/// <item><description>priority &gt; 0: match.</description></item>
/// </list>
/// In this strategy the priority of a match is the pattern length in bytes.
/// Patterns are matched from the start of the path; '*' matches any run of
/// bytes (including none) and a '$' in the last pattern position anchors the
/// match to the end of the path.
/// </summary>
internal static class LongestMatchRobotsMatchStrategy
{
    private const byte Wildcard = (byte)'*';
    private const byte EndAnchor = (byte)'$';

    /// <summary>
    /// Largest position buffer (in ints) that the fast matcher places on the
    /// stack; longer paths use a heap array so that an arbitrarily long path
    /// cannot exhaust the stack.
    /// </summary>
    private const int MaxStackPositions = 256;

    /// <summary>
    /// Priority of an Allow rule: <paramref name="pattern"/> length when it
    /// matches <paramref name="path"/>, otherwise -1.
    /// </summary>
    internal static int MatchAllowSlow(ReadOnlySpan<byte> path, ReadOnlySpan<byte> pattern)
    {
        return MatchesSlow(path, pattern) ? pattern.Length : -1;
    }

    /// <summary>
    /// Priority of a Disallow rule: <paramref name="pattern"/> length when it
    /// matches <paramref name="path"/>, otherwise -1.
    /// </summary>
    internal static int MatchDisallowSlow(ReadOnlySpan<byte> path, ReadOnlySpan<byte> pattern)
    {
        return MatchesSlow(path, pattern) ? pattern.Length : -1;
    }

    /// <summary>
    /// Returns true when <paramref name="pattern"/> matches a prefix of
    /// <paramref name="path"/> (or the whole path when the pattern ends in '$').
    /// Always allocates its position buffer on the heap and shares no code with
    /// <see cref="MatchesFast"/>, so the two can be checked against each other.
    /// </summary>
    internal static bool MatchesSlow(ReadOnlySpan<byte> path, ReadOnlySpan<byte> pattern)
    {
        var pathLength = path.Length;

        // positions[0..count) holds, in ascending order, every path offset that
        // the pattern prefix consumed so far can end at.
        var positions = new int[pathLength + 1];
        var count = 1;

        for (var j = 0; j < pattern.Length; j++)
        {
            var ch = pattern[j];
            if (ch == EndAnchor && j + 1 == pattern.Length)
            {
                // Anchored: the furthest reachable offset must be the end of the path.
                return positions[count - 1] == pathLength;
            }

            if (ch == Wildcard)
            {
                // '*' can stop anywhere from the smallest offset to the end of the path.
                count = pathLength - positions[0] + 1;
                for (var i = 1; i < count; i++)
                {
                    positions[i] = positions[i - 1] + 1;
                }
            }
            else
            {
                // Literal byte; includes '$' when it is not the last pattern byte.
                var kept = 0;
                for (var i = 0; i < count; i++)
                {
                    var offset = positions[i];
                    if (offset < pathLength && path[offset] == ch)
                    {
                        positions[kept++] = offset + 1;
                    }
                }

                count = kept;
                if (count == 0)
                {
                    return false;
                }
            }
        }

        return true;
    }

    /// <summary>
    /// Priority of an Allow rule using the fast matcher: <paramref name="pattern"/>
    /// length on a match, otherwise -1.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    internal static int MatchAllowFast(ReadOnlySpan<byte> path, ReadOnlySpan<byte> pattern, bool haveWildcards)
    {
        return MatchesFast(path, pattern, haveWildcards) ? pattern.Length : -1;
    }

    /// <summary>
    /// Priority of a Disallow rule using the fast matcher: <paramref name="pattern"/>
    /// length on a match, otherwise -1.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    internal static int MatchDisallowFast(ReadOnlySpan<byte> path, ReadOnlySpan<byte> pattern, bool haveWildcards)
    {
        return MatchesFast(path, pattern, haveWildcards) ? pattern.Length : -1;
    }

    /// <summary>
    /// Same result as <see cref="MatchesSlow"/>, with a shortcut for literal patterns.
    /// </summary>
    /// <param name="path">Path bytes to test.</param>
    /// <param name="pattern">Rule pattern bytes.</param>
    /// <param name="haveWildcards">
    /// Caller's hint that <paramref name="pattern"/> contains '*'. When false the
    /// pattern is compared as a literal prefix. NOTE(review): whether callers also
    /// set this for a trailing '$' is not visible here, so a trailing '$' is
    /// detected locally and always handled as an end anchor.
    /// </param>
    /// <returns>True when the pattern matches the path.</returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    internal static bool MatchesFast(ReadOnlySpan<byte> path, ReadOnlySpan<byte> pattern, bool haveWildcards)
    {
        if (pattern.IsEmpty)
        {
            return true;
        }

        if (!haveWildcards && pattern[pattern.Length - 1] != EndAnchor)
        {
            // Rules match from the start of the path, so this must be a prefix
            // test, not a substring search.
            return path.StartsWith(pattern);
        }

        // An empty path needs no special case: it can still match "*" or "$".
        return MatchesWithWildcards(path, pattern);
    }

    /// <summary>
    /// Wildcard-aware matcher behind <see cref="MatchesFast"/>. Kept out of the
    /// inlined entry point because it owns the scratch buffer.
    /// </summary>
    private static bool MatchesWithWildcards(ReadOnlySpan<byte> path, ReadOnlySpan<byte> pattern)
    {
        var pathLength = path.Length;

        // Needs pathLength + 1 slots; a fixed-size stack buffer covers short paths.
        Span<int> positions = pathLength < MaxStackPositions
            ? stackalloc int[MaxStackPositions]
            : new int[pathLength + 1];

        // Do not rely on the stack buffer being zero-initialized.
        positions[0] = 0;
        var count = 1;

        for (var j = 0; j < pattern.Length; j++)
        {
            var ch = pattern[j];
            if (ch == EndAnchor && j + 1 == pattern.Length)
            {
                return positions[count - 1] == pathLength;
            }

            if (ch == Wildcard)
            {
                var start = positions[0];
                count = pathLength - start + 1;
                for (var i = 0; i < count; i++)
                {
                    positions[i] = start + i;
                }
            }
            else
            {
                // Literal byte; includes '$' when it is not the last pattern byte.
                var kept = 0;
                for (var i = 0; i < count; i++)
                {
                    var offset = positions[i];
                    if (offset >= pathLength)
                    {
                        // Offsets are ascending, so nothing after this one is in range.
                        break;
                    }

                    if (path[offset] == ch)
                    {
                        positions[kept++] = offset + 1;
                    }
                }

                if (kept == 0)
                {
                    return false;
                }

                count = kept;
            }
        }

        return true;
    }
}
0 commit comments