Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
113 changes: 63 additions & 50 deletions RobotsTxt/Extensions.cs
Original file line number Diff line number Diff line change
@@ -1,73 +1,86 @@
namespace RobotsTxt
namespace RobotsTxt;

public static class MyExtensions
{
public static class MyExtensions
#if !NETCOREAPP
public static bool Contains(this ReadOnlySpan<byte> self, byte other)
{
public static bool EqualsIgnoreCase(this ReadOnlySpan<byte> self, ReadOnlySpan<byte> other)
foreach (var c in self)
{
if (self.Length != other.Length)
{
return false;
}

for (var i = 0; i < self.Length; i++)
if (c == other)
{
var c1 = self[i];
var c2 = other[i];
if ('A' <= c1 && c1 <= 'Z')
c1 += 32;
if ('A' <= c2 && c2 <= 'Z')
c2 += 32;
if (c1 != c2)
{
return false;
}
return true;
}
}
return false;
}
#endif

return true;
public static bool EqualsIgnoreCase(this ReadOnlySpan<byte> self, ReadOnlySpan<byte> other)
{
if (self.Length != other.Length)
{
return false;
}

public static bool StartsWithIgnoreCase(this ReadOnlySpan<byte> span, ReadOnlySpan<byte> value)
for (var i = 0; i < self.Length; i++)
{
if (span.Length < value.Length)
var c1 = self[i];
var c2 = other[i];
if ('A' <= c1 && c1 <= 'Z')
c1 += 32;
if ('A' <= c2 && c2 <= 'Z')
c2 += 32;
if (c1 != c2)
{
return false;
}

for (var i = 0; i < value.Length; i++)
{
var c1 = span[i];
var c2 = value[i];
if ('A' <= c1 && c1 <= 'Z')
c1 += 32;
if ('A' <= c2 && c2 <= 'Z')
c2 += (byte)' ';
if (c1 != c2)
{
return false;
}
}

return true;
}

public static bool IsXDigit(this byte c)
{
return ('0' <= c && c <= '9') || ('a' <= c && c <= 'f') || ('A' <= c && c <= 'F');
}
return true;
}

public static bool IsAlpha(this byte c)
public static bool StartsWithIgnoreCase(this ReadOnlySpan<byte> span, ReadOnlySpan<byte> value)
{
if (span.Length < value.Length)
{
return ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z');
return false;
}

public static bool IsSpace(this byte c)
for (var i = 0; i < value.Length; i++)
{
return c == ' ' || c == '\t';
var c1 = span[i];
var c2 = value[i];
if ('A' <= c1 && c1 <= 'Z')
c1 += 32;
if ('A' <= c2 && c2 <= 'Z')
c2 += (byte)' ';
if (c1 != c2)
{
return false;
}
}

public static byte ToUpper(this byte c)
{
return (byte)('a' <= c && c <= 'z' ? c - ' ' : c);
}
return true;
}

/// <summary>
/// Tests whether a byte is an ASCII hexadecimal digit.
/// </summary>
/// <param name="c">The byte to classify.</param>
/// <returns>
/// <c>true</c> when <paramref name="c"/> lies in <c>0-9</c>, <c>a-f</c> or <c>A-F</c>;
/// otherwise <c>false</c>.
/// </returns>
public static bool IsXDigit(this byte c) =>
    c is (>= (byte)'0' and <= (byte)'9')
        or (>= (byte)'a' and <= (byte)'f')
        or (>= (byte)'A' and <= (byte)'F');

/// <summary>
/// Tests whether a byte is an ASCII letter.
/// </summary>
/// <param name="c">The byte to classify.</param>
/// <returns>
/// <c>true</c> when <paramref name="c"/> lies in <c>a-z</c> or <c>A-Z</c>;
/// otherwise <c>false</c>.
/// </returns>
public static bool IsAlpha(this byte c)
{
    var isLower = c >= 'a' && c <= 'z';
    var isUpper = c >= 'A' && c <= 'Z';
    return isLower || isUpper;
}

/// <summary>
/// Tests whether a byte is a space or a horizontal tab.
/// </summary>
/// <param name="c">The byte to classify.</param>
/// <returns>
/// <c>true</c> when <paramref name="c"/> is <c>' '</c> or <c>'\t'</c>;
/// otherwise <c>false</c>. Other whitespace such as line breaks is not accepted.
/// </returns>
public static bool IsSpace(this byte c)
{
    switch (c)
    {
        case (byte)' ':
        case (byte)'\t':
            return true;
        default:
            return false;
    }
}

/// <summary>
/// Converts an ASCII lowercase letter to its uppercase counterpart.
/// </summary>
/// <param name="c">The byte to convert.</param>
/// <returns>
/// The uppercase letter when <paramref name="c"/> lies in <c>a-z</c>;
/// otherwise <paramref name="c"/> unchanged.
/// </returns>
public static byte ToUpper(this byte c)
{
    if (c < 'a' || c > 'z')
    {
        return c;
    }

    // ASCII lowercase and uppercase letters differ by exactly 0x20, the value of ' '.
    return (byte)(c - ' ');
}
}
21 changes: 10 additions & 11 deletions RobotsTxt/IRobotsParseHandler.cs
Original file line number Diff line number Diff line change
@@ -1,14 +1,13 @@
namespace RobotsTxt
namespace RobotsTxt;

public interface IRobotsParseHandler
{
public interface IRobotsParseHandler
{
void HandleRobotsStart();
void HandleRobotsEnd();
void HandleRobotsStart();
void HandleRobotsEnd();

void HandleUserAgent(int lineNum, ReadOnlySpan<byte> value);
void HandleAllow(int lineNum, ReadOnlySpan<byte> value);
void HandleDisallow(int lineNum, ReadOnlySpan<byte> value);
void HandleSitemap(int lineNum, ReadOnlySpan<byte> value);
void HandleUnknownAction(int lineNum, ReadOnlySpan<byte> action, ReadOnlySpan<byte> value);
}
void HandleUserAgent(int lineNum, ReadOnlySpan<byte> userAgent);
void HandleAllow(int lineNum, ReadOnlySpan<byte> value);
void HandleDisallow(int lineNum, ReadOnlySpan<byte> value);
void HandleSitemap(int lineNum, ReadOnlySpan<byte> value);
void HandleUnknownAction(int lineNum, ReadOnlySpan<byte> action, ReadOnlySpan<byte> value);
}
165 changes: 116 additions & 49 deletions RobotsTxt/LongestMatchRobotsMatchStrategy.cs
Original file line number Diff line number Diff line change
@@ -1,72 +1,139 @@
namespace RobotsTxt
using System.Runtime.CompilerServices;

namespace RobotsTxt;

/// <summary>
/// A RobotsMatchStrategy defines a strategy for matching individual lines in a
/// robots.txt file. Each Match* method should return a match priority, which is
/// interpreted as:
///
/// match priority &lt; 0:
/// No match.
///
/// match priority == 0:
/// Match, but treat it as if it matched an empty pattern.
///
/// match priority &gt; 0:
/// Match.
/// </summary>
internal static class LongestMatchRobotsMatchStrategy
{
/// <summary>
/// Computes the match priority of <paramref name="pattern"/> against
/// <paramref name="path"/> by delegating to <see cref="MatchesSlow"/>.
/// </summary>
/// <param name="path">The path bytes to test.</param>
/// <param name="pattern">The pattern bytes to test against.</param>
/// <returns>
/// The length of <paramref name="pattern"/> when it matches; otherwise <c>-1</c>.
/// </returns>
internal static int MatchAllowSlow(ReadOnlySpan<byte> path, ReadOnlySpan<byte> pattern)
{
    if (!MatchesSlow(path, pattern))
    {
        return -1;
    }

    return pattern.Length;
}

/// <summary>
/// Computes the match priority of <paramref name="pattern"/> against
/// <paramref name="path"/> by delegating to <see cref="MatchesSlow"/>.
/// </summary>
/// <param name="path">The path bytes to test.</param>
/// <param name="pattern">The pattern bytes to test against.</param>
/// <returns>
/// The length of <paramref name="pattern"/> when it matches; otherwise <c>-1</c>.
/// </returns>
internal static int MatchDisallowSlow(ReadOnlySpan<byte> path, ReadOnlySpan<byte> pattern)
{
    var matched = MatchesSlow(path, pattern);
    if (matched)
    {
        return pattern.Length;
    }

    return -1;
}

/// <summary>
/// A RobotsMatchStrategy defines a strategy for matching individual lines in a
/// robots.txt file. Each Match* method should return a match priority, which is
/// interpreted as:
///
/// match priority &lt; 0:
/// No match.
///
/// match priority == 0:
/// Match, but treat it as if matched an empty pattern.
///
/// match priority &gt; 0:
/// Match.
/// </summary>
internal static class LongestMatchRobotsMatchStrategy
internal static bool MatchesSlow(ReadOnlySpan<byte> path, ReadOnlySpan<byte> pattern)
{
internal static int MatchAllow(ReadOnlySpan<byte> path, ReadOnlySpan<byte> pattern)
var pathlen = path.Length;
var pos = new int[pathlen + 1];
var numpos = 1;
var patlen = pattern.Length;
for (var j = 0; j < patlen; j++)
{
return Matches(path, pattern) ? pattern.Length : -1;
var ch = pattern[j];
if (ch == '$' && j + 1 == patlen)
{
return pos[numpos - 1] == pathlen;
}

if (ch == '*')
{
numpos = pathlen - pos[0] + 1;
for (var i = 1; i < numpos; i++)
{
pos[i] = pos[i - 1] + 1;
}
}
else
{
// Includes '$' when not at end of pattern.
var newnumpos = 0;
for (var i = 0; i < numpos; i++)
{
if (pos[i] < pathlen && path[pos[i]] == ch)
{
pos[newnumpos++] = pos[i] + 1;
}
}

numpos = newnumpos;
if (numpos == 0) return false;
}
}

internal static int MatchDisallow(ReadOnlySpan<byte> path, ReadOnlySpan<byte> pattern)
return true;
}

/// <summary>
/// Computes the match priority of <paramref name="pattern"/> against
/// <paramref name="path"/> by delegating to <see cref="MatchesFast"/>.
/// </summary>
/// <param name="path">The path bytes to test.</param>
/// <param name="pattern">The pattern bytes to test against.</param>
/// <param name="haveWildcards">Forwarded unchanged to <see cref="MatchesFast"/>.</param>
/// <returns>
/// The length of <paramref name="pattern"/> when it matches; otherwise <c>-1</c>.
/// </returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal static int MatchAllowFast(ReadOnlySpan<byte> path, ReadOnlySpan<byte> pattern, bool haveWildcards)
{
    if (MatchesFast(path, pattern, haveWildcards))
    {
        return pattern.Length;
    }

    return -1;
}

/// <summary>
/// Computes the match priority of <paramref name="pattern"/> against
/// <paramref name="path"/> by delegating to <see cref="MatchesFast"/>.
/// </summary>
/// <param name="path">The path bytes to test.</param>
/// <param name="pattern">The pattern bytes to test against.</param>
/// <param name="haveWildcards">Forwarded unchanged to <see cref="MatchesFast"/>.</param>
/// <returns>
/// The length of <paramref name="pattern"/> when it matches; otherwise <c>-1</c>.
/// </returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal static int MatchDisallowFast(ReadOnlySpan<byte> path, ReadOnlySpan<byte> pattern, bool haveWildcards)
{
    var matched = MatchesFast(path, pattern, haveWildcards);
    if (!matched)
    {
        return -1;
    }

    return pattern.Length;
}

[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal static bool MatchesFast(ReadOnlySpan<byte> path, ReadOnlySpan<byte> pattern, bool haveWildcards)
{
if (pattern.Length == 0) return true;
if (path.Length == 0) return pattern.Length == 0;

if (!haveWildcards)
{
return Matches(path, pattern) ? pattern.Length : -1;
return path.IndexOf(pattern) != -1;
}

internal static bool Matches(ReadOnlySpan<byte> path, ReadOnlySpan<byte> pattern)
Span<int> pos = stackalloc int[path.Length + 1];
var numpos = 1;

for (var j = 0; j < pattern.Length; j++)
{
var pathlen = path.Length;
var pos = new int[pathlen + 1];
int numpos = 1;
var patlen = pattern.Length;
for (var j = 0; j < patlen; j++)
var ch = pattern[j];

// Check for end anchor
if (ch == '$' && j + 1 == pattern.Length)
{
return pos[numpos - 1] == path.Length;
}

if (ch == '*')
{
var ch = pattern[j];
if (ch == '$' && j + 1 == patlen)
var startPos = pos[0];
numpos = path.Length - startPos + 1;

for (var i = 0; i < numpos; i++)
{
return (pos[numpos - 1] == pathlen);
pos[i] = startPos + i;
}
}
else
{
var newnumpos = 0;
var pathLen = path.Length;

if (ch == '*')
for (var i = 0; i < numpos && pos[i] < pathLen; i++)
{
numpos = pathlen - pos[0] + 1;
for (int i = 1; i < numpos; i++)
if (path[pos[i]] == ch)
{
pos[i] = pos[i - 1] + 1;
pos[newnumpos++] = pos[i] + 1;
}
}
else
{
// Includes '$' when not at end of pattern.
int newnumpos = 0;
for (int i = 0; i < numpos; i++)
{
if (pos[i] < pathlen && path[pos[i]] == ch)
{
pos[newnumpos++] = pos[i] + 1;
}
}

numpos = newnumpos;
if (numpos == 0) return false;
}
if (newnumpos == 0) return false;
numpos = newnumpos;
}

return true;
}

return true;
}
}
Loading
Loading