Skip to content

Commit

Permalink
Redlib - Extract created date and use for RedditGameEntry
Browse files Browse the repository at this point in the history
- Extracted the created date from the Redlib HTML response.
- Added a new field `Date` to `RedlibGameEntry` to store the extracted date.
- Updated `RedlibHtmlParser.ParseGamesFromHtml` to return a collection of `RedlibGameEntry` with the populated `Date` field.
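- Modified `RedlibGameEntry.ToRedditGameEntry` to use the entry's own `Date` when available, falling back otherwise to the timestamp supplied by the caller (the HTTP response `Date` header when it is within one day of the local clock, else the current time).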

This commit improves data accuracy and enables the use of the created date in the `RedditGameEntry` object.
  • Loading branch information
maxisoft committed Sep 11, 2024
1 parent 8230a1f commit 69569ec
Show file tree
Hide file tree
Showing 5 changed files with 100 additions and 10 deletions.
6 changes: 5 additions & 1 deletion ASFFreeGames.Tests/Redlib/RedlibHtmlParserTests.cs
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
using System.Collections.Generic;
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Reflection;
using System.Text;
using System.Threading.Tasks;
Expand All @@ -19,6 +21,8 @@ public async void Test() {
Assert.NotEmpty(result);
Assert.Equal(25, result.Count);

Assert.Equal(new DateTimeOffset(2024, 6, 1, 23, 43, 40, TimeSpan.Zero), result.Skip(1).FirstOrDefault().Date);

// ReSharper disable once ArgumentsStyleLiteral
result = RedlibHtmlParser.ParseGamesFromHtml(html, dedup: true);
Assert.NotEmpty(result);
Expand Down
19 changes: 17 additions & 2 deletions ASFFreeGames/FreeGames/Strategies/RedlibListFreeGamesStrategy.cs
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ public async Task<IReadOnlyCollection<RedditGameEntry>> GetGames([NotNull] ListF
private async Task<IReadOnlyCollection<RedditGameEntry>> DoDownloadUsingInstance(SimpleHttpClient client, Uri uri, CancellationToken cancellationToken) {
await DownloadSemaphore.WaitAsync(cancellationToken).ConfigureAwait(false);
string content;
DateTimeOffset date = default;

try {
#pragma warning disable CAC001
Expand All @@ -101,16 +102,30 @@ private async Task<IReadOnlyCollection<RedditGameEntry>> DoDownloadUsingInstance
}
else {
content = await resp.ReadAsStringAsync(cancellationToken).ConfigureAwait(false);

// read the date using response headers
try {
date = resp.Response.Headers.Date ?? date;
}
catch (Exception e) when (e is MethodAccessException or TypeLoadException or MemberAccessException) {
// ignored
}
}
}
finally {
DownloadSemaphore.Release();
}

IReadOnlyCollection<RedlibGameEntry> entries = RedlibHtmlParser.ParseGamesFromHtml(content);
long now = DateTimeOffset.Now.ToUnixTimeMilliseconds(); // TODO read the date from the response's content
DateTimeOffset now = DateTimeOffset.Now;

if ((date == default(DateTimeOffset)) || ((now - date).Duration() > TimeSpan.FromDays(1))) {
date = now;
}

long dateMillis = date.ToUnixTimeMilliseconds();

return entries.Select(entry => entry.ToRedditGameEntry(now)).ToArray();
return entries.Select(entry => entry.ToRedditGameEntry(dateMillis)).ToArray();
}

private async Task<IReadOnlyCollection<RedditGameEntry>> DownloadUsingInstance(SimpleHttpClient client, Uri uri, uint retry, CancellationToken cancellationToken) {
Expand Down
2 changes: 1 addition & 1 deletion ASFFreeGames/Redlib/Html/ParserIndices.cs
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
namespace Maxisoft.ASF.Redlib.Html;

internal readonly record struct ParserIndices(int StartOfCommandIndex, int EndOfCommandIndex, int StartOfFooterIndex, int HrefStartIndex, int HrefEndIndex);
/// <summary>
/// Character offsets into the HTML being parsed that delimit the pieces of a single entry.
/// All offsets are relative to the start of the whole HTML span; the parser slices with
/// <c>html[Start..End]</c>, so each "End" value is an exclusive bound.
/// </summary>
/// <param name="StartOfCommandIndex">Offset where the <c>!addlicense</c> command begins.</param>
/// <param name="EndOfCommandIndex">Exclusive end offset of the command text.</param>
/// <param name="StartOfFooterIndex">Exclusive end offset of the region scanned for game-type flags (NOTE(review): presumably where the entry's info footer begins; confirm against <c>ParseIndices</c>).</param>
/// <param name="HrefStartIndex">Start offset of the span from which the entry title is extracted.</param>
/// <param name="HrefEndIndex">Exclusive end offset of that span.</param>
/// <param name="DateStartIndex">Offset of the first character of the <c>title="..."</c> attribute value found after the <c>&lt;span class="created"</c> tag.</param>
/// <param name="DateEndIndex">Offset of the closing quote of that attribute value (exclusive end of the date text).</param>
internal readonly record struct ParserIndices(int StartOfCommandIndex, int EndOfCommandIndex, int StartOfFooterIndex, int HrefStartIndex, int HrefEndIndex, int DateStartIndex, int DateEndIndex);
69 changes: 66 additions & 3 deletions ASFFreeGames/Redlib/Html/RedditHtmlParser.cs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Globalization;
using ASFFreeGames.ASFExtentions.Games;
using Maxisoft.ASF.Reddit;
using Maxisoft.Utils.Collections.Dictionaries;
Expand All @@ -22,7 +23,7 @@ public static IReadOnlyCollection<RedlibGameEntry> ParseGamesFromHtml(ReadOnlySp
try {
indices = ParseIndices(html, startIndex);

(int startOfCommandIndex, int endOfCommandIndex, int _, _, _) = indices;
(int startOfCommandIndex, int endOfCommandIndex, int _, _, _, _, _) = indices;

ReadOnlySpan<char> command = html[startOfCommandIndex..endOfCommandIndex].Trim();

Expand All @@ -39,7 +40,18 @@ public static IReadOnlyCollection<RedlibGameEntry> ParseGamesFromHtml(ReadOnlySp
EGameType flag = ParseGameTypeFlags(html[indices.StartOfCommandIndex..indices.StartOfFooterIndex]);

ReadOnlySpan<char> title = ExtractTitle(html, indices);
RedlibGameEntry entry = new(effectiveGameIdentifiers.ToArray(), title.ToString(), flag);

DateTimeOffset createdDate = default;

if ((indices.DateStartIndex < indices.DateEndIndex) && (indices.DateEndIndex > 0)) {
ReadOnlySpan<char> dateString = html[indices.DateStartIndex..indices.DateEndIndex].Trim();

if (!TryParseCreatedDate(dateString, out createdDate)) {
createdDate = default(DateTimeOffset);
}
}

RedlibGameEntry entry = new(effectiveGameIdentifiers.ToArray(), title.ToString(), flag, createdDate);

try {
entries.Add(entry, default(EmptyStruct));
Expand All @@ -60,6 +72,32 @@ public static IReadOnlyCollection<RedlibGameEntry> ParseGamesFromHtml(ReadOnlySp
return (IReadOnlyCollection<RedlibGameEntry>) entries.Keys;
}

/// <summary>
/// Exact date formats attempted, in order, by <see cref="TryParseCreatedDate"/> before falling back to
/// the general-purpose parser. The month-name variants cover values such as
/// <c>May 31 2024, 12:28:53 UTC</c>.
/// </summary>
private static readonly string[] CommonDateFormat = [
	"MM dd yyyy, HH:mm:ss zzz",
	"MMM dd yyyy, HH:mm:ss zzz", // was an exact duplicate of the previous entry; the month-name variant was presumably intended
	"MMM dd yyyy, HH:mm:ss UTC",
	"yyyy-MM-ddTHH:mm:ssZ",
	"yyyy-MM-ddTHH:mm:ss",
	"yyyy-MM-dd HH:mm:ss zzz",
	"yyyy-MM-dd HH:mm:ss.fffffff zzz",
	"yyyy-MM-ddTHH:mm:ss.fffffffzzz",
	"yyyy-MM-dd HH:mm:ss",
	"yyyyMMddHHmmss",
	"yyyyMMddHHmmss.fffffff"
];

/// <summary>
/// Tries to parse a creation date such as <c>May 31 2024, 12:28:53 UTC</c>.
/// </summary>
/// <param name="dateString">The raw date text; surrounding white space is tolerated.</param>
/// <param name="createdDate">
/// The parsed date on success; <c>default(DateTimeOffset)</c> on failure, so the result is
/// deterministic and never depends on the wall clock.
/// </param>
/// <returns><c>true</c> when <paramref name="dateString"/> was parsed, <c>false</c> otherwise.</returns>
private static bool TryParseCreatedDate(ReadOnlySpan<char> dateString, out DateTimeOffset createdDate) {
	// Text without an explicit offset is treated as UTC on every parsing path.
	const DateTimeStyles styles = DateTimeStyles.AssumeUniversal | DateTimeStyles.AllowWhiteSpaces;

	// IsWhiteSpace() is also true for an empty span.
	if (dateString.IsWhiteSpace()) {
		createdDate = default(DateTimeOffset);

		return false;
	}

	// The string[] overload tries each known format in turn.
	if (DateTimeOffset.TryParseExact(dateString, CommonDateFormat, DateTimeFormatInfo.InvariantInfo, styles, out createdDate)) {
		return true;
	}

	// Last resort: the lenient parser, with the same styles so that an offset-less value
	// is not silently interpreted in the machine's local time zone.
	if (DateTimeOffset.TryParse(dateString, DateTimeFormatInfo.InvariantInfo, styles, out createdDate)) {
		return true;
	}

	createdDate = default(DateTimeOffset);

	return false;
}

internal static ReadOnlySpan<char> ExtractTitle(ReadOnlySpan<char> html, ParserIndices indices) {
Span<Range> ranges = stackalloc Range[MaxIdentifierPerEntry];
ReadOnlySpan<char> hrefSpan = html[indices.HrefStartIndex..indices.HrefEndIndex];
Expand Down Expand Up @@ -114,6 +152,31 @@ internal static ParserIndices ParseIndices(ReadOnlySpan<char> html, int start) {

commentLinkIndex += start;

int createdStartIndex = html[commentLinkIndex..startIndex].IndexOf("<span class=\"created\"", StringComparison.InvariantCultureIgnoreCase);

if (createdStartIndex < 0) {
throw new SkipAndContinueParsingException("No created span found") { StartIndex = startIndex + 1 };
}

createdStartIndex += commentLinkIndex;

const string title = "title=\"";
int createdTitleStartIndex = html[createdStartIndex..startIndex].IndexOf(title, StringComparison.InvariantCultureIgnoreCase);

if (createdTitleStartIndex < 0) {
throw new SkipAndContinueParsingException("No created title attribute found") { StartIndex = startIndex + 1 };
}

createdTitleStartIndex += createdStartIndex + title.Length;

int createdTitleEndIndex = html[createdTitleStartIndex..startIndex].IndexOf("\"", StringComparison.InvariantCultureIgnoreCase);

if (createdTitleEndIndex < 0) {
throw new SkipAndContinueParsingException("No created title attribute end found") { StartIndex = startIndex + 1 };
}

createdTitleEndIndex += createdTitleStartIndex;

int hrefStartIndex = html[commentLinkIndex..startIndex].IndexOf("href", StringComparison.InvariantCultureIgnoreCase);

if (hrefStartIndex < 0) {
Expand Down Expand Up @@ -170,7 +233,7 @@ internal static ParserIndices ParseIndices(ReadOnlySpan<char> html, int start) {

startIndex = html[startIndex..commandEndIndex].IndexOf("!addlicense", StringComparison.OrdinalIgnoreCase) + startIndex;

return new ParserIndices(startIndex, commandEndIndex, infoFooterStartIndex, hrefStartIndex, hrefEndIndex);
return new ParserIndices(startIndex, commandEndIndex, infoFooterStartIndex, hrefStartIndex, hrefEndIndex, createdTitleStartIndex, createdTitleEndIndex);
}

internal static Span<GameIdentifier> SplitCommandAndGetGameIdentifiers(ReadOnlySpan<char> command, Span<GameIdentifier> gameIdentifiers) {
Expand Down
14 changes: 11 additions & 3 deletions ASFFreeGames/Redlib/RedlibGameEntry.cs
Original file line number Diff line number Diff line change
@@ -1,13 +1,21 @@
using System.Collections.Generic;
using System;
using System.Collections.Generic;
using ASFFreeGames.ASFExtentions.Games;
using Maxisoft.ASF.Reddit;

// ReSharper disable once CheckNamespace
namespace Maxisoft.ASF.Redlib;

#pragma warning disable CA1819

public readonly record struct RedlibGameEntry(IReadOnlyCollection<GameIdentifier> GameIdentifiers, string CommentLink, EGameType TypeFlags) {
public RedditGameEntry ToRedditGameEntry(long date = default) => new(string.Join(',', GameIdentifiers), TypeFlags.ToRedditGameEntryKind(), date);
/// <summary>
/// A game entry extracted from a Redlib page.
/// </summary>
/// <param name="GameIdentifiers">The game identifiers carried by the entry.</param>
/// <param name="CommentLink">Text associated with the entry (the parser passes the extracted title here).</param>
/// <param name="TypeFlags">Flags describing the kind of game.</param>
/// <param name="Date">Creation date of the entry, or <c>default</c> when none could be parsed.</param>
public readonly record struct RedlibGameEntry(IReadOnlyCollection<GameIdentifier> GameIdentifiers, string CommentLink, EGameType TypeFlags, DateTimeOffset Date) {
	/// <summary>
	/// Converts this entry into a <see cref="RedditGameEntry"/>.
	/// </summary>
	/// <param name="date">
	/// Fallback timestamp in Unix milliseconds, used only when <see cref="Date"/> has not been populated.
	/// </param>
	/// <returns>The converted entry, stamped with <see cref="Date"/> when it is set, else with <paramref name="date"/>.</returns>
	public RedditGameEntry ToRedditGameEntry(long date = default) {
		// default(DateTimeOffset) and DateTimeOffset.MinValue denote the same instant,
		// so one comparison covers both "unset" spellings.
		bool hasOwnDate = Date != DateTimeOffset.MinValue;
		long timestamp = hasOwnDate ? Date.ToUnixTimeMilliseconds() : date;
		string identifiers = string.Join(',', GameIdentifiers);

		return new RedditGameEntry(identifiers, TypeFlags.ToRedditGameEntryKind(), timestamp);
	}
}

#pragma warning restore CA1819

0 comments on commit 69569ec

Please sign in to comment.