From 69569ec81d5418cf8dfede8e7bf523642a8058f0 Mon Sep 17 00:00:00 2001 From: maxisoft Date: Wed, 11 Sep 2024 12:55:51 +0200 Subject: [PATCH] Redlib - Extract created date and use for RedditGameEntry - Extracted the created date from the Redlib HTML response. - Added a new field `Date` to `RedlibGameEntry` to store the extracted date. - Updated `RedlibHtmlParser.ParseGamesFromHtml` to return a collection of `RedlibGameEntry` with the populated `Date` field. - Modified `RedlibGameEntry.ToRedditGameEntry` to use the provided `Date` when available, falling back to the caller-supplied timestamp (the HTTP `Date` response header, or the current time) otherwise. This commit improves data accuracy and enables the use of the created date in the `RedditGameEntry` object. --- .../Redlib/RedlibHtmlParserTests.cs | 6 +- .../Strategies/RedlibListFreeGamesStrategy.cs | 19 ++++- ASFFreeGames/Redlib/Html/ParserIndices.cs | 2 +- ASFFreeGames/Redlib/Html/RedditHtmlParser.cs | 69 ++++++++++++++++++- ASFFreeGames/Redlib/RedlibGameEntry.cs | 14 +++- 5 files changed, 100 insertions(+), 10 deletions(-) diff --git a/ASFFreeGames.Tests/Redlib/RedlibHtmlParserTests.cs b/ASFFreeGames.Tests/Redlib/RedlibHtmlParserTests.cs index 8b8189f..7b43d9c 100644 --- a/ASFFreeGames.Tests/Redlib/RedlibHtmlParserTests.cs +++ b/ASFFreeGames.Tests/Redlib/RedlibHtmlParserTests.cs @@ -1,5 +1,7 @@ -using System.Collections.Generic; +using System; +using System.Collections.Generic; using System.IO; +using System.Linq; using System.Reflection; using System.Text; using System.Threading.Tasks; @@ -19,6 +21,8 @@ public async void Test() { Assert.NotEmpty(result); Assert.Equal(25, result.Count); + Assert.Equal(new DateTimeOffset(2024, 6, 1, 23, 43, 40, TimeSpan.Zero), result.Skip(1).FirstOrDefault().Date); + // ReSharper disable once ArgumentsStyleLiteral result = RedlibHtmlParser.ParseGamesFromHtml(html, dedup: true); Assert.NotEmpty(result); diff --git a/ASFFreeGames/FreeGames/Strategies/RedlibListFreeGamesStrategy.cs b/ASFFreeGames/FreeGames/Strategies/RedlibListFreeGamesStrategy.cs 
index e996d1a..839bd39 100644 --- a/ASFFreeGames/FreeGames/Strategies/RedlibListFreeGamesStrategy.cs +++ b/ASFFreeGames/FreeGames/Strategies/RedlibListFreeGamesStrategy.cs @@ -79,6 +79,7 @@ public async Task> GetGames([NotNull] ListF private async Task> DoDownloadUsingInstance(SimpleHttpClient client, Uri uri, CancellationToken cancellationToken) { await DownloadSemaphore.WaitAsync(cancellationToken).ConfigureAwait(false); string content; + DateTimeOffset date = default; try { #pragma warning disable CAC001 @@ -101,6 +102,14 @@ private async Task> DoDownloadUsingInstance } else { content = await resp.ReadAsStringAsync(cancellationToken).ConfigureAwait(false); + + // read the date using response headers + try { + date = resp.Response.Headers.Date ?? date; + } + catch (Exception e) when (e is MethodAccessException or TypeLoadException or MemberAccessException) { + // ignored + } } } finally { @@ -108,9 +117,15 @@ private async Task> DoDownloadUsingInstance } IReadOnlyCollection entries = RedlibHtmlParser.ParseGamesFromHtml(content); - long now = DateTimeOffset.Now.ToUnixTimeMilliseconds(); // TODO read the date from the response's content + DateTimeOffset now = DateTimeOffset.Now; + + if ((date == default(DateTimeOffset)) || ((now - date).Duration() > TimeSpan.FromDays(1))) { + date = now; + } + + long dateMillis = date.ToUnixTimeMilliseconds(); - return entries.Select(entry => entry.ToRedditGameEntry(now)).ToArray(); + return entries.Select(entry => entry.ToRedditGameEntry(dateMillis)).ToArray(); } private async Task> DownloadUsingInstance(SimpleHttpClient client, Uri uri, uint retry, CancellationToken cancellationToken) { diff --git a/ASFFreeGames/Redlib/Html/ParserIndices.cs b/ASFFreeGames/Redlib/Html/ParserIndices.cs index 0c13b63..bbfe81d 100644 --- a/ASFFreeGames/Redlib/Html/ParserIndices.cs +++ b/ASFFreeGames/Redlib/Html/ParserIndices.cs @@ -1,3 +1,3 @@ namespace Maxisoft.ASF.Redlib.Html; -internal readonly record struct ParserIndices(int StartOfCommandIndex, 
int EndOfCommandIndex, int StartOfFooterIndex, int HrefStartIndex, int HrefEndIndex); +internal readonly record struct ParserIndices(int StartOfCommandIndex, int EndOfCommandIndex, int StartOfFooterIndex, int HrefStartIndex, int HrefEndIndex, int DateStartIndex, int DateEndIndex); diff --git a/ASFFreeGames/Redlib/Html/RedditHtmlParser.cs b/ASFFreeGames/Redlib/Html/RedditHtmlParser.cs index c628e05..4003369 100644 --- a/ASFFreeGames/Redlib/Html/RedditHtmlParser.cs +++ b/ASFFreeGames/Redlib/Html/RedditHtmlParser.cs @@ -1,6 +1,7 @@ using System; using System.Collections.Generic; using System.Diagnostics; +using System.Globalization; using ASFFreeGames.ASFExtentions.Games; using Maxisoft.ASF.Reddit; using Maxisoft.Utils.Collections.Dictionaries; @@ -22,7 +23,7 @@ public static IReadOnlyCollection ParseGamesFromHtml(ReadOnlySp try { indices = ParseIndices(html, startIndex); - (int startOfCommandIndex, int endOfCommandIndex, int _, _, _) = indices; + (int startOfCommandIndex, int endOfCommandIndex, int _, _, _, _, _) = indices; ReadOnlySpan command = html[startOfCommandIndex..endOfCommandIndex].Trim(); @@ -39,7 +40,18 @@ public static IReadOnlyCollection ParseGamesFromHtml(ReadOnlySp EGameType flag = ParseGameTypeFlags(html[indices.StartOfCommandIndex..indices.StartOfFooterIndex]); ReadOnlySpan title = ExtractTitle(html, indices); - RedlibGameEntry entry = new(effectiveGameIdentifiers.ToArray(), title.ToString(), flag); + + DateTimeOffset createdDate = default; + + if ((indices.DateStartIndex < indices.DateEndIndex) && (indices.DateEndIndex > 0)) { + ReadOnlySpan dateString = html[indices.DateStartIndex..indices.DateEndIndex].Trim(); + + if (!TryParseCreatedDate(dateString, out createdDate)) { + createdDate = default(DateTimeOffset); + } + } + + RedlibGameEntry entry = new(effectiveGameIdentifiers.ToArray(), title.ToString(), flag, createdDate); try { entries.Add(entry, default(EmptyStruct)); @@ -60,6 +72,32 @@ public static IReadOnlyCollection 
ParseGamesFromHtml(ReadOnlySp return (IReadOnlyCollection) entries.Keys; } + private static readonly string[] CommonDateFormat = ["MMM dd yyyy, HH:mm:ss zzz", "MM dd yyyy, HH:mm:ss zzz", "MMM dd yyyy, HH:mm:ss UTC", "yyyy-MM-ddTHH:mm:ssZ", "yyyy-MM-ddTHH:mm:ss", "yyyy-MM-dd HH:mm:ss zzz", "yyyy-MM-dd HH:mm:ss.fffffff zzz", "yyyy-MM-ddTHH:mm:ss.fffffffzzz", "yyyy-MM-dd HH:mm:ss", "yyyyMMddHHmmss", "yyyyMMddHHmmss.fffffff"]; + + private static bool TryParseCreatedDate(ReadOnlySpan dateString, out DateTimeOffset createdDate) { + // parse date like May 31 2024, 12:28:53 UTC + + if (dateString.IsEmpty) { + createdDate = DateTimeOffset.Now; + + return false; + } + + foreach (string format in CommonDateFormat) { + if (DateTimeOffset.TryParseExact(dateString, format, DateTimeFormatInfo.InvariantInfo, DateTimeStyles.AssumeUniversal | DateTimeStyles.AllowWhiteSpaces, out createdDate)) { + return true; + } + } + + if (DateTimeOffset.TryParse(dateString, DateTimeFormatInfo.InvariantInfo, out createdDate)) { + return true; + } + + createdDate = DateTimeOffset.Now; + + return false; + } + internal static ReadOnlySpan ExtractTitle(ReadOnlySpan html, ParserIndices indices) { Span ranges = stackalloc Range[MaxIdentifierPerEntry]; ReadOnlySpan hrefSpan = html[indices.HrefStartIndex..indices.HrefEndIndex]; @@ -114,6 +152,31 @@ internal static ParserIndices ParseIndices(ReadOnlySpan html, int start) { commentLinkIndex += start; + int createdStartIndex = html[commentLinkIndex..startIndex].IndexOf(" html, int start) { startIndex = html[startIndex..commandEndIndex].IndexOf("!addlicense", StringComparison.OrdinalIgnoreCase) + startIndex; - return new ParserIndices(startIndex, commandEndIndex, infoFooterStartIndex, hrefStartIndex, hrefEndIndex); + return new ParserIndices(startIndex, commandEndIndex, infoFooterStartIndex, hrefStartIndex, hrefEndIndex, createdTitleStartIndex, createdTitleEndIndex); } internal static Span SplitCommandAndGetGameIdentifiers(ReadOnlySpan command, Span 
gameIdentifiers) { diff --git a/ASFFreeGames/Redlib/RedlibGameEntry.cs b/ASFFreeGames/Redlib/RedlibGameEntry.cs index 2ed3b38..3bb73d2 100644 --- a/ASFFreeGames/Redlib/RedlibGameEntry.cs +++ b/ASFFreeGames/Redlib/RedlibGameEntry.cs @@ -1,13 +1,21 @@ -using System.Collections.Generic; +using System; +using System.Collections.Generic; using ASFFreeGames.ASFExtentions.Games; using Maxisoft.ASF.Reddit; +// ReSharper disable once CheckNamespace namespace Maxisoft.ASF.Redlib; #pragma warning disable CA1819 -public readonly record struct RedlibGameEntry(IReadOnlyCollection GameIdentifiers, string CommentLink, EGameType TypeFlags) { - public RedditGameEntry ToRedditGameEntry(long date = default) => new(string.Join(',', GameIdentifiers), TypeFlags.ToRedditGameEntryKind(), date); +public readonly record struct RedlibGameEntry(IReadOnlyCollection GameIdentifiers, string CommentLink, EGameType TypeFlags, DateTimeOffset Date) { + public RedditGameEntry ToRedditGameEntry(long date = default) { + if ((Date != default(DateTimeOffset)) && (Date != DateTimeOffset.MinValue)) { + date = Date.ToUnixTimeMilliseconds(); + } + + return new RedditGameEntry(string.Join(',', GameIdentifiers), TypeFlags.ToRedditGameEntryKind(), date); + } } #pragma warning restore CA1819