From 3368de39c3bf90133f9d6ec75e1ae82f06cee55d Mon Sep 17 00:00:00 2001
From: Yucked
Date: Thu, 20 Jun 2024 23:45:14 -0400
Subject: [PATCH] begin caching rooms, need to fix pagination

---
 Ankh.Backend/Workers/RoomCacheWorker.cs | 138 ++++++++++++++++++++++---
 1 file changed, 119 insertions(+), 19 deletions(-)

diff --git a/Ankh.Backend/Workers/RoomCacheWorker.cs b/Ankh.Backend/Workers/RoomCacheWorker.cs
index bd18896..864c5e2 100644
--- a/Ankh.Backend/Workers/RoomCacheWorker.cs
+++ b/Ankh.Backend/Workers/RoomCacheWorker.cs
@@ -1,35 +1,135 @@
-namespace Ankh.Backend.Workers;
+using Ankh.Handlers;
+using Nito.AsyncEx;
 
-public sealed class RoomCacheWorker(Spyder spyder) : BackgroundService {
+namespace Ankh.Backend.Workers;
+
+/// <summary>
+/// Background worker that crawls the IMVU room directory, collects the room ids it
+/// links to and saves the room model fetched for each id.
+/// </summary>
+public sealed class RoomCacheWorker(
+    Spyder spyder,
+    RoomHandler roomHandler,
+    Database database,
+    ILogger<RoomCacheWorker> logger) : BackgroundService {
     private const string ROOMS_URL = "https://www.imvu.com/rooms";
+
+    // Pause after every crawl pass so a pass that fails or finds nothing right away
+    // cannot turn the loop into a tight retry spin.
+    private static readonly TimeSpan PassDelay = TimeSpan.FromMinutes(1);
+
+    // Letter pages are crawled in parallel; every access to _roomIds takes this lock.
+    private readonly object _roomIdsGate = new();
+    private readonly HashSet<string> _roomIds = new();
 
+    /// <summary>
+    /// Runs crawl passes until the host requests shutdown. A failing pass is logged and
+    /// the loop continues; cancellation is not swallowed and ends the loop.
+    /// </summary>
     protected override async Task ExecuteAsync(CancellationToken stoppingToken) {
         var letterTracker = string.Empty;
 
         while (!stoppingToken.IsCancellationRequested) {
-            var requestUrl = string.IsNullOrWhiteSpace(letterTracker)
-                ? ROOMS_URL
-                : $"{ROOMS_URL}/?letter={letterTracker}";
-
-            var page = await spyder.RequestPageAsync(requestUrl);
-            var elements = await page.QuerySelectorAllAsync("span.letter-link > a");
-            var hrefs = await Task.WhenAll(elements.Select(x => x.GetAttributeAsync("href")));
-
-            await Parallel.ForEachAsync(hrefs, stoppingToken, ParseRoomInfoAsync);
+            try {
+                var requestUrl = string.IsNullOrWhiteSpace(letterTracker)
+                    ? ROOMS_URL
+                    : $"{ROOMS_URL}/?letter={letterTracker}";
+
+                await RunPassAsync(requestUrl, stoppingToken);
+            }
+            catch (Exception exception) when (exception is not OperationCanceledException) {
+                logger.LogError(exception, "Room cache pass failed");
+            }
+
+            await Task.Delay(PassDelay, stoppingToken);
         }
     }
 
+    /// <summary>
+    /// Performs one pass: reads the letter links from the directory page at
+    /// <paramref name="requestUrl"/>, crawls each letter, then fetches and saves every
+    /// room id collected so far.
+    /// </summary>
+    private async Task RunPassAsync(string requestUrl, CancellationToken stoppingToken) {
+        var page = await spyder.RequestPageAsync(requestUrl);
+        if (page is null) {
+            logger.LogError("Unable to load room directory page {Url}", requestUrl);
+            return;
+        }
+
+        var elements = await page.QuerySelectorAllAsync("span.letter-link > a");
+
+        await Parallel.ForEachAsync(
+            await elements.Select(x => x.GetAttributeAsync("href")).WhenAll(),
+            stoppingToken,
+            HandleLetterAsync);
+
+        // Save from a snapshot so the shared set is never enumerated outside the lock.
+        string[] roomIds;
+        lock (_roomIdsGate) {
+            roomIds = _roomIds.ToArray();
+        }
+
+        await Parallel.ForEachAsync(roomIds, stoppingToken, async (roomId, _) => {
+            var roomModel = await roomHandler.GetRoomByIdAsync(Database.RandomLogin, roomId);
+            await database.SaveAsync(roomModel);
+        });
+    }
+
+    /// <summary>
+    /// Crawls the directory pages of one letter link and adds the room ids found on
+    /// them to the shared set. Failures are logged and do not stop the other letters.
+    /// </summary>
-    private async ValueTask ParseRoomInfoAsync(string? href, CancellationToken token) {
+    private async ValueTask HandleLetterAsync(string? href, CancellationToken stoppingToken) {
         if (string.IsNullOrWhiteSpace(href)) {
             return;
         }
 
-        var page = await spyder.RequestPageAsync($"{ROOMS_URL}/{href}");
-        var nextPage = (await page.QuerySelectorAllAsync("div[align] > a")).Last();
-
-        var rooms = Task.WhenAll((await page.QuerySelectorAllAsync("a.roomdirectory-link"))
-            .Select(x => x.GetAttributeAsync("href")));
-
-
+        try {
+            bool goToNext;
+            var requestUrl = $"{ROOMS_URL}/{href}";
+
+            do {
+                stoppingToken.ThrowIfCancellationRequested();
+
+                var page = await spyder.RequestPageAsync(requestUrl);
+                if (page is null) {
+                    logger.LogError("Unable to load room directory page {Url}", requestUrl);
+                    return;
+                }
+
+                // Parse the room links first so a page without pagination links is still cached.
+                var roomDirectoryElement = await page.QuerySelectorAllAsync("a.roomdirectory-link");
+                var roomHrefElement = await roomDirectoryElement
+                    .Select(x => x.GetAttributeAsync("href"))
+                    .WhenAll();
+
+                // The room id is the text after the first '=' of the link; links without an
+                // href are skipped. ToArray keeps the work inside the lock minimal.
+                var roomIds = roomHrefElement
+                    .Where(x => !string.IsNullOrWhiteSpace(x))
+                    .Select(x => x![(x.IndexOf('=') + 1)..])
+                    .ToArray();
+
+                lock (_roomIdsGate) {
+                    _roomIds.UnionWith(roomIds);
+                }
+
+                // Get next page
+                var nextPage = (await page.QuerySelectorAllAsync("div[align] > a")).LastOrDefault();
+                if (nextPage is null) {
+                    logger.LogError("Unable to find next page element for {Url}", page.Url);
+                    return;
+                }
+
+                // NOTE(review): continuing only while the trailing link reads "Previous" looks
+                // inverted (one would expect to follow "Next"); confirm against the live markup.
+                goToNext = await nextPage.TextContentAsync() == "Previous";
+                requestUrl = $"{ROOMS_URL}{await nextPage.GetAttributeAsync("href")}";
+            } while (goToNext);
+        }
+        catch (Exception exception) when (exception is not OperationCanceledException) {
+            logger.LogError(exception, "Unable to crawl room directory letter {Href}", href);
+        }
     }
 }
\ No newline at end of file