Skip to content

Commit de356f8

Browse files
authored
Merge pull request #43 from martial-god/40-please-add-noveldramacom-website
40 please add noveldrama.com website
2 parents b52696d + 7157169 commit de356f8

File tree

11 files changed

+269
-96
lines changed

11 files changed

+269
-96
lines changed

Benny-Scraper.BusinessLogic/Config/Selectors.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ public class Selectors
1818
public string? ChapterTitle { get; set; }
1919
public string? ChapterContent { get; set; }
2020
public string? AlternativeChapterContent { get; set; }
21-
public string? TableOfContnetsPaginationListItems { get; set; }
21+
public string? TableOfContentsPaginationListItems { get; set; }
2222
public string? ThumbnailUrlAttribute { get; set; }
2323
public string? ChapterContentImageUrlAttribute { get; set; }
2424
}

Benny-Scraper.BusinessLogic/HttpNovelScraper.cs

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ void AddSupportForWebsite()
3232
AddSiteToMap("https://mangakakalot.to", new MangaKakalotStrategy());
3333
AddSiteToMap("https://mangareader.to", new MangaReaderStrategy());
3434
AddSiteToMap("https://mangakatana.com", new MangaKatanaStrategy());
35+
AddSiteToMap("https://noveldrama.com", new NovelDramaStrategy());
3536
}
3637
#endregion
3738

@@ -53,5 +54,15 @@ void AddSupportForWebsite()
5354
Logger.Error($"No scraper strategy found for {baseUrl}");
5455
return null;
5556
}
57+
58+
/// <summary>
/// Lists the keys of the website map, i.e. the site identifiers that currently have a
/// scraper strategy registered.
/// </summary>
/// <returns>A newly allocated list containing every key of the website map.</returns>
public List<string> GetSupportedSites()
{
    // Copy the keys so the caller gets an independent list rather than a view over the map.
    return new List<string>(_websiteMap.Keys);
}
5667
}
5768
}

Benny-Scraper.BusinessLogic/Scrapers/Strategy/LightNovelWorldStrategy.cs

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,8 @@
1-
using Benny_Scraper.BusinessLogic.Config;
1+
using System.Collections.Specialized;
2+
using System.Web;
23
using Benny_Scraper.BusinessLogic.Scrapers.Strategy.Impl;
34
using Benny_Scraper.Models;
45
using HtmlAgilityPack;
5-
using System.Collections.Specialized;
6-
using System.Web;
76

87
namespace Benny_Scraper.BusinessLogic.Scrapers.Strategy
98
{
@@ -17,11 +16,11 @@ public static void FetchNovelContent(NovelDataBuffer novelDataBuffer, HtmlDocume
1716
{
1817
Attr.Title,
1918
Attr.Author,
20-
Attr.Status,
19+
Attr.NovelStatus,
2120
Attr.Description,
2221
Attr.ThumbnailUrl,
2322
Attr.Genres,
24-
Attr.LatestChapter
23+
Attr.CurrentChapter
2524
};
2625
foreach (var attribute in attributesToFetch)
2726
{
@@ -43,7 +42,7 @@ public static void FetchNovelContent(NovelDataBuffer novelDataBuffer, HtmlDocume
4342
}
4443
public class LightNovelWorldStrategy : ScraperStrategy
4544
{
46-
private Uri? _chaptersUri;
45+
private Uri? _chaptersUri; // the url of the chapters pages are different from the table of contents page
4746
private readonly string _latestChapterXpath = "//*[@id='chapter-list-page']/header/p[2]/a";
4847

4948
public override async Task<NovelDataBuffer> ScrapeAsync()
@@ -101,7 +100,7 @@ private void SetCurrentChapterUrl(HtmlDocument htmlDocument, NovelDataBuffer nov
101100

102101
private int GetLastTableOfContentsPageNumber(HtmlDocument htmlDocument)
103102
{
104-
HtmlNodeCollection paginationNodes = htmlDocument.DocumentNode.SelectNodes(_scraperData.SiteConfig.Selectors.TableOfContnetsPaginationListItems);
103+
HtmlNodeCollection paginationNodes = htmlDocument.DocumentNode.SelectNodes(_scraperData.SiteConfig.Selectors.TableOfContentsPaginationListItems);
105104
int paginationCount = paginationNodes.Count;
106105

107106
int pageToStopAt = 1;

Benny-Scraper.BusinessLogic/Scrapers/Strategy/MangaKakalotStrategy.cs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,13 +23,13 @@ public static async Task FetchNovelContentAsync(NovelDataBuffer novelDataBuffer,
2323
{
2424
Attr.Title,
2525
Attr.Author,
26-
Attr.Status,
26+
Attr.NovelStatus,
2727
Attr.Genres,
2828
Attr.AlternativeNames,
2929
Attr.Description,
3030
Attr.ThumbnailUrl,
3131
Attr.ChapterUrls,
32-
Attr.LatestChapter
32+
Attr.CurrentChapter
3333
};
3434

3535
foreach (var attribute in attributesToFetch)
@@ -45,7 +45,7 @@ public static async Task FetchNovelContentAsync(NovelDataBuffer novelDataBuffer,
4545
novelDataBuffer.FirstChapter = novelDataBuffer.ChapterUrls.First();
4646
}
4747
}
48-
else if (attribute == Attr.LatestChapter)
48+
else if (attribute == Attr.CurrentChapter)
4949
{
5050
FetchContentByAttribute(attribute, novelDataBuffer, htmlDocumentForChapterUrls, scraperData);
5151
novelDataBuffer.CurrentChapterUrl = new Uri(scraperData.BaseUri, novelDataBuffer.CurrentChapterUrl).ToString();

Benny-Scraper.BusinessLogic/Scrapers/Strategy/MangaKatanaStrategy.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,13 +22,13 @@ public static async Task FetchNovelContentAsync(NovelDataBuffer novelDataBuffer,
2222
{
2323
Attr.Title,
2424
Attr.Author,
25-
Attr.Status,
25+
Attr.NovelStatus,
2626
Attr.Genres,
2727
Attr.AlternativeNames,
2828
Attr.Description,
2929
Attr.ThumbnailUrl,
3030
Attr.ChapterUrls,
31-
Attr.LatestChapter
31+
Attr.CurrentChapter
3232
};
3333

3434
foreach (var attribute in attributesToFetch)

Benny-Scraper.BusinessLogic/Scrapers/Strategy/MangaReaderStrategy.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,13 +22,13 @@ public static async Task FetchNovelContentAsync(NovelDataBuffer novelDataBuffer,
2222
{
2323
Attr.Title,
2424
Attr.Author,
25-
Attr.Status,
25+
Attr.NovelStatus,
2626
Attr.Genres,
2727
Attr.AlternativeNames,
2828
Attr.Description,
2929
Attr.ThumbnailUrl,
3030
Attr.ChapterUrls,
31-
Attr.LatestChapter
31+
Attr.CurrentChapter
3232
};
3333

3434
foreach (var attribute in attributesToFetch)
Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,117 @@
1+
using System.Globalization;
2+
using Benny_Scraper.BusinessLogic.Scrapers.Strategy.Impl;
3+
using Benny_Scraper.Models;
4+
using HtmlAgilityPack;
5+
6+
namespace Benny_Scraper.BusinessLogic.Scrapers.Strategy;
7+
8+
/// <summary>
/// Fills a <see cref="NovelDataBuffer"/> with the novel metadata scraped from a
/// noveldrama.com table of contents page.
/// </summary>
public class NovelDramaInitializer : NovelDataInitializer
{
    /// <summary>
    /// Fetches every supported attribute from <paramref name="htmlDocument"/> into
    /// <paramref name="novelDataBuffer"/>, then resolves the scraped current chapter, thumbnail and
    /// last table of contents page URLs against the table of contents URI.
    /// </summary>
    /// <param name="novelDataBuffer">Buffer that receives the scraped values.</param>
    /// <param name="htmlDocument">Parsed table of contents page.</param>
    /// <param name="scraperData">Scraper settings; supplies the table of contents URI used as the base for URL resolution.</param>
    public static void FetchNovelContent(NovelDataBuffer novelDataBuffer, HtmlDocument htmlDocument, ScraperData scraperData)
    {
        var tableOfContents = scraperData.SiteTableOfContents;
        var attributesToFetch = new List<Attr>()
        {
            Attr.Author,
            Attr.Title,
            Attr.NovelStatus,
            Attr.Genres,
            Attr.Description,
            Attr.ThumbnailUrl,
            Attr.LastTableOfContentsPage,
            Attr.FirstChapterUrl,
            Attr.CurrentChapter
        };

        foreach (var attribute in attributesToFetch)
        {
            FetchContentByAttribute(attribute, novelDataBuffer, htmlDocument, scraperData);
        }

        // The leading '/' is stripped so each value is resolved relative to the table of contents
        // URI instead of the host root.
        var fullCurrentChapterUrl = new Uri(tableOfContents, novelDataBuffer.CurrentChapterUrl?.TrimStart('/')).ToString();
        var fullThumbnailUrl = new Uri(tableOfContents, novelDataBuffer.ThumbnailUrl?.TrimStart('/')).ToString();
        var fullLastTableOfContentUrl = new Uri(tableOfContents, novelDataBuffer.LastTableOfContentsPageUrl?.TrimStart('/')).ToString();

        novelDataBuffer.ThumbnailUrl = fullThumbnailUrl;
        // Store the resolved chapter URL on CurrentChapterUrl. It was previously written to
        // LastTableOfContentsPageUrl and immediately overwritten, leaving CurrentChapterUrl unresolved.
        novelDataBuffer.CurrentChapterUrl = fullCurrentChapterUrl;
        novelDataBuffer.LastTableOfContentsPageUrl = fullLastTableOfContentUrl;
    }
}
40+
41+
/// <summary>
/// Scraper strategy for noveldrama.com. Reads the novel metadata from the table of contents page
/// and collects the chapter URLs from the paginated chapter list.
/// </summary>
public class NovelDramaStrategy : ScraperStrategy
{
    // This site does not have a last page button, so we have to get the last page number from the input box
    private readonly string _lastTableOfContentsPageNumberXpath = "//*[@id='chapters']/div[2]/div[2]/div/div/input";
    private readonly string _chapterMaxAttribute = "data-max";

    /// <summary>
    /// Loads the table of contents page and builds the novel data from it.
    /// </summary>
    /// <returns>The populated buffer, with <c>NovelUrl</c> set to the URI returned when the page was loaded.</returns>
    /// <remarks>Any exception raised while building the novel data is logged and rethrown.</remarks>
    public override async Task<NovelDataBuffer> ScrapeAsync()
    {
        Logger.Info($"Getting novel data for {this.GetType().Name}");
        SetBaseUri(_scraperData.SiteTableOfContents);
        var (htmlDocument, uri) = await LoadHtmlAsync(_scraperData.SiteTableOfContents);

        try
        {
            NovelDataBuffer novelDataBuffer = await BuildNovelDataAsync(htmlDocument);
            novelDataBuffer.NovelUrl = uri.ToString();

            return novelDataBuffer;
        }
        catch (Exception e)
        {
            Logger.Error($"Error while getting novel data. {e}");
            throw;
        }
    }

    /// <summary>
    /// Fetches the novel metadata, determines the last table of contents page number and gathers
    /// the chapter URLs from the paginated table of contents.
    /// </summary>
    /// <param name="htmlDocument">Parsed table of contents page.</param>
    /// <returns>The buffer with the metadata, chapter URLs and last table of contents page URL filled in.</returns>
    private async Task<NovelDataBuffer> BuildNovelDataAsync(HtmlDocument htmlDocument)
    {
        var novelDataBuffer = await FetchNovelDataFromTableOfContentsAsync(htmlDocument);

        int pageToStopAt = FetchLastTableOfContentsPageNumber(htmlDocument);
        var (chapterUrls, lastTableOfContentsUrl) = await GetPaginatedChapterUrlsAsync(_scraperData.SiteTableOfContents, true, pageToStopAt);

        novelDataBuffer.ChapterUrls = chapterUrls;
        novelDataBuffer.LastTableOfContentsPageUrl = lastTableOfContentsUrl; // this needs to be updated as it is not the same as what was set in FetchNovelDataFromTableOfContentsAsync
        return novelDataBuffer;
    }

    /// <summary>
    /// Reads the novel metadata from the table of contents page via <see cref="NovelDramaInitializer"/>.
    /// </summary>
    /// <param name="htmlDocument">Parsed table of contents page.</param>
    /// <returns>
    /// The buffer holding whatever was fetched. If fetching throws, the error is logged and the
    /// buffer is still returned with the values gathered up to that point.
    /// </returns>
    public override NovelDataBuffer FetchNovelDataFromTableOfContents(HtmlDocument htmlDocument)
    {
        var novelDataBuffer = new NovelDataBuffer();
        try
        {
            NovelDramaInitializer.FetchNovelContent(novelDataBuffer, htmlDocument, _scraperData);
        }
        catch (Exception e)
        {
            // Best effort: log the failure and hand back the buffer as filled so far.
            Logger.Error($"Error occurred while getting novel data from table of contents. Error: {e}");
        }

        return novelDataBuffer;
    }

    /// <summary>
    /// Reads the last table of contents page number from the <c>data-max</c> attribute of the
    /// page-number input box and adds the configured page offset when it is positive.
    /// </summary>
    /// <param name="htmlDocument">Parsed table of contents page.</param>
    /// <returns>The last page number, including the page offset when one is configured.</returns>
    /// <exception cref="InvalidOperationException">The input box or its attribute is not present in the document.</exception>
    /// <exception cref="FormatException">The attribute value is not a valid integer.</exception>
    private int FetchLastTableOfContentsPageNumber(HtmlDocument htmlDocument)
    {
        try
        {
            HtmlNode? lastPageNode = htmlDocument.DocumentNode.SelectSingleNode(_lastTableOfContentsPageNumberXpath);
            string? lastPage = lastPageNode?.Attributes[_chapterMaxAttribute]?.Value;

            // Fail with a descriptive error instead of a bare NullReferenceException when the
            // page layout no longer matches the expected XPath or attribute.
            if (lastPage is null)
            {
                throw new InvalidOperationException(
                    $"Could not read attribute '{_chapterMaxAttribute}' from node at '{_lastTableOfContentsPageNumberXpath}'.");
            }

            // The value is scraped machine data, so parse it culture-invariantly; otherwise the
            // accepted thousands separator would depend on the culture of the host machine.
            int lastPageNumber = int.Parse(lastPage, NumberStyles.AllowThousands, CultureInfo.InvariantCulture);

            if (_scraperData.SiteConfig?.PageOffSet > 0)
            {
                lastPageNumber += _scraperData.SiteConfig.PageOffSet;
            }

            Logger.Info($"Last table of contents page number is {lastPage}");
            return lastPageNumber;
        }
        catch (Exception e)
        {
            Logger.Error($"Error when getting last page table of contents page number. {e}");
            throw;
        }
    }
}
Lines changed: 10 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,6 @@
1-
using Benny_Scraper.BusinessLogic.Config;
1+
using Benny_Scraper.BusinessLogic.Scrapers.Strategy.Impl;
22
using Benny_Scraper.Models;
33
using HtmlAgilityPack;
4-
using System.Globalization;
5-
using NLog;
6-
using Benny_Scraper.BusinessLogic.Scrapers.Strategy.Impl;
74

85
namespace Benny_Scraper.BusinessLogic.Scrapers.Strategy
96
{
@@ -26,32 +23,29 @@ public static void FetchNovelContent(NovelDataBuffer novelDataBuffer, HtmlDocume
2623
Attr.Description,
2724
Attr.Genres,
2825
Attr.AlternativeNames,
29-
Attr.Status,
26+
Attr.NovelStatus,
3027
Attr.ThumbnailUrl,
3128
Attr.LastTableOfContentsPage,
3229
Attr.FirstChapterUrl,
33-
Attr.LatestChapter
30+
Attr.CurrentChapter
3431
};
32+
3533
foreach (var attribute in attributesToFetch)
3634
{
3735
FetchContentByAttribute(attribute, novelDataBuffer, htmlDocument, scraperData);
3836
}
3937

40-
//TODO: Brad: I notice that the name LatestChapter and CurrentChapter are both used to refer to the same thing.
41-
// As is, FetchContentByAttribute(Attr.LatestChapter ...) sets the NovelDataBuffer's CurrentChapterUrl property.
42-
// It is probably best if the two naming schemes are unified, but I don't want to change the data members
43-
// of NovelDataBuffer without consulting you first.
44-
var fullLatestChapterUrl = new Uri(tableOfContents, novelDataBuffer.CurrentChapterUrl?.TrimStart('/')).ToString();
38+
var fullCurrentChapterUrl = new Uri(tableOfContents, novelDataBuffer.CurrentChapterUrl?.TrimStart('/')).ToString();
4539
var fullThumbnailUrl = new Uri(tableOfContents, novelDataBuffer.ThumbnailUrl?.TrimStart('/')).ToString();
4640
var fullLastTableOfContentUrl = new Uri(tableOfContents, novelDataBuffer.LastTableOfContentsPageUrl?.TrimStart('/')).ToString();
4741

4842
novelDataBuffer.ThumbnailUrl = fullThumbnailUrl;
49-
novelDataBuffer.LastTableOfContentsPageUrl = fullLatestChapterUrl;
43+
novelDataBuffer.LastTableOfContentsPageUrl = fullCurrentChapterUrl;
5044
novelDataBuffer.LastTableOfContentsPageUrl = fullLastTableOfContentUrl;
5145
}
5246
}
5347
}
54-
48+
5549
public class NovelFullStrategy : ScraperStrategy
5650
{
5751
public override async Task<NovelDataBuffer> ScrapeAsync()
@@ -60,7 +54,7 @@ public override async Task<NovelDataBuffer> ScrapeAsync()
6054

6155
SetBaseUri(_scraperData.SiteTableOfContents);
6256
var (htmlDocument, uri) = await LoadHtmlAsync(_scraperData.SiteTableOfContents);
63-
57+
6458
try
6559
{
6660
NovelDataBuffer novelDataBuffer = await BuildNovelDataAsync(htmlDocument);
@@ -78,8 +72,8 @@ public override async Task<NovelDataBuffer> ScrapeAsync()
7872
private async Task<NovelDataBuffer> BuildNovelDataAsync(HtmlDocument htmlDocument)
7973
{
8074
var novelDataBuffer = FetchNovelDataFromTableOfContents(htmlDocument);
75+
int pageToStopAt = GetPageNumberFromUrlQuery(novelDataBuffer.LastTableOfContentsPageUrl, _scraperData.BaseUri);
8176

82-
int pageToStopAt = FetchLastTableOfContentsPageNumber(htmlDocument);
8377
var (chapterUrls, lastTableOfContentsUrl) = await GetPaginatedChapterUrlsAsync(_scraperData.SiteTableOfContents, true, pageToStopAt);
8478

8579
novelDataBuffer.ChapterUrls = chapterUrls;
@@ -103,31 +97,5 @@ public override NovelDataBuffer FetchNovelDataFromTableOfContents(HtmlDocument h
10397

10498
return novelDataBuffer;
10599
}
106-
107-
private int FetchLastTableOfContentsPageNumber(HtmlDocument htmlDocument)
108-
{
109-
Logger.Info($"Getting last table of contents page number at {_scraperData.SiteConfig?.Selectors.LastTableOfContentsPage}");
110-
try
111-
{
112-
HtmlNode lastPageNode = htmlDocument.DocumentNode.SelectSingleNode(_scraperData.SiteConfig?.Selectors.LastTableOfContentsPage);
113-
string lastPage = lastPageNode.Attributes[_scraperData.SiteConfig?.Selectors.LastTableOfContentPageNumberAttribute].Value;
114-
115-
int lastPageNumber = int.Parse(lastPage, NumberStyles.AllowThousands);
116-
117-
if (_scraperData.SiteConfig?.PageOffSet > 0)
118-
{
119-
lastPageNumber += _scraperData.SiteConfig.PageOffSet;
120-
}
121-
122-
Logger.Info($"Last table of contents page number is {lastPage}");
123-
return lastPageNumber;
124-
}
125-
catch (Exception e)
126-
{
127-
Logger.Error($"Error when getting last page table of contents page number. {e}");
128-
throw;
129-
}
130-
}
131-
132100
}
133-
}
101+
}

0 commit comments

Comments
 (0)