Skip to content

Commit

Permalink
Merge pull request #16 from dutchmega/master
Browse files Browse the repository at this point in the history
RoyalRoadL improvements
  • Loading branch information
Mitch528 committed May 24, 2016
2 parents c845d66 + cc828d8 commit 5d66083
Show file tree
Hide file tree
Showing 8 changed files with 234 additions and 100 deletions.
163 changes: 83 additions & 80 deletions WebNovelConverter/MainForm.Designer.cs

Large diffs are not rendered by default.

22 changes: 18 additions & 4 deletions WebNovelConverter/MainForm.cs
Original file line number Diff line number Diff line change
Expand Up @@ -249,8 +249,8 @@ private async void retrieveBackgroundWorker_DoWork(object sender, DoWorkEventArg
modeSelectedText = "http://" + modeSelectedText;

WebNovelSource source = GetSource(modeSelectedText, type);
string coverUrl = await source.GetNovelCoverAsync(modeSelectedText);
coverUrl = coverUrl.StartsWith("//") ? coverUrl.Substring(2) : coverUrl;

WebNovelInfo novelInfo = await source.GetNovelInfoAsync(modeSelectedText);

if (mode == "table of contents")
{
Expand All @@ -270,8 +270,22 @@ private async void retrieveBackgroundWorker_DoWork(object sender, DoWorkEventArg
}
}

if (!string.IsNullOrEmpty(coverUrl))
coverTextBox.Text = new UriBuilder(coverUrl).Uri.AbsoluteUri;
if (novelInfo != null)
{
if (!string.IsNullOrEmpty(novelInfo.CoverUrl))
{
try
{
string coverUrl = novelInfo.CoverUrl;
coverUrl = coverUrl.StartsWith("//") ? coverUrl.Substring(2) : coverUrl;
coverTextBox.Text = new UriBuilder(coverUrl).Uri.AbsoluteUri;
}
catch (UriFormatException) { }
}

if (!string.IsNullOrEmpty(novelInfo.Title))
titleTextBox.Text = novelInfo.Title;
}

progressBar.Visible = false;
retrieveButton.Enabled = true;
Expand Down
35 changes: 33 additions & 2 deletions WebNovelConverter/Sources/BakaTsukiSource.cs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
using AngleSharp.Dom.Html;
using AngleSharp.Extensions;
using WebNovelConverter.Sources.Models;
using System.Text.RegularExpressions;

namespace WebNovelConverter.Sources
{
Expand Down Expand Up @@ -82,9 +83,39 @@ protected override IEnumerable<ChapterLink> CollectChapterLinks(string baseUrl,
}
}

public override Task<string> GetNovelCoverAsync(string baseUrl, CancellationToken token = default(CancellationToken))
public override async Task<WebNovelInfo> GetNovelInfoAsync(string baseUrl, CancellationToken token = default(CancellationToken))
{
return Task.FromResult(string.Empty);
string baseContent = await GetWebPageAsync(baseUrl, token);

IHtmlDocument doc = await Parser.ParseAsync(baseContent, token);

var title = doc.QuerySelector("h1#firstHeading span")?.TextContent;

string coverUrl = null;
var coverUrlEl = doc.QuerySelector("div.thumb a.image img.thumbimage[src*=cover]");
if( coverUrlEl != null)
{
coverUrl = coverUrlEl.Attributes["src"].Value;

// Bigger thumbnail
if(coverUrl.Contains("width=") && coverUrlEl.HasAttribute("data-file-width"))
{
var width = Math.Min(int.Parse(coverUrlEl.Attributes["data-file-width"].Value)-1, 500);
coverUrl = Regex.Replace(coverUrl, @"width\=([0-9]+)", "width=" + width);
}

// Make URL absolute
if( coverUrl.StartsWith("/"))
{
coverUrl = new Uri(new Uri(baseUrl), coverUrl).AbsoluteUri;
}
}

return new WebNovelInfo()
{
Title = title,
CoverUrl = coverUrl
};
}

public override async Task<WebNovelChapter> GetChapterAsync(ChapterLink link,
Expand Down
14 changes: 14 additions & 0 deletions WebNovelConverter/Sources/Models/WebNovelInfo.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace WebNovelConverter.Sources.Models
{
/// <summary>
/// Simple data holder for the metadata a novel source extracts from a novel's
/// landing page: its title and the URL of its cover image.
/// </summary>
public class WebNovelInfo
{
/// <summary>
/// Title of the novel. May be <c>null</c> when the source did not find a title element.
/// </summary>
public string Title { get; set; }
/// <summary>
/// URL of the cover image. May be <c>null</c> when the source did not find a cover image.
/// </summary>
public string CoverUrl { get; set; }
}
}
85 changes: 78 additions & 7 deletions WebNovelConverter/Sources/RoyalRoadLSource.cs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
using AngleSharp.Dom.Html;
using AngleSharp.Extensions;
using WebNovelConverter.Sources.Models;
using System.Text.RegularExpressions;

namespace WebNovelConverter.Sources
{
Expand Down Expand Up @@ -65,37 +66,107 @@ public override async Task<WebNovelChapter> GetChapterAsync(ChapterLink link,

IHtmlDocument doc = await Parser.ParseAsync(pageContent, token);

IElement firstPostElement = (from e in doc.All
IElement postBodyEl = (from e in doc.All
where e.LocalName == "div"
where e.HasAttribute("class")
let classAttribute = e.GetAttribute("class")
where classAttribute.Contains("post_body")
select e).FirstOrDefault();

if (firstPostElement == null)
if (postBodyEl == null)
return null;

RemoveNavigation(firstPostElement);
RemoveNavigation(postBodyEl);
RemoveDonation(postBodyEl);
ExpandSpoilers(postBodyEl);
RemoveEmpyTags(postBodyEl);

var content = CleanupHTML(postBodyEl.InnerHtml);

return new WebNovelChapter
{
Url = link.Url,
Content = firstPostElement.InnerHtml
Content = content
};
}

public override async Task<string> GetNovelCoverAsync(string baseUrl, CancellationToken token = default(CancellationToken))
public override async Task<WebNovelInfo> GetNovelInfoAsync(string baseUrl, CancellationToken token = default(CancellationToken))
{
string baseContent = await GetWebPageAsync(baseUrl, token);

IHtmlDocument doc = await Parser.ParseAsync(baseContent, token);

return doc.GetElementById("fiction-header").Descendents<IElement>().FirstOrDefault(p => p.LocalName == "img")?.GetAttribute("src");
var fictionHeaderDes = doc.GetElementById("fiction-header");
var coverUrl = fictionHeaderDes.Descendents<IElement>().FirstOrDefault(p => p.LocalName == "img")?.GetAttribute("src");
var title = fictionHeaderDes.QuerySelector("h1.fiction-title")?.TextContent;

return new WebNovelInfo()
{
CoverUrl = coverUrl,
Title = title
};
}

protected virtual void RemoveNavigation(IElement rootElement)
{
rootElement.Descendents<IElement>().LastOrDefault(p => p.LocalName == "table")?.Remove();
// Last 1-2 tables might be navigation

foreach(var table in rootElement.QuerySelectorAll("table").Reverse().Take(2))
{
if( table.QuerySelectorAll("a").Any(x => x.TextContent.Contains("Chapter"))) {
table.Remove();
}
}
}

/// <summary>
/// Strips the author-donation header blocks out of a post body.
/// </summary>
/// <param name="rootElement">Element whose <c>div.thead</c> descendants are inspected and pruned in place.</param>
protected virtual void RemoveDonation(IElement rootElement)
{
    var headerBlocks = rootElement.QuerySelectorAll("div.thead");

    // The filter is evaluated lazily, so each header is tested right before it is
    // (possibly) detached, exactly one element at a time.
    var donationBlocks = headerBlocks.Where(
        header => header.TextContent.Contains("Donation for the Author"));

    foreach (IElement donationBlock in donationBlocks)
    {
        donationBlock.Remove();
    }
}

/// <summary>
/// Makes spoiler content permanently visible so it can be read inline.
/// The expected markup is a header/body pair:
/// <code>
/// &lt;div class="spoiler_header"&gt;Spoilerxxx&lt;/div&gt;
/// &lt;div class="spoiler_body" style="display: none;"&gt;xxxx&lt;/div&gt;
/// </code>
/// Every <c>.spoiler_body</c> element gets its <c>style</c> and <c>class</c>
/// attributes blanked, and every <c>.spoiler_header</c> element is removed.
/// </summary>
/// <param name="rootElement">Element whose descendants are modified in place.</param>
protected void ExpandSpoilers(IElement rootElement)
{
    // Reveal the bodies first: blank the inline style and the spoiler class.
    var spoilerBodies = rootElement.QuerySelectorAll(".spoiler_body");
    foreach (IElement body in spoilerBodies)
    {
        body.SetAttribute("style", string.Empty);
        body.SetAttribute("class", string.Empty);
    }

    // Then drop the clickable headers, which serve no purpose once expanded.
    var spoilerHeaders = rootElement.QuerySelectorAll(".spoiler_header");
    foreach (IElement header in spoilerHeaders)
    {
        header.Remove();
    }
}

/// <summary>
/// Removes <c>div</c> and <c>span</c> descendants of <paramref name="rootElement"/>
/// that have no child elements and whose text content is empty or whitespace.
/// Wrappers that become empty because their only children were removed are
/// pruned as well.
/// </summary>
/// <param name="rootElement">Element whose descendants are pruned in place.</param>
private void RemoveEmpyTags(IElement rootElement)
{
    // QuerySelectorAll yields matches in document order, i.e. ancestors before
    // their descendants. Walking the list backwards examines descendants first,
    // so a wrapper such as <div><span></span></div> is seen after its empty child
    // has already been detached and is then removed too. A forward pass would
    // leave that wrapper behind because it still had a child when it was checked.
    foreach (IElement candidate in rootElement.QuerySelectorAll("div,span").Reverse())
    {
        bool hasNoChildElements = candidate.ChildElementCount == 0;
        if (hasNoChildElements && string.IsNullOrWhiteSpace(candidate.TextContent))
        {
            candidate.Remove();
        }
    }
}

/// <summary>
/// Tidies serialized chapter markup: every run of three or more
/// <c>&lt;br&gt;</c> tags (each optionally followed by whitespace) is collapsed
/// into two line breaks, and surrounding whitespace is trimmed from the result.
/// </summary>
/// <param name="html">Markup to clean up.</param>
/// <returns>The cleaned-up markup.</returns>
private string CleanupHTML(string html)
{
    const string excessiveBreaksPattern = @"(<br>\s*){3,}";
    const string doubleBreak = "<br /><br />";

    // Too many consecutive line breaks show up in some posts.
    string collapsed = Regex.Replace(html, excessiveBreaksPattern, doubleBreak);

    return collapsed.Trim();
}
}
}
9 changes: 7 additions & 2 deletions WebNovelConverter/Sources/WebNovelSource.cs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@

namespace WebNovelConverter.Sources
{
public class WebNovelSource
public abstract class WebNovelSource
{
protected static readonly ChapterLink[] EmptyLinks = new ChapterLink[0];

Expand All @@ -38,7 +38,12 @@ public WebNovelSource(string sourceName)
throw new NotImplementedException();
}

public virtual Task<string> GetNovelCoverAsync(string baseUrl, CancellationToken token = default(CancellationToken))
public virtual Task<WebNovelInfo> GetNovelInfoAsync(string baseUrl, CancellationToken token = default(CancellationToken))
{
return Task.FromResult((WebNovelInfo)null);
}

public virtual Task<string> GetNovelTitleAsync(string baseUrl, CancellationToken token = default(CancellationToken))
{
return Task.FromResult(string.Empty);
}
Expand Down
5 changes: 0 additions & 5 deletions WebNovelConverter/Sources/WordPressSource.cs
Original file line number Diff line number Diff line change
Expand Up @@ -187,11 +187,6 @@ orderby o
return chapter;
}

public override Task<string> GetNovelCoverAsync(string baseUrl, CancellationToken token = new CancellationToken())
{
return Task.FromResult(string.Empty);
}

protected virtual IEnumerable<string> GetPagedChapterUrls(IElement rootElement)
{
var pagElements = rootElement.FirstWhereHasClass(PaginationClasses, e => e.LocalName == "div")
Expand Down
1 change: 1 addition & 0 deletions WebNovelConverter/WebNovelConverter.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@
</Compile>
<Compile Include="Sources\BakaTsukiSource.cs" />
<Compile Include="Sources\Models\ChapterRetrievalOptions.cs" />
<Compile Include="Sources\Models\WebNovelInfo.cs" />
<Compile Include="Sources\NovelsNaoSource.cs" />
<Compile Include="Sources\NovelSourceCollection.cs" />
<Compile Include="Sources\RoyalRoadLSource.cs" />
Expand Down

0 comments on commit 5d66083

Please sign in to comment.