Skip to content

Commit

Permalink
Fix issue #560 Search crawler no longer works
Browse files Browse the repository at this point in the history
  • Loading branch information
thomas694 committed Aug 28, 2024
1 parent 46dce28 commit 42e3fed
Show file tree
Hide file tree
Showing 6 changed files with 71 additions and 18 deletions.
4 changes: 2 additions & 2 deletions src/TumblThree/SharedAssemblyInfo.cs
Original file line number Diff line number Diff line change
Expand Up @@ -12,5 +12,5 @@

[assembly: ComVisible(false)]
[assembly: NeutralResourcesLanguage("en-US", UltimateResourceFallbackLocation.MainAssembly)]
[assembly: AssemblyVersion("2.14.2.0")]
[assembly: AssemblyFileVersion("2.14.2.0")]
[assembly: AssemblyVersion("2.15.0.0")]
[assembly: AssemblyFileVersion("2.15.0.0")]
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
using System;
using System.Collections.Generic;
using Newtonsoft.Json.Linq;
using Newtonsoft.Json;

namespace TumblThree.Applications.Converter
{
    /// <summary>
    /// Json.NET converter for a value that may arrive either as a JSON object or as an
    /// empty JSON array (<c>[]</c>). An object is deserialized normally, an empty array
    /// yields a default-constructed instance of the target type, and a JSON <c>null</c>
    /// yields <c>null</c> for target types that can hold it.
    /// </summary>
    /// <remarks>
    /// This is a modified version of this SO answer: https://stackoverflow.com/a/45505097/14072498
    /// </remarks>
    public class EmptyArrayOrDictionaryConverter : JsonConverter
    {
        private const string UnexpectedTokenMessage = "Object or empty array expected";

        /// <summary>
        /// Reports whether a <see cref="Dictionary{TKey, TValue}"/> of string/object can be
        /// assigned to <paramref name="objectType"/>.
        /// </summary>
        /// <param name="objectType">The type being (de)serialized.</param>
        /// <returns><c>true</c> when the type accepts a <c>Dictionary&lt;string, object&gt;</c>.</returns>
        public override bool CanConvert(Type objectType) => objectType.IsAssignableFrom(typeof(Dictionary<string, object>));

        /// <summary>
        /// Reads the next JSON value and converts it to <paramref name="objectType"/>.
        /// </summary>
        /// <param name="reader">Reader positioned on the value to convert.</param>
        /// <param name="objectType">Target type of the value.</param>
        /// <param name="existingValue">Existing value of the target; not used.</param>
        /// <param name="serializer">Serializer used to deserialize a JSON object.</param>
        /// <returns>
        /// The deserialized object, a new empty instance for <c>[]</c>, or <c>null</c> for a
        /// JSON <c>null</c> when the target type is a reference type or <see cref="Nullable{T}"/>.
        /// </returns>
        /// <exception cref="JsonSerializationException">
        /// The value is a non-empty array, a <c>null</c> for a non-nullable value type, or any
        /// other token type.
        /// </exception>
        public override object ReadJson(JsonReader reader, Type objectType, object existingValue, JsonSerializer serializer)
        {
            var token = JToken.Load(reader);
            switch (token.Type)
            {
                case JTokenType.Null:
                    // Json.NET invokes converters for null tokens as well; a missing value must
                    // not abort the whole deserialization when the target can hold null.
                    if (!objectType.IsValueType || Nullable.GetUnderlyingType(objectType) != null)
                    {
                        return null;
                    }

                    throw new JsonSerializationException(UnexpectedTokenMessage);

                case JTokenType.Object:
                    return token.ToObject(objectType, serializer);

                case JTokenType.Array:
                    // Only the empty array is accepted as a stand-in for "no data".
                    if (!token.HasValues)
                    {
                        return Activator.CreateInstance(objectType);
                    }

                    throw new JsonSerializationException(UnexpectedTokenMessage);

                default:
                    throw new JsonSerializationException(UnexpectedTokenMessage);
            }
        }

        /// <summary>
        /// Writes <paramref name="value"/> by delegating to <paramref name="serializer"/>.
        /// </summary>
        /// <param name="writer">Destination writer.</param>
        /// <param name="value">Value to serialize.</param>
        /// <param name="serializer">Serializer that performs the write.</param>
        public override void WriteJson(JsonWriter writer, object value, JsonSerializer serializer) => serializer.Serialize(writer, value);
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ namespace TumblThree.Applications.Crawler
public class TumblrSearchCrawler : AbstractTumblrCrawler, ICrawler, IDisposable
{
private static readonly Regex extractJsonFromSearch = new Regex("window\\['___INITIAL_STATE___'\\] = (.*);");
private static readonly Regex extractJsonFromSearch2 = new Regex("id=\"___INITIAL_STATE___\">\\s*?({.*})\\s*?</script>", RegexOptions.Singleline);
private static readonly Regex extractJsonFromSearch2 = new Regex(@"id=""___INITIAL_STATE___"">\s*?({.*})\s*?</script>", RegexOptions.Singleline);

private readonly IShellService shellService;
private readonly IDownloader downloader;
Expand Down Expand Up @@ -122,7 +122,27 @@ private async Task CrawlPageAsync()
dynamic result = JsonConvert.DeserializeObject<ExpandoObject>(json, new ExpandoObjectConverter());
string nextUrl = "";
string bearerToken = "";
if (!HasProperty(result.SearchRoute, "timelines"))
if (HasProperty(result.SearchRoute, "timelines"))
{
if (result.SearchRoute.timelines.post.meta.status != 200)
{
Logger.Error(Resources.ErrorDownloadingBlog, Blog.Name, (string)result.SearchRoute.timelines.post.meta.msg, (long)result.SearchRoute.timelines.post.meta.status);
shellService.ShowError(new Exception(), string.Format(Resources.ErrorDownloadingBlog, Blog.Name, (string)result.SearchRoute.timelines.post.meta.msg, (long)result.SearchRoute.timelines.post.meta.status));
return;
}
if (!HasProperty(result.SearchRoute.timelines.post.response.timeline, "links"))
{
Logger.Error(Resources.SearchTermNotFound, (string)result.SearchRoute.searchParams.searchTerm);
shellService.ShowError(new Exception(), Resources.SearchTermNotFound, (string)result.SearchRoute.searchParams.searchTerm);
return;
}

nextUrl = result.apiUrl + result.SearchRoute.timelines.post.response.timeline.links.next.href;
bearerToken = result.apiFetchStore.API_TOKEN;

DownloadPage(result.SearchRoute.timelines.post);
}
else if (HasProperty(result.SearchRoute, "searchApiResponse"))
{
if (result.SearchRoute.searchApiResponse.meta.status != 200)
{
Expand All @@ -144,23 +164,19 @@ private async Task CrawlPageAsync()
}
else
{
if (result.SearchRoute.timelines.post.meta.status != 200)
{
Logger.Error(Resources.ErrorDownloadingBlog, Blog.Name, (string)result.SearchRoute.timelines.post.meta.msg, (long)result.SearchRoute.timelines.post.meta.status);
shellService.ShowError(new Exception(), string.Format(Resources.ErrorDownloadingBlog, Blog.Name, (string)result.SearchRoute.timelines.post.meta.msg, (long)result.SearchRoute.timelines.post.meta.status));
return;
}
if (!HasProperty(result.SearchRoute.timelines.post.response.timeline, "links"))
DataModels.TumblrTaggedSearchJson.TagSearch result2 = JsonConvert.DeserializeObject<DataModels.TumblrTaggedSearchJson.TagSearch>(json);

if (string.Compare(result2.Queries.Queries.Where(x => x.QueryHash.Contains("searchTimeline-post")).First().State.Status, "success", true) != 0)
{
Logger.Error(Resources.SearchTermNotFound, (string)result.SearchRoute.searchParams.searchTerm);
shellService.ShowError(new Exception(), Resources.SearchTermNotFound, (string)result.SearchRoute.searchParams.searchTerm);
Logger.Error(Resources.ErrorDownloadingBlog, Blog.Name, result2.Queries.Queries.Where(x => x.QueryHash.Contains("searchTimeline-post")).First().State.Error, GetCollectionName(Blog));
shellService.ShowError(new Exception(), string.Format(Resources.ErrorDownloadingBlog, Blog.Name, result2.Queries.Queries.Where(x => x.QueryHash.Contains("searchTimeline-post")).First().State.Error, GetCollectionName(Blog)));
return;
}

nextUrl = result.apiUrl + result.SearchRoute.timelines.post.response.timeline.links.next.href;
nextUrl = result2.ApiUrl + result2.Queries.Queries.Where(x => x.QueryHash.Contains("searchTimeline-post")).First().State.Data.Pages.First().NextLink;
bearerToken = result.apiFetchStore.API_TOKEN;

DownloadPage(result.SearchRoute.timelines.post);
// DownloadPage(result.SearchRoute.searchApiResponse);
}
while (true)
{
Expand Down Expand Up @@ -195,6 +211,7 @@ private async Task CrawlPageAsync()
catch (Exception e)
{
Logger.Error("TumblrSearchCrawler.CrawlPageAsync: {0}", e);
ShellService.ShowError(e, "{0}: {1}", Blog.Name, e.Message);
}
finally
{
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
using System.Collections.Generic;
using System.Runtime.Serialization;
using Newtonsoft.Json;
using TumblThree.Applications.Converter;

namespace TumblThree.Applications.DataModels.TumblrTaggedSearchJson
{
Expand Down Expand Up @@ -64,6 +66,7 @@ public class TagSearch
public AdPlacementConfiguration AdPlacementConfiguration { get; set; }

[DataMember(Name = "privacy")]
[JsonConverter(typeof(EmptyArrayOrDictionaryConverter))]
public Privacy Privacy { get; set; }

[DataMember(Name = "endlessScrollingDisabled")]
Expand Down Expand Up @@ -147,6 +150,7 @@ public class Query
public class State
{
[DataMember(Name = "data", EmitDefaultValue = false)]
[JsonConverter(typeof(EmptyArrayOrDictionaryConverter))]
public DataType Data { get; set; }

[DataMember(Name = "dataUpdateCount", EmitDefaultValue = false)]
Expand Down Expand Up @@ -1829,6 +1833,4 @@ public class Links
[DataMember(Name = "next")]
public NextRequest Next { get; set; }
}


}
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ namespace TumblThree.Applications.Properties
public sealed class AppSettings : IExtensibleDataObject
{
[IgnoreDataMember]
public static readonly string USERAGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36";
public static readonly string USERAGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36";

[IgnoreDataMember]
[System.Diagnostics.CodeAnalysis.SuppressMessage("Naming", "CA1707:Identifiers should not contain underscores", Justification = "<Pending>")]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,7 @@
<Compile Include="Controllers\ManagerController.cs" />
<Compile Include="Controllers\ModuleController.cs" />
<Compile Include="Controllers\QueueController.cs" />
<Compile Include="Converter\EmptyArrayOrDictionaryConverter.cs" />
<Compile Include="Converter\PropertyCopier.cs" />
<Compile Include="Converter\SingleOrArrayConverter.cs" />
<Compile Include="CookieParser.cs" />
Expand Down

0 comments on commit 42e3fed

Please sign in to comment.