diff --git a/src/TumblThree/SharedAssemblyInfo.cs b/src/TumblThree/SharedAssemblyInfo.cs index eccfbe84..2b99bc13 100644 --- a/src/TumblThree/SharedAssemblyInfo.cs +++ b/src/TumblThree/SharedAssemblyInfo.cs @@ -12,5 +12,5 @@ [assembly: ComVisible(false)] [assembly: NeutralResourcesLanguage("en-US", UltimateResourceFallbackLocation.MainAssembly)] -[assembly: AssemblyVersion("2.14.2.0")] -[assembly: AssemblyFileVersion("2.14.2.0")] +[assembly: AssemblyVersion("2.15.0.0")] +[assembly: AssemblyFileVersion("2.15.0.0")] diff --git a/src/TumblThree/TumblThree.Applications/Converter/EmptyArrayOrDictionaryConverter.cs b/src/TumblThree/TumblThree.Applications/Converter/EmptyArrayOrDictionaryConverter.cs new file mode 100644 index 00000000..0e056ff6 --- /dev/null +++ b/src/TumblThree/TumblThree.Applications/Converter/EmptyArrayOrDictionaryConverter.cs @@ -0,0 +1,33 @@ +using System; +using System.Collections.Generic; +using Newtonsoft.Json.Linq; +using Newtonsoft.Json; + +namespace TumblThree.Applications.Converter +{ + // this is a modified version of this SO-answer: https://stackoverflow.com/a/45505097/14072498 + public class EmptyArrayOrDictionaryConverter : JsonConverter + { + public override bool CanConvert(Type objectType) => objectType.IsAssignableFrom(typeof(Dictionary<string, object>)); + + public override object ReadJson(JsonReader reader, Type objectType, object existingValue, JsonSerializer serializer) + { + var token = JToken.Load(reader); + switch (token.Type) + { + case JTokenType.Object: + return token.ToObject(objectType, serializer); + + case JTokenType.Array: + if (!token.HasValues) + return Activator.CreateInstance(objectType); + else + throw new JsonSerializationException("Object or empty array expected"); + default: + throw new JsonSerializationException("Object or empty array expected"); + } + } + + public override void WriteJson(JsonWriter writer, object value, JsonSerializer serializer) => serializer.Serialize(writer, value); + } +} diff --git 
a/src/TumblThree/TumblThree.Applications/Crawler/TumblrSearchCrawler.cs b/src/TumblThree/TumblThree.Applications/Crawler/TumblrSearchCrawler.cs index 8d77f3dc..7bb65a58 100644 --- a/src/TumblThree/TumblThree.Applications/Crawler/TumblrSearchCrawler.cs +++ b/src/TumblThree/TumblThree.Applications/Crawler/TumblrSearchCrawler.cs @@ -31,7 +31,7 @@ namespace TumblThree.Applications.Crawler public class TumblrSearchCrawler : AbstractTumblrCrawler, ICrawler, IDisposable { private static readonly Regex extractJsonFromSearch = new Regex("window\\['___INITIAL_STATE___'\\] = (.*);"); - private static readonly Regex extractJsonFromSearch2 = new Regex("id=\"___INITIAL_STATE___\">\\s*?({.*})\\s*?", RegexOptions.Singleline); + private static readonly Regex extractJsonFromSearch2 = new Regex(@"id=""___INITIAL_STATE___"">\s*?({.*})\s*?", RegexOptions.Singleline); private readonly IShellService shellService; private readonly IDownloader downloader; @@ -122,7 +122,27 @@ private async Task CrawlPageAsync() dynamic result = JsonConvert.DeserializeObject<ExpandoObject>(json, new ExpandoObjectConverter()); string nextUrl = ""; string bearerToken = ""; - if (!HasProperty(result.SearchRoute, "timelines")) + if (HasProperty(result.SearchRoute, "timelines")) + { + if (result.SearchRoute.timelines.post.meta.status != 200) + { + Logger.Error(Resources.ErrorDownloadingBlog, Blog.Name, (string)result.SearchRoute.timelines.post.meta.msg, (long)result.SearchRoute.timelines.post.meta.status); + shellService.ShowError(new Exception(), string.Format(Resources.ErrorDownloadingBlog, Blog.Name, (string)result.SearchRoute.timelines.post.meta.msg, (long)result.SearchRoute.timelines.post.meta.status)); + return; + } + if (!HasProperty(result.SearchRoute.timelines.post.response.timeline, "links")) + { + Logger.Error(Resources.SearchTermNotFound, (string)result.SearchRoute.searchParams.searchTerm); + shellService.ShowError(new Exception(), Resources.SearchTermNotFound, (string)result.SearchRoute.searchParams.searchTerm); + 
return; + } + + nextUrl = result.apiUrl + result.SearchRoute.timelines.post.response.timeline.links.next.href; + bearerToken = result.apiFetchStore.API_TOKEN; + + DownloadPage(result.SearchRoute.timelines.post); + } + else if (HasProperty(result.SearchRoute, "searchApiResponse")) { if (result.SearchRoute.searchApiResponse.meta.status != 200) { @@ -144,23 +164,19 @@ private async Task CrawlPageAsync() } else { - if (result.SearchRoute.timelines.post.meta.status != 200) - { - Logger.Error(Resources.ErrorDownloadingBlog, Blog.Name, (string)result.SearchRoute.timelines.post.meta.msg, (long)result.SearchRoute.timelines.post.meta.status); - shellService.ShowError(new Exception(), string.Format(Resources.ErrorDownloadingBlog, Blog.Name, (string)result.SearchRoute.timelines.post.meta.msg, (long)result.SearchRoute.timelines.post.meta.status)); - return; - } - if (!HasProperty(result.SearchRoute.timelines.post.response.timeline, "links")) + DataModels.TumblrTaggedSearchJson.TagSearch result2 = JsonConvert.DeserializeObject<DataModels.TumblrTaggedSearchJson.TagSearch>(json); + + if (string.Compare(result2.Queries.Queries.Where(x => x.QueryHash.Contains("searchTimeline-post")).First().State.Status, "success", true) != 0) { - Logger.Error(Resources.SearchTermNotFound, (string)result.SearchRoute.searchParams.searchTerm); - shellService.ShowError(new Exception(), Resources.SearchTermNotFound, (string)result.SearchRoute.searchParams.searchTerm); + Logger.Error(Resources.ErrorDownloadingBlog, Blog.Name, result2.Queries.Queries.Where(x => x.QueryHash.Contains("searchTimeline-post")).First().State.Error, GetCollectionName(Blog)); + shellService.ShowError(new Exception(), string.Format(Resources.ErrorDownloadingBlog, Blog.Name, result2.Queries.Queries.Where(x => x.QueryHash.Contains("searchTimeline-post")).First().State.Error, GetCollectionName(Blog))); return; } - nextUrl = result.apiUrl + result.SearchRoute.timelines.post.response.timeline.links.next.href; + nextUrl = result2.ApiUrl + result2.Queries.Queries.Where(x => 
x.QueryHash.Contains("searchTimeline-post")).First().State.Data.Pages.First().NextLink; bearerToken = result.apiFetchStore.API_TOKEN; - DownloadPage(result.SearchRoute.timelines.post); + // DownloadPage(result.SearchRoute.searchApiResponse); } while (true) { @@ -195,6 +211,7 @@ private async Task CrawlPageAsync() catch (Exception e) { Logger.Error("TumblrSearchCrawler.CrawlPageAsync: {0}", e); + ShellService.ShowError(e, "{0}: {1}", Blog.Name, e.Message); } finally { diff --git a/src/TumblThree/TumblThree.Applications/DataModels/TumblrTaggedSearch/TumblrTaggedSearchJson.cs b/src/TumblThree/TumblThree.Applications/DataModels/TumblrTaggedSearch/TumblrTaggedSearchJson.cs index 3b860c0e..5e0ddf19 100644 --- a/src/TumblThree/TumblThree.Applications/DataModels/TumblrTaggedSearch/TumblrTaggedSearchJson.cs +++ b/src/TumblThree/TumblThree.Applications/DataModels/TumblrTaggedSearch/TumblrTaggedSearchJson.cs @@ -1,5 +1,7 @@ using System.Collections.Generic; using System.Runtime.Serialization; +using Newtonsoft.Json; +using TumblThree.Applications.Converter; namespace TumblThree.Applications.DataModels.TumblrTaggedSearchJson { @@ -64,6 +66,7 @@ public class TagSearch public AdPlacementConfiguration AdPlacementConfiguration { get; set; } [DataMember(Name = "privacy")] + [JsonConverter(typeof(EmptyArrayOrDictionaryConverter))] public Privacy Privacy { get; set; } [DataMember(Name = "endlessScrollingDisabled")] @@ -147,6 +150,7 @@ public class Query public class State { [DataMember(Name = "data", EmitDefaultValue = false)] + [JsonConverter(typeof(EmptyArrayOrDictionaryConverter))] public DataType Data { get; set; } [DataMember(Name = "dataUpdateCount", EmitDefaultValue = false)] @@ -1829,6 +1833,4 @@ public class Links [DataMember(Name = "next")] public NextRequest Next { get; set; } } - - } diff --git a/src/TumblThree/TumblThree.Applications/Properties/AppSettings.cs b/src/TumblThree/TumblThree.Applications/Properties/AppSettings.cs index 02e1503d..9e30f394 100644 --- 
a/src/TumblThree/TumblThree.Applications/Properties/AppSettings.cs +++ b/src/TumblThree/TumblThree.Applications/Properties/AppSettings.cs @@ -16,7 +16,7 @@ namespace TumblThree.Applications.Properties public sealed class AppSettings : IExtensibleDataObject { [IgnoreDataMember] - public static readonly string USERAGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36"; + public static readonly string USERAGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36"; [IgnoreDataMember] [System.Diagnostics.CodeAnalysis.SuppressMessage("Naming", "CA1707:Identifiers should not contain underscores", Justification = "")] diff --git a/src/TumblThree/TumblThree.Applications/TumblThree.Applications.csproj b/src/TumblThree/TumblThree.Applications/TumblThree.Applications.csproj index ded6d85f..928f4b24 100644 --- a/src/TumblThree/TumblThree.Applications/TumblThree.Applications.csproj +++ b/src/TumblThree/TumblThree.Applications/TumblThree.Applications.csproj @@ -119,6 +119,7 @@ +