From 76bf5d93b6c3640ce31e1ac8568f7e983c922027 Mon Sep 17 00:00:00 2001 From: EternalClickbait <48875125+EternalClickbait@users.noreply.github.com> Date: Tue, 29 Oct 2019 18:48:04 +1000 Subject: [PATCH] Development (#2) Merge development into Stable for V2.0.0 --- .gitignore | 1 + README.md | 40 +++++++++------ Wiki.Net.Example/Example.cs | 15 +++--- Wiki.Net.sln.DotSettings | 1 + Wiki.Net/Error.cs | 35 +++++++++++++ Wiki.Net/Inheritance.md | 16 ++++++ Wiki.Net/SearchInfo.cs | 21 ++++++++ Wiki.Net/Warning.cs | 31 ++++++++++++ Wiki.Net/Wiki.Net.csproj | 12 +++++ Wiki.Net/Wiki.Net.csproj.DotSettings | 2 + Wiki.Net/WikiSearchQuery.cs | 26 ++++++++++ Wiki.Net/WikiSearchResponse.cs | 62 ++++++++++++++++------- Wiki.Net/WikiSearchResult.cs | 23 +++++++-- Wiki.Net/WikiSearchSettings.cs | 65 +++++++++++++++++++++++++ Wiki.Net/WikiSearcher.cs | 73 +++++++++++++++++++--------- 15 files changed, 356 insertions(+), 67 deletions(-) create mode 100644 Wiki.Net/Error.cs create mode 100644 Wiki.Net/Inheritance.md create mode 100644 Wiki.Net/SearchInfo.cs create mode 100644 Wiki.Net/Warning.cs create mode 100644 Wiki.Net/Wiki.Net.csproj.DotSettings create mode 100644 Wiki.Net/WikiSearchQuery.cs create mode 100644 Wiki.Net/WikiSearchSettings.cs diff --git a/.gitignore b/.gitignore index 3e759b7..f306d43 100644 --- a/.gitignore +++ b/.gitignore @@ -328,3 +328,4 @@ ASALocalRun/ # MFractors (Xamarin productivity tool) working folder .mfractor/ +*.cs___jb_tmp___ diff --git a/README.md b/README.md index 38dcb32..2c848f2 100644 --- a/README.md +++ b/README.md @@ -1,19 +1,19 @@ # Wiki.Net [![License](https://img.shields.io/github/license/Creepysin-Studios/Wiki.Net)](/LICENSE) [![Requirements Status](https://requires.io/github/Creepysin-Studios/Wiki.Net/requirements.svg?branch=Stable)](https://requires.io/github/Creepysin-Studios/Wiki.Net/requirements/?branch=Stable) [![NuGet](https://img.shields.io/nuget/v/Wiki.Net)](https://www.nuget.org/packages/Wiki.Net/) 
-[![Nuget](https://img.shields.io/nuget/dt/Wiki.Net)](https://www.nuget.org/packages/Wiki.Net/) [![Discord](https://img.shields.io/badge/Discord-Creepysin-7289da.svg?logo=discord)](https://discord.creepysin.com) Wiki.Net – An unofficial C# Wikipedia API ## Features -Searches Wikipedia (duh!) and returns: +Searches Wikipedia (duh!) and returns (per result): +* Title * Page ID -* Titles * Word Count * Size (bytes?) * Text Preview * URL of page +* Time of last edit ## Getting Started @@ -29,16 +29,18 @@ You can also download the binaries from the [releases](https://github.com/Creepy ### Example -```csharp +```c# string searchString = “Computer”; +WikiSearchSettings searchSettings = new WikiSearchSettings + {RequestId = "Request ID", ResultLimit = 5, ResultOffset = 2}; -WikiSearchResponse response = WikiSearcher.Search(searchString); +WikiSearchResponse response = WikiSearcher.Search(searchString, searchSettings); Console.WriteLine($"\nResults found ({searchString}):\n"); -for (int i = 0; i < response.SearchResults.Length; i++) +foreach (WikiSearchResult result in response.Query.SearchResults) { - WikiSearchResult result = response.SearchResults[i]; - Console.WriteLine($"\t{result.Title} ({result.WordCount} words, {result.Size} bytes, id {result.PageId}):\t{result.Preview}...\n\tAt {result.Url}\n\tLast edited at {result.LastEdited}\n"); + Console.WriteLine( + $"\t{result.Title} ({result.WordCount} words, {result.Size} bytes, id {result.PageId}):\t{result.Preview}...\n\tAt {result.Url}\n\tLast edited at {result.LastEdited}\n"); } Console.ReadLine(); @@ -46,20 +48,26 @@ Console.ReadLine(); **Output** ``` -Computer (12154 words) -A computer is a machine that can be instructed to carry out sequences of arithmetic or logical operations automatically via computer programming. Modern... 
-https://en.wikipedia.org/?curid=7878457 +Results found (Computer): -Computer science (7267 words) -Computer science (sometimes called computation science or computing science, but not to be confused with computational science or software engineering)... -https://en.wikipedia.org/?curid=5323 + Information technology (2836 words, 27146 bytes, id 36674345): Information technology (IT) is the use of computers to store, retrieve, transmit, and manipulate data, or information, often in the context of a business... + At https://en.wikipedia.org/?curid=36674345 + Last edited at 24/10/2019 11:53:39 AM -*More results* + Computer graphics (computer science) (1632 words, 18720 bytes, id 18567168): Computer graphics is a sub-field of Computer Science which studies methods for digitally synthesizing and manipulating visual content. Although the term... + At https://en.wikipedia.org/?curid=18567168 + Last edited at 17/09/2019 12:21:21 AM + + Computer hardware (2479 words, 22776 bytes, id 21808348): Computer hardware includes the physical, tangible parts or components of a computer, such as the cabinet, central processing unit, monitor, keyboard,... 
+ At https://en.wikipedia.org/?curid=21808348 + Last edited at 16/10/2019 4:00:29 PM + + *More results* ``` ## Authors -**EternalClickbait** - *Initial work* - [EternalClickbait]( https://github.com/EternalClickbait) +**EternalClickbait** - *Initial work* - [EternalClickbait](https://github.com/EternalClickbait) ## License diff --git a/Wiki.Net.Example/Example.cs b/Wiki.Net.Example/Example.cs index af4652a..75ea59b 100644 --- a/Wiki.Net.Example/Example.cs +++ b/Wiki.Net.Example/Example.cs @@ -40,6 +40,8 @@ private static void Main() #region Loop until the user exits + WikiSearchSettings searchSettings = new WikiSearchSettings + {RequestId = "Request ID", ResultLimit = 5, ResultOffset = 2}; Request: //Get a search from the user, or exit string req = AskUserString("Enter a search query, 'exit' or 'quit' to quit"); @@ -53,7 +55,7 @@ private static void Main() } Console.Clear(); - PrintResults(req); + PrintResults(req, searchSettings); //Wait until the user presses enter to search again Console.WriteLine("Press any key to search again"); Console.ReadKey(true); @@ -62,17 +64,14 @@ private static void Main() #endregion } - private static void PrintResults(string searchString) + private static void PrintResults(string searchString, WikiSearchSettings searchSettings = null) { - WikiSearchResponse response = WikiSearcher.Search(searchString); + WikiSearchResponse response = WikiSearcher.Search(searchString, searchSettings); Console.WriteLine($"\nResults found ({searchString}):\n"); - for (int i = 0; i < response.SearchResults.Length; i++) - { - WikiSearchResult result = response.SearchResults[i]; + foreach (WikiSearchResult result in response.Query.SearchResults) Console.WriteLine( - $"\t{result.Title} ({result.WordCount} words, {result.Size} bytes, id {result.PageId}):\t{result.Preview}...\n\tAt {result.Url}\n\tLast edited at {result.LastEdited}\n"); - } + $"\t{result.Title} ({result.WordCount} words, {result.Size} bytes, id {result.PageId}):\t{result.Preview}...\n\tAt 
{result.Url} and {result.ConstantUrl}\n\tLast edited at {result.LastEdited}\n"); } private static string AskUserString(string message, bool clearConsole = true) diff --git a/Wiki.Net.sln.DotSettings b/Wiki.Net.sln.DotSettings index bfe8695..e98a52a 100644 --- a/Wiki.Net.sln.DotSettings +++ b/Wiki.Net.sln.DotSettings @@ -1,3 +1,4 @@  + True True True \ No newline at end of file diff --git a/Wiki.Net/Error.cs b/Wiki.Net/Error.cs new file mode 100644 index 0000000..ea8ce59 --- /dev/null +++ b/Wiki.Net/Error.cs @@ -0,0 +1,35 @@ +using Newtonsoft.Json; + +namespace CreepysinStudios.WikiDotNet +{ + /// + /// A class that represents a Wikipedia API error + /// + // ReSharper disable once ClassCannotBeInstantiated + public sealed class Error + { + /// + /// What error code does this error correspond to + /// + [JsonProperty("code")] public readonly string Code; + + /// + /// Any extra information to assist with debugging + /// + [JsonProperty("data")] public readonly string Data; + + /// + /// What Wikipedia module gave this error + /// + [JsonProperty("module")] public readonly string Module; + + /// + /// Information about this error + /// + [JsonProperty("*")] public readonly string Text; + + private Error() + { + } + } +} \ No newline at end of file diff --git a/Wiki.Net/Inheritance.md b/Wiki.Net/Inheritance.md new file mode 100644 index 0000000..f6edf83 --- /dev/null +++ b/Wiki.Net/Inheritance.md @@ -0,0 +1,16 @@ +# Class Inheritance + +### WikiSearchResponse +* #### Query + * ##### Search Results (Array) + * ###### Ns (Namespace?)
+ * ###### Last edited + * ###### Page ID + * ###### Preview + * ###### Size + * ###### Title + * ###### Word Count + * ###### Url + * ##### SearchInfo + * ###### TotalHits +* #### Request ID \ No newline at end of file diff --git a/Wiki.Net/SearchInfo.cs b/Wiki.Net/SearchInfo.cs new file mode 100644 index 0000000..98a04ba --- /dev/null +++ b/Wiki.Net/SearchInfo.cs @@ -0,0 +1,21 @@ +using Newtonsoft.Json; + +namespace CreepysinStudios.WikiDotNet +{ + /// + /// A class that contains information about a Wikipedia search. Currently only contains an int for the total number of + /// results. + /// + public class SearchInfo + { + /// + /// How many hits did the search return (in total, including those not shown) + /// + // ReSharper disable once StringLiteralTypo + [JsonProperty("totalhits")] public int TotalHits; + + private SearchInfo() + { + } + } +} \ No newline at end of file diff --git a/Wiki.Net/Warning.cs b/Wiki.Net/Warning.cs new file mode 100644 index 0000000..c6061a0 --- /dev/null +++ b/Wiki.Net/Warning.cs @@ -0,0 +1,31 @@ +using Newtonsoft.Json; + +namespace CreepysinStudios.WikiDotNet +{ + /// + /// A class that represents a Wikipedia API warning. 
Often returned when invalid parameters/arguments are passed to the + /// Wikipedia API + /// + // ReSharper disable once ClassCannotBeInstantiated + public sealed class Warning + { + /// + /// What warning code does this warning correspond to + /// + [JsonProperty("code")] public readonly string Code; + + /// + /// What Wikipedia module gave this warning + /// + [JsonProperty("module")] public readonly string Module; + + /// + /// Information about this warning + /// + [JsonProperty("*")] public readonly string Text; + + private Warning() + { + } + } +} \ No newline at end of file diff --git a/Wiki.Net/Wiki.Net.csproj b/Wiki.Net/Wiki.Net.csproj index 98eb1f1..f6960cb 100644 --- a/Wiki.Net/Wiki.Net.csproj +++ b/Wiki.Net/Wiki.Net.csproj @@ -24,4 +24,16 @@ + + + .gitignore + + + LICENSE + + + README.md + + + diff --git a/Wiki.Net/Wiki.Net.csproj.DotSettings b/Wiki.Net/Wiki.Net.csproj.DotSettings new file mode 100644 index 0000000..6ce52d4 --- /dev/null +++ b/Wiki.Net/Wiki.Net.csproj.DotSettings @@ -0,0 +1,2 @@ + + True \ No newline at end of file diff --git a/Wiki.Net/WikiSearchQuery.cs b/Wiki.Net/WikiSearchQuery.cs new file mode 100644 index 0000000..6679a6a --- /dev/null +++ b/Wiki.Net/WikiSearchQuery.cs @@ -0,0 +1,26 @@ +using Newtonsoft.Json; + +namespace CreepysinStudios.WikiDotNet +{ + /// + /// Contains an array of WikiSearchResults and a SearchInfo + /// + // ReSharper disable once ClassCannotBeInstantiated + public sealed class WikiSearchQuery + { + /// + /// A read-only field that contains information such as the total amount of hits the search returned + /// + // ReSharper disable once StringLiteralTypo + [JsonProperty("searchinfo")] public readonly SearchInfo SearchInfo; + + /// + /// An array of results returned from the wikipedia servers + /// + [JsonProperty("search")] public readonly WikiSearchResult[] SearchResults; + + private WikiSearchQuery() + { + } + } +} \ No newline at end of file diff --git a/Wiki.Net/WikiSearchResponse.cs b/Wiki.Net/WikiSearchResponse.cs index
dd2bc0a..debf933 100644 --- a/Wiki.Net/WikiSearchResponse.cs +++ b/Wiki.Net/WikiSearchResponse.cs @@ -1,44 +1,72 @@ #region using System; -using System.Net.Http; +using Newtonsoft.Json; #endregion namespace CreepysinStudios.WikiDotNet { /// - /// A class that contains an array of , returned from the Wikipedia servers + /// An object returned by the Wikipedia API that contains a and /// + //TODO: Add Error and warning class in case + // ReSharper disable once ClassCannotBeInstantiated public sealed class WikiSearchResponse { /// - /// The Json string from which the results were taken + /// Any errors returned with the request, or if there weren't any /// - public readonly string JsonResult; + [JsonProperty("errors")] public readonly Error[] Errors; /// - /// The response message from which the and are parsed + /// The Query that the search returned /// - public readonly HttpResponseMessage ResponseMessage; + [JsonProperty("query")] public readonly WikiSearchQuery Query; /// - /// An array of results returned from the wikipedia servers + /// The Request ID that was passed during the request /// - public readonly WikiSearchResult[] SearchResults; + // ReSharper disable once StringLiteralTypo + [JsonProperty("requestid")] public readonly string RequestId; /// - /// A constructor that creates a new + /// The Wikipedia server that this request was served by /// - /// The Json string used to parse the search results - /// The that was returned from the server - /// An array of parsed search results - internal WikiSearchResponse(string jsonResult, - HttpResponseMessage responseMessage, WikiSearchResult[] searchResults) + // ReSharper disable once StringLiteralTypo + [JsonProperty("servedby")] public readonly string ServedBy; + + /// + /// The time at which the Wikipedia server received the search request + /// + // ReSharper disable once StringLiteralTypo + [JsonProperty("curtimestamp")] public readonly DateTime Timestamp; + + /// + /// Any warnings returned with the 
request, or if there weren't any + /// + [JsonProperty("warnings")] public readonly Warning[] Warnings; + + private WikiSearchResponse() { - JsonResult = jsonResult ?? throw new ArgumentNullException(nameof(jsonResult)); - SearchResults = searchResults ?? throw new ArgumentNullException(nameof(searchResults)); - ResponseMessage = responseMessage ?? throw new ArgumentNullException(nameof(responseMessage)); + } + + /// + /// Was this request successful, or were there errors? + /// + public bool WasSuccessful + { + get + { + //If our errors and warnings arrays are null, we know this request was successful + if (Errors == null && Warnings == null) return true; + + //If our arrays aren't null and their length is not zero, return false + if (Warnings != null && Warnings.Length != 0) return false; + if (Errors != null && Errors.Length != 0) return false; + + return true; + } } } } \ No newline at end of file diff --git a/Wiki.Net/WikiSearchResult.cs b/Wiki.Net/WikiSearchResult.cs index f9e786f..6136846 100644 --- a/Wiki.Net/WikiSearchResult.cs +++ b/Wiki.Net/WikiSearchResult.cs @@ -10,8 +10,7 @@ namespace CreepysinStudios.WikiDotNet /// /// A single search result from a Wikipedia search /// - - //TODO: Add what categories the article falls into + // ReSharper disable once ClassCannotBeInstantiated public sealed class WikiSearchResult { /// @@ -19,6 +18,11 @@ public sealed class WikiSearchResult /// [JsonProperty("timestamp")] public readonly DateTime LastEdited; + /// + /// Unknown what this number refers to, likely refers to 'namespace' + /// + [JsonProperty("ns")] public readonly int Ns; + /// /// The numerical ID that corresponds internally (in Wikipedia's servers) to this page /// @@ -47,9 +51,20 @@ public sealed class WikiSearchResult // ReSharper disable once StringLiteralTypo [JsonProperty("wordcount")] public readonly int WordCount; + private WikiSearchResult() + { + } + + /// + /// A URL that can be used to access the article online. 
Created using the Page ID, and will point to the same article + /// even if the title changes + /// + public string ConstantUrl => $"https://en.wikipedia.org/?curid={PageId}"; + /// - /// The URL that can be used to access the article online. Created using the Page ID + /// A URL that can be used to access the article. If the page gets renamed or moved, this will likely break, and point + /// to a different or non-existent page /// - public string Url => $"https://en.wikipedia.org/?curid={PageId}"; + public string Url => $"https://en.wikipedia.org/wiki/{Title}"; } } \ No newline at end of file diff --git a/Wiki.Net/WikiSearchSettings.cs b/Wiki.Net/WikiSearchSettings.cs new file mode 100644 index 0000000..37370ae --- /dev/null +++ b/Wiki.Net/WikiSearchSettings.cs @@ -0,0 +1,65 @@ +#region + +using System; +using System.Collections.Generic; + +#endregion + +namespace CreepysinStudios.WikiDotNet +{ + /// + /// A class containing settings for use when searching with WikiSearcher.Search. + /// + /// + public sealed class WikiSearchSettings + { + /// + /// What namespaces to search in. When null (the default), all namespaces are searched + /// + // ReSharper disable once FieldCanBeMadeReadOnly.Global + public List Namespaces = null; + + /// + /// [Backing Field] How many results to return + /// + private int resultLimit = 10; + + /// + /// How many results to return. Valid range is 1-50; defaults to 10 + /// + /// Occurs when the given value is too high or low + public int ResultLimit + { + get => resultLimit; + set + { + const int min = 1; + const int max = 50; + if (value < min || value > max) + throw new ArgumentOutOfRangeException(nameof(value), + $"Value {value} is out of range. Valid range is {min}-{max}"); + resultLimit = value; + } + } + + /// + /// An amount to offset the search results by. Useful when scrolling through large groups of pages + /// + public int ResultOffset { get; set; } + + /// + /// A string that will be returned with the request results.
Useful to distinguish multiple requests + /// + public string RequestId { get; set; } + + // ReSharper disable once CommentTypo + /// + /// Should we only find results that exactly match our search + /// Example: + /// 'Microsoft' results in 'Microsoft' + /// 'Microsof' results in 'no results' + /// + // ReSharper disable once AutoPropertyCanBeMadeGetOnly.Global + public bool ExactMatch { get; set; } = false; + } +} \ No newline at end of file diff --git a/Wiki.Net/WikiSearcher.cs b/Wiki.Net/WikiSearcher.cs index d84713a..f7fdaa2 100644 --- a/Wiki.Net/WikiSearcher.cs +++ b/Wiki.Net/WikiSearcher.cs @@ -6,7 +6,6 @@ using System.Net.Http; using System.Text.RegularExpressions; using Newtonsoft.Json; -using Newtonsoft.Json.Linq; #endregion @@ -17,12 +16,6 @@ namespace CreepysinStudios.WikiDotNet /// public static class WikiSearcher { - /// - /// The path we use to get results from - /// - private const string WikiGetPath = "https://en.wikipedia.org/w/api.php"; - - //Our HttpClient and handler that we use to request our information /// /// The that we use to request our information /// @@ -38,6 +31,11 @@ public static class WikiSearcher /// private static readonly JsonSerializerSettings JsonSerializerSettings = new JsonSerializerSettings(); + /// + /// The path we use to get results from + /// + private static string WikiGetPath => $"{(UseHttps ?
"https://" : "http://")}en.wikipedia.org/w/api.php"; + /// /// An optional proxy to route HTTP requests through when searching /// @@ -48,34 +46,67 @@ public static IWebProxy Proxy set => Handler.Proxy = value; } + /// + /// Whether web requests use HTTPS (true, the default) or plain HTTP (false) + /// + // ReSharper disable once MemberCanBePrivate.Global + // ReSharper disable once AutoPropertyCanBeMadeGetOnly.Global + public static bool UseHttps { get; set; } = true; + /// /// Searches Wikipedia using the given /// /// The string to search for + /// An optional set of settings to customise the search. When null, only the base query parameters are sent + /// /// A list of search results obtained from the Wikipedia API - public static WikiSearchResponse Search(string searchString) + public static WikiSearchResponse Search(string searchString, WikiSearchSettings searchSettings = null) { if (string.IsNullOrWhiteSpace(searchString)) throw new ArgumentNullException(nameof(searchString), "A search string must be provided"); //Encode our values to be passed to the server string url; - using (FormUrlEncodedContent content = new FormUrlEncodedContent(new[] + Dictionary args = new Dictionary { // ReSharper disable StringLiteralTypo - //Get results in Json - new KeyValuePair("format", "json"), //Query the Wiki API - new KeyValuePair("action", "query"), - //Give errors in plain text - new KeyValuePair("errorformat", "plaintext"), + ["action"] = "query", + ["list"] = "search", //Our search params - new KeyValuePair("list", "search"), - new KeyValuePair("srsearch", searchString) + ["srsearch"] = searchString, + //Get results in Json + ["format"] = "json", + //Give errors in plain text + ["errorformat"] = "plaintext" // ReSharper restore StringLiteralTypo - })) + }; + + if (searchSettings != null) + { + // ReSharper disable StringLiteralTypo + + //Limit our results, and offset if required + args.Add("srlimit", searchSettings.ResultLimit.ToString()); + args.Add("sroffset", searchSettings.ResultOffset.ToString()); + //If the namespaces list is null use "*" which means all of
them + args.Add("srnamespace", + searchSettings.Namespaces == null ? "*" : string.Join('|', searchSettings.Namespaces)); + //If we should search for the exact string + args.Add("srwhat", searchSettings.ExactMatch ? "nearmatch" : "text"); + //Get which server we were served by + args.Add("servedby", "true"); + //Request the current timestamp be included + args.Add("curtimestamp", "true"); + if (searchSettings.RequestId != null) + args.Add("requestid", searchSettings.RequestId); + + // ReSharper restore StringLiteralTypo + } + + using (FormUrlEncodedContent content = new FormUrlEncodedContent(args)) { url = $"{WikiGetPath}?{content.ReadAsStringAsync().Result}"; } @@ -85,10 +116,8 @@ public static WikiSearchResponse Search(string searchString) string jsonResult = responseMessage.Content.ReadAsStringAsync().Result; jsonResult = StripTags(jsonResult); - WikiSearchResponse searchResponse = new WikiSearchResponse(jsonResult, responseMessage, - //We don't want to keep all of the extra information from our search, so we do some json magic to get the inner property - JsonConvert.DeserializeObject(jsonResult, JsonSerializerSettings).GetValue("query") - .ToObject().GetValue("search").ToObject()); + WikiSearchResponse searchResponse = + JsonConvert.DeserializeObject(jsonResult, JsonSerializerSettings); return searchResponse; } @@ -102,7 +131,7 @@ private static string StripTags(string source) { //We need to replace any quotes before they get processed by the HTML decoder, or they don't get escaped and deal havoc with the Json string unquoted = source.Replace(""", "\\\""); - //Decode html entity codes like `&quot;` into their unicode counterparts (e.g. `&quot;` => `"`) + //Decode html entity codes like `&lt;` into their unicode counterparts (e.g. `&lt;` => `<`) string decoded = WebUtility.HtmlDecode(unquoted); //Remove html formatting tags like ,
etc. return Regex.Replace(decoded, "<.*?>", string.Empty);