diff --git a/FOCA/FOCA.csproj b/FOCA/FOCA.csproj
index 4dc8b51..feaba46 100644
--- a/FOCA/FOCA.csproj
+++ b/FOCA/FOCA.csproj
@@ -78,6 +78,9 @@
4
false
+
+ true
+
..\packages\com.rusanu.dataconnectiondialog.1.0.0.1\lib\net20\com.rusanu.dataconnectiondialog.dll
@@ -118,11 +121,25 @@
3.5
-
+
+ ..\packages\System.Net.Http.4.3.4\lib\net46\System.Net.Http.dll
+
3.0
+
+ ..\packages\System.Security.Cryptography.Algorithms.4.3.0\lib\net461\System.Security.Cryptography.Algorithms.dll
+
+
+ ..\packages\System.Security.Cryptography.Encoding.4.3.0\lib\net46\System.Security.Cryptography.Encoding.dll
+
+
+ ..\packages\System.Security.Cryptography.Primitives.4.3.0\lib\net46\System.Security.Cryptography.Primitives.dll
+
+
+ ..\packages\System.Security.Cryptography.X509Certificates.4.3.0\lib\net461\System.Security.Cryptography.X509Certificates.dll
+
3.0
diff --git a/FOCA/PanelDNSSearch.cs b/FOCA/PanelDNSSearch.cs
index 282cacf..89878b4 100644
--- a/FOCA/PanelDNSSearch.cs
+++ b/FOCA/PanelDNSSearch.cs
@@ -5,7 +5,6 @@
using Heijden.DNS;
using MetadataExtractCore;
using MetadataExtractCore.Diagrams;
-using SearcherCore.Searcher.BingAPI;
using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
@@ -472,8 +471,7 @@ private void GoogleApiSearch()
try
{
- var gr = (GoogleAPISearcher.GoogleAPIResults)item;
- var url = new Uri(gr.Url);
+ var url = new Uri((string)item);
var strHost = url.Host;
if (
currentResults.All(
@@ -687,8 +685,7 @@ private void BingApiSearch()
try
{
- var br = (BingApiResult)item;
- var url = new Uri(br.Url);
+ var url = new Uri((string)item);
var strHost = url.Host;
if (
currentResults.All(
@@ -905,8 +902,7 @@ private void SerchLinkApiBingEvent(string ip, BingAPISearcher bingSearcherApi, L
loopState.Stop();
try
{
- var br = (BingApiResult)item;
- var url = new Uri(br.Url);
+ var url = new Uri((string)item);
if (
currentResults.Any(d => string.Equals(d, url.Host, StringComparison.CurrentCultureIgnoreCase)))
return;
diff --git a/FOCA/PanelMetadataSearch.cs b/FOCA/PanelMetadataSearch.cs
index 0d377a7..c82034e 100644
--- a/FOCA/PanelMetadataSearch.cs
+++ b/FOCA/PanelMetadataSearch.cs
@@ -2385,10 +2385,35 @@ private void CustomSearch(object parameter)
var searchString = parameter as string;
try
{
+
if (chkGoogle.Checked)
- CustomSearchEventsGeneric(new GoogleWebSearcher(), searchString);
+ {
+ if (String.IsNullOrWhiteSpace(Program.cfgCurrent.GoogleApiKey) || String.IsNullOrWhiteSpace(Program.cfgCurrent.GoogleApiCx))
+ {
+ CustomSearchEventsGeneric(new GoogleWebSearcher(), searchString);
+ }
+ else
+ {
+ CustomSearchEventsGeneric(new GoogleAPISearcher
+ {
+ GoogleApiKey = Program.cfgCurrent.GoogleApiKey,
+ GoogleApiCx = Program.cfgCurrent.GoogleApiCx,
+ SearchAll = true
+ }, searchString);
+ }
+ }
+
if (chkBing.Checked)
- CustomSearchEventsGeneric(new BingWebSearcher(), searchString);
+ {
+ if (String.IsNullOrWhiteSpace(Program.cfgCurrent.BingApiKey))
+ {
+ CustomSearchEventsGeneric(new BingWebSearcher(), searchString);
+ }
+ else
+ {
+ CustomSearchEventsGeneric(new BingAPISearcher(Program.cfgCurrent.BingApiKey), searchString);
+ }
+ }
}
catch (ThreadAbortException)
{
@@ -2437,9 +2462,33 @@ private void SearchAll()
try
{
if (chkGoogle.Checked)
- SearchEventsGeneric(new GoogleWebSearcher());
+ {
+ if (String.IsNullOrWhiteSpace(Program.cfgCurrent.GoogleApiKey) || String.IsNullOrWhiteSpace(Program.cfgCurrent.GoogleApiCx))
+ {
+ SearchEventsGeneric(new GoogleWebSearcher());
+ }
+ else
+ {
+ SearchEventsGeneric(new GoogleAPISearcher
+ {
+ GoogleApiKey = Program.cfgCurrent.GoogleApiKey,
+ GoogleApiCx = Program.cfgCurrent.GoogleApiCx,
+ SearchAll = true
+ });
+ }
+ }
+
if (chkBing.Checked)
- SearchEventsGeneric(new BingWebSearcher());
+ {
+ if (String.IsNullOrWhiteSpace(Program.cfgCurrent.BingApiKey))
+ {
+ SearchEventsGeneric(new BingWebSearcher());
+ }
+ else
+ {
+ SearchEventsGeneric(new BingAPISearcher(Program.cfgCurrent.BingApiKey));
+ }
+ }
if (chkDuck.Checked)
SearchEventsGeneric(new DuckduckgoWebSearcher());
}
diff --git a/FOCA/Properties/AssemblyInfo.cs b/FOCA/Properties/AssemblyInfo.cs
index 425d7d8..2ad91b4 100644
--- a/FOCA/Properties/AssemblyInfo.cs
+++ b/FOCA/Properties/AssemblyInfo.cs
@@ -32,5 +32,5 @@
// You can specify all the values or you can default the Build and Revision Numbers
// by using the '*' as shown below:
// [assembly: AssemblyVersion("1.0.*")]
-[assembly: AssemblyVersion("3.4.4.1")]
-[assembly: AssemblyFileVersion("3.4.4.1")]
+[assembly: AssemblyVersion("3.4.5.0")]
+[assembly: AssemblyFileVersion("3.4.5.0")]
diff --git a/FOCA/app.config b/FOCA/app.config
index 0877e98..dad533f 100644
--- a/FOCA/app.config
+++ b/FOCA/app.config
@@ -8,7 +8,7 @@
-
+
@@ -20,7 +20,7 @@
-
+
diff --git a/FOCA/packages.config b/FOCA/packages.config
index f85ac7c..e74373b 100644
--- a/FOCA/packages.config
+++ b/FOCA/packages.config
@@ -7,4 +7,9 @@
+
+
+
+
+
\ No newline at end of file
diff --git a/SearcherCore/SearcherCore/Searcher/BingAPI/SearchBingApi.cs b/SearcherCore/SearcherCore/Searcher/BingAPI/SearchBingApi.cs
index 22609ae..44385a5 100644
--- a/SearcherCore/SearcherCore/Searcher/BingAPI/SearchBingApi.cs
+++ b/SearcherCore/SearcherCore/Searcher/BingAPI/SearchBingApi.cs
@@ -1,29 +1,11 @@
-using System;
+using Newtonsoft.Json.Linq;
+using RestSharp;
+using System;
using System.Collections.Generic;
using System.Linq;
-using Newtonsoft.Json.Linq;
-using RestSharp;
namespace SearcherCore.Searcher.BingAPI
{
- ///
- /// BingApiResults encapsulates the fields of each result
- /// that FOCA may use
- ///
- public class BingApiResult
- {
- public BingApiResult(string url, string title, string description)
- {
- Url = url;
- Title = title;
- Description = description;
- }
-
- public string Url { get; set; }
- public string Title { get; set; }
- public string Description { get; set; }
- }
-
public class SearchBingApi
{
public delegate void StatusUpdateHandler(object sender, string e);
@@ -46,9 +28,9 @@ public SearchBingApi(string secretKey)
///
/// query
///
- public List Search(string q)
+ public List Search(string q)
{
- var results = new List();
+ var results = new List();
var offset = 0;
// URL of the requests. Count = 50 because it's the max allowed value
var request = new RestRequest($"search?count=50&safeSearch=Off&textFormat=Raw&offset={offset}&q={q}",
@@ -62,21 +44,18 @@ public List Search(string q)
{
do
{
- var webpages = (JArray) token["webPages"]["value"];
+ var webpages = (JArray)token["webPages"]["value"];
results.AddRange(
webpages.Select(
- res =>
- new BingApiResult((string) res.SelectToken("displayUrl"),
- (string) res.SelectToken("name"), (string) res.SelectToken("snippet"))));
- foreach (
- var b in
+ res => (string)res.SelectToken("displayUrl")));
+ foreach (var b in
webpages.Select(
- link => new BingApiResult((string) link.SelectToken("displayUrl"), "", "")))
+ link => ((string)link.SelectToken("displayUrl"))))
{
- UpdateStatus(b.Url);
+ UpdateStatus(b);
}
offset += 50;
- } while (offset < (int) token.SelectToken("webPages").SelectToken("totalEstimatedMatches")/1000);
+ } while (offset < (int)token.SelectToken("webPages").SelectToken("totalEstimatedMatches") / 1000);
}
catch
{
diff --git a/SearcherCore/SearcherCore/Searcher/BingAPISearcher.cs b/SearcherCore/SearcherCore/Searcher/BingAPISearcher.cs
index e8feb47..342ac83 100644
--- a/SearcherCore/SearcherCore/Searcher/BingAPISearcher.cs
+++ b/SearcherCore/SearcherCore/Searcher/BingAPISearcher.cs
@@ -110,7 +110,7 @@ private void GetCustomLinksAsync(object customSearchString)
private int GetBingResults(string searchString, out bool moreResults)
{
var client = new SearchBingApi(BingApiKey);
- List results;
+ List results;
try
{
results = client.Search(searchString);
diff --git a/SearcherCore/SearcherCore/Searcher/GoogleAPI/SearchGoogleApi.cs b/SearcherCore/SearcherCore/Searcher/GoogleAPI/SearchGoogleApi.cs
index eab4c41..e9a9712 100644
--- a/SearcherCore/SearcherCore/Searcher/GoogleAPI/SearchGoogleApi.cs
+++ b/SearcherCore/SearcherCore/Searcher/GoogleAPI/SearchGoogleApi.cs
@@ -44,11 +44,11 @@ private CseResource.ListRequest BuildRequest(string searchString)
return listRequest;
}
- public List RunService(string searchString)
+ public List RunService(string searchString)
{
var listRequest = BuildRequest(searchString);
IList paging = new List();
- var urls = new List();
+ var urls = new List();
var count = 0;
while (paging != null)
{
@@ -60,7 +60,7 @@ private CseResource.ListRequest BuildRequest(string searchString)
{
urls.AddRange(
paging.Select(
- item => new GoogleAPISearcher.GoogleAPIResults {Url = item.Link, Title = item.Title}));
+ item => item.Link));
foreach (var item in paging)
{
UpdateStatus(item.Link);
diff --git a/SearcherCore/SearcherCore/Searcher/GoogleAPISearcher.cs b/SearcherCore/SearcherCore/Searcher/GoogleAPISearcher.cs
index 5563223..62e81a7 100644
--- a/SearcherCore/SearcherCore/Searcher/GoogleAPISearcher.cs
+++ b/SearcherCore/SearcherCore/Searcher/GoogleAPISearcher.cs
@@ -1,22 +1,12 @@
-using System;
-using System.Collections.Generic;
-using System.Linq;
-using System.Text;
-using System.Threading;
using FOCA.Threads;
using SearcherCore.Searcher.GoogleAPI;
+using System.Linq;
+using System.Threading;
namespace FOCA.Searcher
{
- public class GoogleAPISearcher: WebSearcher
+ public class GoogleAPISearcher : WebSearcher
{
- public class GoogleAPIResults
- {
- public string Url;
- public string Title;
- public string UrlCache;
- }
-
public string GoogleApiKey { get; set; }
public string GoogleApiCx { get; set; }
public const int maxResultPerRequest = 8;
@@ -38,7 +28,7 @@ public override void GetLinks()
if (thrSearchLinks != null && thrSearchLinks.IsAlive) return;
thrSearchLinks = new Thread(GetLinksAsync)
{
- Priority = ThreadPriority.Lowest,
+ Priority = ThreadPriority.Lowest,
IsBackground = true
};
thrSearchLinks.Start();
@@ -68,7 +58,7 @@ private void GetLinksAsync()
OnSearcherChangeStateEvent(new EventsThreads.ThreadStringEventArgs("Searching links in " + Name + "..."));
try
{
- foreach(var strExtension in Extensions)
+ foreach (var strExtension in Extensions)
{
OnSearcherChangeStateEvent(new EventsThreads.ThreadStringEventArgs("Search " + strExtension + " in " + Name));
GetGoogleLinks("site:" + Site + " filetype:" + strExtension);
@@ -96,7 +86,7 @@ private void GetCustomLinksAsync(object customSearchString)
try
{
if (SearchAll)
- OnSearcherEndEvent(GetGoogleAllLinks((string) customSearchString) > maxResults - 10
+ OnSearcherEndEvent(GetGoogleAllLinks((string)customSearchString) > maxResults - 10
? new EventsThreads.ThreadEndEventArgs(
EventsThreads.ThreadEndEventArgs.EndReasonEnum.LimitReached)
: new EventsThreads.ThreadEndEventArgs(EventsThreads.ThreadEndEventArgs.EndReasonEnum.NoMoreData));
@@ -140,7 +130,7 @@ private int GetGoogleResults(string searchString, out bool moreResults)
OnSearcherLogEvent(new EventsThreads.ThreadStringEventArgs($"[{strName}] Found {results.Count} links"));
OnSearcherLinkFoundEvent(new EventsThreads.ThreadListDataFoundEventArgs(results));
- return results.Count ;
+ return results.Count;
}
///
diff --git a/SearcherCore/SearcherCore/Searcher/GoogleWebSearcher.cs b/SearcherCore/SearcherCore/Searcher/GoogleWebSearcher.cs
index e9a51ad..46999d6 100644
--- a/SearcherCore/SearcherCore/Searcher/GoogleWebSearcher.cs
+++ b/SearcherCore/SearcherCore/Searcher/GoogleWebSearcher.cs
@@ -1,15 +1,13 @@
+using FOCA.Threads;
using System;
using System.Collections.Generic;
-using System.Text;
-using System.Threading;
-using System.Net;
using System.IO;
-using System.Linq;
+using System.Net;
+using System.Net.Cache;
+using System.Text;
using System.Text.RegularExpressions;
-using System.Windows.Forms;
-using FOCA.Threads;
+using System.Threading;
using System.Web;
-using System.Net.Cache;
namespace FOCA.Searcher
{
@@ -21,8 +19,8 @@ public class GoogleWebSearcher : WebSearcher
public const int maxResults = 1000;
private static string strCaptchaCookie = string.Empty;
- private static Form frmCaptcha;
-
+ private static readonly Regex googleWebUriRegex = new Regex("q=(?https?:\\/\\/[^>]+)&sa", RegexOptions.IgnoreCase | RegexOptions.Compiled);
+
public int ResultsPerPage { get; set; }
public int Offset { get; set; }
@@ -51,7 +49,7 @@ public override void GetLinks()
if (thrSearchLinks != null && thrSearchLinks.IsAlive) return;
thrSearchLinks = new Thread(GetLinksAsync)
{
- Priority = ThreadPriority.Lowest,
+ Priority = ThreadPriority.Lowest,
IsBackground = true
};
thrSearchLinks.Start();
@@ -67,9 +65,10 @@ public override void GetCustomLinks(string customSearchString)
thrSearchLinks = new Thread(GetCustomLinksAsync)
{
- Priority = ThreadPriority.Lowest,
+ Priority = ThreadPriority.Lowest,
IsBackground = true
};
+ thrSearchLinks.SetApartmentState(ApartmentState.STA);
thrSearchLinks.Start(customSearchString);
}
@@ -111,7 +110,7 @@ private void GetCustomLinksAsync(object CustomSearchString)
try
{
if (SearchAll)
- OnSearcherEndEvent(GetGoogleAllLinks((string) CustomSearchString) == maxResults
+ OnSearcherEndEvent(GetGoogleAllLinks((string)CustomSearchString) == maxResults
? new EventsThreads.ThreadEndEventArgs(
EventsThreads.ThreadEndEventArgs.EndReasonEnum.LimitReached)
: new EventsThreads.ThreadEndEventArgs(EventsThreads.ThreadEndEventArgs.EndReasonEnum.NoMoreData));
@@ -166,7 +165,7 @@ private int GetGoogleResults(string searchString, int currentResultPerPage, int
sb.Append("&as_qdr=" + FirstSeenToHtmlOption(FirstSeen));
if (WriteInLanguage != Language.AnyLanguage)
sb.Append("&lr=" + LanguageToHtmlOption(WriteInLanguage));
-
+
var intTimeOut = 5000 + 10000 * retries;
Error = false;
request = (HttpWebRequest)HttpWebRequest.Create(sb.ToString());
@@ -189,102 +188,31 @@ private int GetGoogleResults(string searchString, int currentResultPerPage, int
try
{
OnSearcherLogEvent(new EventsThreads.ThreadStringEventArgs(string.Format("[{0}] Requesting URL {1}", strName, request.RequestUri)));
+ response = (HttpWebResponse)request.GetResponse();
- try
- {
- response = (HttpWebResponse)request.GetResponse();
- }
- catch (WebException we)
+ }
+ catch (WebException we)
+ {
+ if (we.Response != null && we.Response is HttpWebResponse)
{
- var hwr = (HttpWebResponse)we.Response;
- if (hwr.StatusCode == HttpStatusCode.ServiceUnavailable)
+ HttpWebResponse exceptionResponse = (HttpWebResponse)we.Response;
+ // Too many requests (HTTP 429): Google is asking for a reCAPTCHA
+ if ((int)exceptionResponse.StatusCode == 429)
{
-
- referer = hwr.ResponseUri.AbsoluteUri;
-
- var srLector = new StreamReader(hwr.GetResponseStream(), Encoding.UTF8);
- var htmlCaptcha = srLector.ReadToEnd();
- srLector.Close();
- hwr.Close();
-
- Match m;
- var continuePagePattern = "name=\"continue\" value=\"(.*?)\"";
- m = Regex.Match(htmlCaptcha, continuePagePattern);
- var strContinuePage = m.Success ? HttpUtility.UrlEncode(m.Groups[1].Value.Replace("&", "&"), Encoding.UTF8) : string.Empty;
-
- var captchaImgPattern = "/sorry/image(.*?)\"";
- m = Regex.Match(htmlCaptcha, captchaImgPattern);
- var captchaImg = m.Success ? m.Value.Substring(0, m.Value.Length - 1).Replace("&", "&") : string.Empty;
-
- var captchaIDPattern = "id=(.*?)&";
- m = Regex.Match(captchaImg, captchaIDPattern);
- var captchaID = m.Success ? m.Groups[1].Value : string.Empty;
-
- var captchaQParameterPattern = "q=(.*?)&";
- m = Regex.Match(captchaImg, captchaQParameterPattern);
- var qParameter = m.Success ? m.Groups[1].Value : string.Empty;
-
- var captchaSolved = string.Empty;
-
- if (frmCaptcha == null)
- captchaSolved = ShowCaptcha(captchaImg);
- else
- {
- frmCaptcha.FormClosed += frmCaptcha_FormClosed;
-
- Error = true;
- do
- {
- Thread.Sleep(500);
- } while (frmCaptcha != null);
-
- if (string.IsNullOrEmpty(captchaSolved))
- Thread.CurrentThread.Abort();
- continue;
- }
-
- var url = ipv4Google + "/sorry/index";
- var strRequest = url + "?q=" + qParameter + "&hl=es&continue=" + strContinuePage + "&id=" + captchaID + "&captcha=" + captchaSolved + "&submit=Enviar";
- request = (HttpWebRequest)HttpWebRequest.Create(strRequest);
- request.CachePolicy = policy;
- if (!string.IsNullOrEmpty(UserAgent))
- request.UserAgent = UserAgent;
- var Cookie = strCaptchaCookie;
- request.UserAgent = "FOCA";
- request.Headers[HttpRequestHeader.Cookie] = Cookie;
- request.Headers[HttpRequestHeader.AcceptLanguage] = "es-ES,es;q=0.8,en-US;q=0.5,en;q=0.3";
- request.Headers[HttpRequestHeader.AcceptEncoding] = "gzip, deflate, br";
- request.Referer = referer;
- request.Accept = "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8";
- request.KeepAlive = true;
-
- request.Timeout = intTimeOut;
- request.AllowAutoRedirect = false;
- response = (HttpWebResponse)request.GetResponse();
-
- if (response.StatusCode == HttpStatusCode.Found)
- {
- var newUrl = response.Headers["Location"];
-
- var nextURL = HttpUtility.UrlDecode(newUrl, Encoding.UTF8);
- var query = HttpUtility.ParseQueryString(nextURL);
- var googleAbuseParameters = query["google_abuse"].Split(';');
-
- foreach (var parameter in googleAbuseParameters.Where(parameter => parameter.StartsWith("GOOGLE_ABUSE_EXEMPTION")))
- {
- strCaptchaCookie = parameter;
- }
- }
-
- Error = true;
+ OnSearcherLogEvent(new EventsThreads.ThreadStringEventArgs("Too many requests to GoogleWeb engine. Please use the API instead."));
+ retries = 3;
+ }
+ else
+ {
+ OnSearcherLogEvent(new EventsThreads.ThreadStringEventArgs(string.Format("[{0}] Error {1} in request {2}", strName, retries, request.RequestUri.ToString())));
}
}
- }
- catch (WebException)
- {
+ else
+ {
+ OnSearcherLogEvent(new EventsThreads.ThreadStringEventArgs(string.Format("[{0}] Error {1} in request {2}", strName, retries, request.RequestUri.ToString())));
+ }
Error = true;
retries++;
- OnSearcherLogEvent(new EventsThreads.ThreadStringEventArgs(string.Format("[{0}] Error {1} in request {2}", strName, retries, request.RequestUri.ToString())));
}
catch
{
@@ -293,6 +221,7 @@ private int GetGoogleResults(string searchString, int currentResultPerPage, int
OnSearcherLogEvent(new EventsThreads.ThreadStringEventArgs(string.Format("[{0}] Error {1} in request {2}", strName, retries, request.RequestUri.ToString())));
}
} while (Error && retries < 3);
+
if (Error || retries >= 3)
throw new Exception(string.Format("[{0}] Error connecting", Name));
string html = null;
@@ -301,34 +230,19 @@ private int GetGoogleResults(string searchString, int currentResultPerPage, int
html = lector.ReadToEnd();
}
response.Close();
- var patron = new Regex("<\\s*a\\s+href=\\s*\"?([^\"]*)\"?\\s*", RegexOptions.IgnoreCase);
- var lstCurrentResults = new List