diff --git a/appveyor.yml b/appveyor.yml index 855f34f..c522ecb 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -1,7 +1,17 @@ -image: Visual Studio 2017 +image: Visual Studio 2019 + +init: + - git config --global core.autocrlf true + +environment: + matrix: + - solution: Semester4/WebPageDownloader/WebPageDownloader.sln before_build: - - nuget restore semester2/6.1/HW6T2.sln + - nuget restore %solution% build: - project: semester2/2.3/2.3.sln + project: $(solution) + +test_script: + - dotnet test %solution% diff --git a/semester4/WebPageDownloader/WebPageDownloader.Tests/UnitTest1.fs b/semester4/WebPageDownloader/WebPageDownloader.Tests/UnitTest1.fs new file mode 100644 index 0000000..70caf6a --- /dev/null +++ b/semester4/WebPageDownloader/WebPageDownloader.Tests/UnitTest1.fs @@ -0,0 +1,21 @@ +module WebPageDownloader.Tests + +open NUnit.Framework +open FsUnit +open Downloader + +[<Test>] +let ``Cheeze test`` () = + let data = (getAllMentionedPages "https://akulovka.com/blog/syr/").Value + data |> snd |> List.length |> should equal 27 + data |> snd |> List.item 0 |> fst |> should equal "https://akulovka.com/blog/rss/" + +[<Test>] +let ``Github test`` () = + let data = (getAllMentionedPages "https://github.com/").Value + data |> snd |> List.length |> should equal 53 + data |> snd |> List.item 0 |> fst |> should equal "https://github.githubassets.com" + +[<Test>] +let ``Invalid url test`` () = + getAllMentionedPages "https://theresnotgingihope.com" |> should equal None \ No newline at end of file diff --git a/semester4/WebPageDownloader/WebPageDownloader.Tests/WebPageDownloader.Tests.fsproj b/semester4/WebPageDownloader/WebPageDownloader.Tests/WebPageDownloader.Tests.fsproj new file mode 100644 index 0000000..759733b --- /dev/null +++ b/semester4/WebPageDownloader/WebPageDownloader.Tests/WebPageDownloader.Tests.fsproj @@ -0,0 +1,25 @@ + + + + netcoreapp3.1 + + false + false + + + + + + + + + + + + + + + + + + diff --git a/semester4/WebPageDownloader/WebPageDownloader.sln 
b/semester4/WebPageDownloader/WebPageDownloader.sln new file mode 100644 index 0000000..1cac14e --- /dev/null +++ b/semester4/WebPageDownloader/WebPageDownloader.sln @@ -0,0 +1,22 @@ + +Microsoft Visual Studio Solution File, Format Version 12.00 +Project("{F2A71F9B-5D33-465A-A702-920D77279786}") = "WebPageDownloader", "WebPageDownloader\WebPageDownloader.fsproj", "{1BC317B7-AD15-4A86-B19E-D9F1ACBF11B5}" +EndProject +Project("{F2A71F9B-5D33-465A-A702-920D77279786}") = "WebPageDownloader.Tests", "WebPageDownloader.Tests\WebPageDownloader.Tests.fsproj", "{E0403355-E29F-4F86-914A-E08F63A77FCB}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|Any CPU = Debug|Any CPU + Release|Any CPU = Release|Any CPU + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {1BC317B7-AD15-4A86-B19E-D9F1ACBF11B5}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {1BC317B7-AD15-4A86-B19E-D9F1ACBF11B5}.Debug|Any CPU.Build.0 = Debug|Any CPU + {1BC317B7-AD15-4A86-B19E-D9F1ACBF11B5}.Release|Any CPU.ActiveCfg = Release|Any CPU + {1BC317B7-AD15-4A86-B19E-D9F1ACBF11B5}.Release|Any CPU.Build.0 = Release|Any CPU + {E0403355-E29F-4F86-914A-E08F63A77FCB}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {E0403355-E29F-4F86-914A-E08F63A77FCB}.Debug|Any CPU.Build.0 = Debug|Any CPU + {E0403355-E29F-4F86-914A-E08F63A77FCB}.Release|Any CPU.ActiveCfg = Release|Any CPU + {E0403355-E29F-4F86-914A-E08F63A77FCB}.Release|Any CPU.Build.0 = Release|Any CPU + EndGlobalSection +EndGlobal diff --git a/semester4/WebPageDownloader/WebPageDownloader/Downloader.fs b/semester4/WebPageDownloader/WebPageDownloader/Downloader.fs new file mode 100644 index 0000000..0d8d41d --- /dev/null +++ b/semester4/WebPageDownloader/WebPageDownloader/Downloader.fs @@ -0,0 +1,50 @@ +module Downloader + +open System.Net +open System.IO +open System.Text.RegularExpressions +open System + +let expr = @"href\s*=\s*(?:[""'](?<1>[^""']*)[""']|(?<1>\S+))" +let regex = Regex (expr, 
RegexOptions.IgnoreCase) + +/// Downloads the page at the given url; yields Some (url, page) on success and None if any exception is raised +let fetchAsync url = + async { + try + let request = WebRequest.Create(Uri(url)) + use! response = request.AsyncGetResponse() + use stream = response.GetResponseStream() + use reader = new StreamReader(stream) + let page = reader.ReadToEnd() + return Some (url, page) + with + | _ -> + return None + } + +/// Finds the targets of all href attributes on the given page +let getAllLinks page = + regex.Matches(page) |> Seq.map (fun (x : Match) -> x.Groups.[1].Value) |> Seq.toList + +/// Takes a list of urls and downloads the page at every url in parallel; each failed download is None +let getAllPages links = + List.map fetchAsync links |> Async.Parallel |> Async.RunSynchronously |> Array.toList + +/// Returns the page located at the given url and all successfully downloaded pages mentioned there, or None if that page cannot be fetched +let getAllMentionedPages url = + let firstPage = fetchAsync url |> Async.RunSynchronously + match firstPage with + | None -> None + | Some (_, page) -> + let pages = page |> getAllLinks |> getAllPages |> List.filter (fun x -> x.IsSome) |> List.map Option.get + Some (firstPage.Value, pages) + +/// Prints the original url, the number of valid links found and, for every mentioned page, its url and number of symbols +let printAllData url = + match getAllMentionedPages url with + | None -> printfn "Nothing had been found, seems like url is wrong" + | Some (page, mentioned) -> + printfn "Original url %s --- %i symbols" (fst page) ((snd page).Length) + mentioned |> List.length |> printfn "Found %i valid links" + mentioned |> List.iter (fun item -> printfn "Url: %s --- %i symbols" (fst item) (snd item).Length) \ No newline at end of file diff --git a/semester4/WebPageDownloader/WebPageDownloader/WebPageDownloader.fsproj b/semester4/WebPageDownloader/WebPageDownloader/WebPageDownloader.fsproj new file mode 100644 index 0000000..3d6f238 --- /dev/null +++ b/semester4/WebPageDownloader/WebPageDownloader/WebPageDownloader.fsproj @@ -0,0 +1,12 @@ + + + + Library + netcoreapp3.1 + + + + + + +