diff --git a/F#/Crawler.Test/Crawler.Test.fsproj b/F#/Crawler.Test/Crawler.Test.fsproj
new file mode 100644
index 0000000..9ab7de2
--- /dev/null
+++ b/F#/Crawler.Test/Crawler.Test.fsproj
@@ -0,0 +1,29 @@
+
+
+
+    net8.0
+
+    false
+    true
+    true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/F#/Crawler.Test/CrawlerTest.fs b/F#/Crawler.Test/CrawlerTest.fs
new file mode 100644
index 0000000..04efc01
--- /dev/null
+++ b/F#/Crawler.Test/CrawlerTest.fs
@@ -0,0 +1,43 @@
+module Crawler.Test
+
+open NUnit.Framework
+open FsUnit
+
+/// A string that is not a valid url must make crawlAsync yield Error.
+[<Test>]
+let incorrectUrlTest () =
+    let result = crawlAsync "asd" |> Async.RunSynchronously
+    match result with
+    | Error _ -> Assert.Pass()
+    | _ -> Assert.Fail()
+
+/// Crawls a live page and compares every discovered link and its outcome
+/// with a recorded snapshot. Needs network access, and the recorded sizes
+/// depend on the current content of the remote pages.
+[<Test>]
+let complicatedTest () =
+    let result = crawlAsync "https://clang.llvm.org/get_started.html" |> Async.RunSynchronously
+    let expectedResult : (string * Choice<int, exn>)[] = [|
+        ("http://llvm.org/", Choice1Of2(16655));
+        ("http://llvm.org/releases/download.html", Choice2Of2(System.Net.WebException()));
+        ("http://clang-analyzer.llvm.org", Choice1Of2(7996));
+        ("http://lists.llvm.org/mailman/listinfo/cfe-commits", Choice1Of2(6720));
+        ("http://clang.llvm.org/doxygen/", Choice1Of2(2614));
+        ("http://llvm.org/devmtg/", Choice1Of2(13563));
+        ("http://getgnuwin32.sourceforge.net/", Choice1Of2(10391))
+    |]
+    match result with
+    | Error _ ->
+        Assert.Fail()
+    | Ok actual ->
+        let check x y =
+            fst(x) |> should equal (fst(y))
+            match (snd(x), snd(y)) with
+            | (Choice1Of2 a, Choice1Of2 b) -> a |> should equal b
+            // Exceptions are compared by type only.
+            | (Choice2Of2 (a: exn), Choice2Of2 (b: exn)) -> a.GetType() |> should equal (b.GetType())
+            | _ -> Assert.Fail()
+        // Seq.iter2 stops at the shorter input, so a missing or extra link
+        // would go unnoticed without an explicit length check.
+        Array.length actual |> should equal (Array.length expectedResult)
+        Seq.iter2 check expectedResult actual
diff --git a/F#/Crawler/Crawler.fs b/F#/Crawler/Crawler.fs
new file mode 100644
index 0000000..911fe3a
--- /dev/null
+++ b/F#/Crawler/Crawler.fs
@@ -0,0 +1,63 @@
+module Crawler
+
+open FSharp.Data
+open System.Text.RegularExpressions
+
+/// <summary>
+/// Creates an async task that downloads a page and yields its length
+/// </summary>
+/// <param name="url">Page url</param>
+/// <returns>
+/// Choice1Of2 with the length of the downloaded page text, or
+/// Choice2Of2 with the exception raised while downloading it
+/// </returns>
+let fetchSize url =
+    async {
+        let! page = Http.AsyncRequestString(url)
+        return page.Length
+    } |> Async.Catch
+
+/// <summary>
+/// Creates an async task that downloads the page at the given url and
+/// fetches the size of every link matched on it
+/// </summary>
+/// <param name="url">Page url</param>
+/// <returns>
+/// Error with the exception if the page itself could not be downloaded,
+/// otherwise Ok with an array of (link, fetchSize result) pairs
+/// </returns>
+let crawlAsync url =
+    // NOTE(review): the pattern literal is empty here, which looks like an
+    // extraction loss. Groups[1] is read below, so the intended pattern
+    // needs one capture group around the link target. Restore before use.
+    let regex = new Regex("")
+    async {
+        let! pageOrExc = Http.AsyncRequestString(url) |> Async.Catch
+        match pageOrExc with
+        | Choice2Of2 exc -> return Error(exc)
+        | Choice1Of2 page ->
+            // Materialise once so links and results stay index-aligned.
+            let links =
+                regex.Matches(page)
+                |> Seq.map (fun x -> x.Groups[1].Value)
+                |> Seq.toArray
+            // Await the parallel downloads instead of blocking a thread
+            // with Async.RunSynchronously inside the async workflow.
+            let! results = links |> Seq.map fetchSize |> Async.Parallel
+            return Ok(Array.zip links results)
+    }
+
+/// <summary>
+/// Runs crawlAsync synchronously and prints each link with its size
+/// </summary>
+/// <param name="url">Page url</param>
+let printCrawl url =
+    let result = crawlAsync url |> Async.RunSynchronously
+    match result with
+    | Error e -> printfn "Incorrect url: %s" e.Message
+    | Ok array ->
+        let printUrl (url, result) =
+            match result with
+            | Choice1Of2 length -> printfn "%s - %d" url length
+            | Choice2Of2 _ -> printfn "Incorrect url ignored: %s" url
+        array |> Array.iter printUrl
diff --git a/F#/Crawler/Crawler.fsproj b/F#/Crawler/Crawler.fsproj
new file mode 100644
index 0000000..5180b9c
--- /dev/null
+++ b/F#/Crawler/Crawler.fsproj
@@ -0,0 +1,16 @@
+
+
+
+    net8.0
+    true
+
+
+
+
+
+
+
+
+
+
+
diff --git a/F#/forSpbu.sln b/F#/forSpbu.sln
index 9d54d03..c49837d 100644
--- a/F#/forSpbu.sln
+++ b/F#/forSpbu.sln
@@ -21,6 +21,10 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "07.03", "07.03", "{5CA9053B
 EndProject
 Project("{F2A71F9B-5D33-465A-A702-920D77279786}") = "Homework1", "Homework1\Homework1.fsproj", "{04B15EE4-079A-42ED-ACC8-E2DCD25281C6}"
 EndProject
+Project("{F2A71F9B-5D33-465A-A702-920D77279786}") = "Crawler", "Crawler\Crawler.fsproj", "{39BB4800-E3FC-4BEC-A740-FC95E622A68B}"
+EndProject
+Project("{F2A71F9B-5D33-465A-A702-920D77279786}") = "Crawler.Test", "Crawler.Test\Crawler.Test.fsproj", "{E99E1D30-982C-4C5F-B53B-E2A2B5B61749}"
+EndProject
 Global
 	GlobalSection(SolutionConfigurationPlatforms) = preSolution
 		Debug|Any CPU = Debug|Any CPU
@@ -58,6 +62,14 @@ Global
 		{89A935E8-B5F3-435D-ACC3-A99DD7C66178}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{89A935E8-B5F3-435D-ACC3-A99DD7C66178}.Release|Any CPU.ActiveCfg = Release|Any CPU
 		{89A935E8-B5F3-435D-ACC3-A99DD7C66178}.Release|Any CPU.Build.0 = Release|Any CPU
+		{39BB4800-E3FC-4BEC-A740-FC95E622A68B}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+		{39BB4800-E3FC-4BEC-A740-FC95E622A68B}.Debug|Any CPU.Build.0 = Debug|Any CPU
+		{39BB4800-E3FC-4BEC-A740-FC95E622A68B}.Release|Any CPU.ActiveCfg = Release|Any CPU
+		{39BB4800-E3FC-4BEC-A740-FC95E622A68B}.Release|Any CPU.Build.0 = Release|Any CPU
+		{E99E1D30-982C-4C5F-B53B-E2A2B5B61749}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+		{E99E1D30-982C-4C5F-B53B-E2A2B5B61749}.Debug|Any CPU.Build.0 = Debug|Any CPU
+		{E99E1D30-982C-4C5F-B53B-E2A2B5B61749}.Release|Any CPU.ActiveCfg = Release|Any CPU
+		{E99E1D30-982C-4C5F-B53B-E2A2B5B61749}.Release|Any CPU.Build.0 = Release|Any CPU
 	EndGlobalSection
 	GlobalSection(NestedProjects) = preSolution
 		{7937CDA8-8285-4E23-AD1A-FC0F04FEEFE6} = {91E3BDA2-0836-46C2-95F0-02513FD7F13F}