Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 29 additions & 0 deletions F#/Crawler.Test/Crawler.Test.fsproj
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
<Project Sdk="Microsoft.NET.Sdk">
<!-- Test project for the Crawler library (see the ProjectReference at the bottom). -->

<PropertyGroup>
<TargetFramework>net8.0</TargetFramework>

<!-- Test assemblies are not meant to be packed into a NuGet package. -->
<IsPackable>false</IsPackable>
<!-- No hand-written entry point is listed under Compile; the program file is generated. -->
<GenerateProgramFile>true</GenerateProgramFile>
<IsTestProject>true</IsTestProject>
</PropertyGroup>

<!-- F# source files, in compilation order. -->
<ItemGroup>
<Compile Include="CrawlerTest.fs" />

</ItemGroup>

<!-- Test framework (NUnit + FsUnit), test adapter/SDK and coverage collector. -->
<ItemGroup>
<PackageReference Include="coverlet.collector" Version="6.0.0" />
<PackageReference Include="Microsoft.NET.Test.Sdk" Version="17.8.0" />
<PackageReference Include="FsUnit" Version="6.0.0" />
<PackageReference Include="NUnit" Version="4.1.0" />
<PackageReference Include="NUnit.Analyzers" Version="3.9.0" />
<PackageReference Include="NUnit3TestAdapter" Version="4.5.0" />
</ItemGroup>

<!-- The library under test. -->
<ItemGroup>
<ProjectReference Include="..\Crawler\Crawler.fsproj" />
</ItemGroup>

</Project>
35 changes: 35 additions & 0 deletions F#/Crawler.Test/CrawlerTest.fs
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
module Crawler.Test

open NUnit.Framework
open FsUnit

/// Crawling a string that is not a valid URL must yield an Error result.
[<Test>]
let incorrectUrlTest () =
    match Async.RunSynchronously(crawlAsync "asd") with
    | Ok _ -> Assert.Fail()
    | Error _ -> Assert.Pass()

/// Crawls a known page and compares every discovered link, together with the
/// size of the page behind it (or the type of the exception raised while
/// fetching it), against a fixed list of expectations.
/// NOTE(review): this test hits the live network and pins exact page sizes,
/// so it will break whenever one of the remote pages changes.
[<Test>]
let complicatedTest () =
    let result =
        crawlAsync "https://clang.llvm.org/get_started.html"
        |> Async.RunSynchronously

    let expectedResult: (string * Choice<int, exn>)[] =
        [| ("http://llvm.org/", Choice1Of2 16655)
           ("http://llvm.org/releases/download.html", Choice2Of2(System.Net.WebException() :> exn))
           ("http://clang-analyzer.llvm.org", Choice1Of2 7996)
           ("http://lists.llvm.org/mailman/listinfo/cfe-commits", Choice1Of2 6720)
           ("http://clang.llvm.org/doxygen/", Choice1Of2 2614)
           ("http://llvm.org/devmtg/", Choice1Of2 13563)
           ("http://getgnuwin32.sourceforge.net/", Choice1Of2 10391) |]

    match result with
    | Error _ -> Assert.Fail()
    | Ok actualResult ->
        // Compare lengths first: a pairwise walk alone (Seq.iter2) stops at the
        // shorter sequence and would silently accept missing or extra links.
        (Array.length actualResult) |> should equal (Array.length expectedResult)

        // Checks one expected entry against the crawled entry at the same position.
        let check
            (expectedUrl: string, expectedOutcome: Choice<int, exn>)
            (actualUrl: string, actualOutcome: Choice<int, exn>)
            =
            actualUrl |> should equal expectedUrl

            match expectedOutcome, actualOutcome with
            | Choice1Of2 expectedSize, Choice1Of2 actualSize -> actualSize |> should equal expectedSize
            // Exceptions are compared by runtime type only, not by message or content.
            | Choice2Of2 expectedExn, Choice2Of2 actualExn ->
                actualExn.GetType() |> should equal (expectedExn.GetType())
            | _ -> Assert.Fail()

        Array.iter2 check expectedResult actualResult
43 changes: 43 additions & 0 deletions F#/Crawler/Crawler.fs
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
module Crawler

open FSharp.Data
open System.Text.RegularExpressions

/// <summary>
/// Builds an asynchronous computation that downloads a page and measures its length.
/// </summary>
/// <param name="url">Address of the page to download.</param>
/// <returns>
/// An async computation yielding <c>Choice1Of2 length</c> (number of characters in the
/// downloaded text) on success, or <c>Choice2Of2 exn</c> if the download raised.
/// </returns>
let fetchSize url =
    // The request is created inside the async block so that any exception it
    // raises is captured by Async.Catch instead of escaping to the caller.
    let measure =
        async {
            let! body = Http.AsyncRequestString(url)
            return body.Length
        }

    Async.Catch measure

/// <summary>
/// Builds an asynchronous computation that downloads the page at the given address,
/// extracts the absolute <c>http://</c> links found in its anchors and fetches the
/// size of every linked page in parallel.
/// </summary>
/// <param name="url">Address of the page to crawl.</param>
/// <returns>
/// <c>Error exn</c> if the page itself could not be downloaded; otherwise <c>Ok</c> with
/// one <c>(link, outcome)</c> pair per matched link, in order of appearance, where
/// <c>outcome</c> is the result of <c>fetchSize</c> for that link.
/// </returns>
let crawlAsync url =
    // Only anchors written exactly as an "a" tag with a single double-quoted
    // http:// href are matched; https links and anchors with extra attributes are skipped.
    let regex = Regex("<a href=\"(http://[\w-/.]+)\">")

    async {
        let! pageOrExc = Http.AsyncRequestString(url) |> Async.Catch

        match pageOrExc with
        | Choice2Of2 exc -> return Error(exc)
        | Choice1Of2 page ->
            // Materialise the links once: the lazy sequence would otherwise be
            // re-enumerated for the fetch and again for pairing with the results.
            let links =
                regex.Matches(page)
                |> Seq.map (fun m -> m.Groups[1].Value)
                |> Seq.toArray

            // Await the parallel fetch with let! rather than Async.RunSynchronously,
            // which would block a thread from inside an async workflow.
            let! results = links |> Array.map fetchSize |> Async.Parallel
            return Ok(Array.zip links results)
    }

/// <summary>
/// Runs <c>crawlAsync</c> synchronously for the given address and prints the outcome:
/// one line per discovered link with its page length, a notice for every link that
/// could not be fetched, or a single error line if the start page itself failed.
/// </summary>
/// <param name="url">Address of the page to crawl.</param>
let printCrawl url =
    // Prints the line for a single (link, fetch outcome) pair.
    let report (link, outcome) =
        match outcome with
        | Choice2Of2 _ -> printfn "Incorrect url ignored: %s" link
        | Choice1Of2 size -> printfn "%s - %d" link size

    match Async.RunSynchronously(crawlAsync url) with
    | Ok entries -> Array.iter report entries
    | Error e -> printfn "Incorrect url: %s" e.Message
16 changes: 16 additions & 0 deletions F#/Crawler/Crawler.fsproj
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
<Project Sdk="Microsoft.NET.Sdk">
<!-- Crawler library project. -->

<PropertyGroup>
<TargetFramework>net8.0</TargetFramework>
<!-- Emit the XML documentation file built from the /// comments in the sources. -->
<GenerateDocumentationFile>true</GenerateDocumentationFile>
</PropertyGroup>

<!-- F# source files, in compilation order. -->
<ItemGroup>
<Compile Include="Crawler.fs" />
</ItemGroup>

<!-- FSharp.Data is opened by Crawler.fs. -->
<ItemGroup>
<PackageReference Include="FSharp.Data" Version="6.4.0" />
</ItemGroup>

</Project>
12 changes: 12 additions & 0 deletions F#/forSpbu.sln
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,10 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "07.03", "07.03", "{5CA9053B
EndProject
Project("{F2A71F9B-5D33-465A-A702-920D77279786}") = "Homework1", "Homework1\Homework1.fsproj", "{04B15EE4-079A-42ED-ACC8-E2DCD25281C6}"
EndProject
Project("{F2A71F9B-5D33-465A-A702-920D77279786}") = "Crawler", "Crawler\Crawler.fsproj", "{39BB4800-E3FC-4BEC-A740-FC95E622A68B}"
EndProject
Project("{F2A71F9B-5D33-465A-A702-920D77279786}") = "Crawler.Test", "Crawler.Test\Crawler.Test.fsproj", "{E99E1D30-982C-4C5F-B53B-E2A2B5B61749}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
Expand Down Expand Up @@ -58,6 +62,14 @@ Global
{89A935E8-B5F3-435D-ACC3-A99DD7C66178}.Debug|Any CPU.Build.0 = Debug|Any CPU
{89A935E8-B5F3-435D-ACC3-A99DD7C66178}.Release|Any CPU.ActiveCfg = Release|Any CPU
{89A935E8-B5F3-435D-ACC3-A99DD7C66178}.Release|Any CPU.Build.0 = Release|Any CPU
{39BB4800-E3FC-4BEC-A740-FC95E622A68B}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{39BB4800-E3FC-4BEC-A740-FC95E622A68B}.Debug|Any CPU.Build.0 = Debug|Any CPU
{39BB4800-E3FC-4BEC-A740-FC95E622A68B}.Release|Any CPU.ActiveCfg = Release|Any CPU
{39BB4800-E3FC-4BEC-A740-FC95E622A68B}.Release|Any CPU.Build.0 = Release|Any CPU
{E99E1D30-982C-4C5F-B53B-E2A2B5B61749}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{E99E1D30-982C-4C5F-B53B-E2A2B5B61749}.Debug|Any CPU.Build.0 = Debug|Any CPU
{E99E1D30-982C-4C5F-B53B-E2A2B5B61749}.Release|Any CPU.ActiveCfg = Release|Any CPU
{E99E1D30-982C-4C5F-B53B-E2A2B5B61749}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(NestedProjects) = preSolution
{7937CDA8-8285-4E23-AD1A-FC0F04FEEFE6} = {91E3BDA2-0836-46C2-95F0-02513FD7F13F}
Expand Down