Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 13 additions & 3 deletions appveyor.yml
Original file line number Diff line number Diff line change
@@ -1,7 +1,17 @@
image: Visual Studio 2017
image: Visual Studio 2019

init:
- git config --global core.autocrlf true

environment:
matrix:
- solution: Semester4/WebPageDownloader/WebPageDownloader.sln

before_build:
- nuget restore semester2/6.1/HW6T2.sln
- nuget restore %solution%

build:
project: semester2/2.3/2.3.sln
project: $(solution)

test_script:
- dotnet test %solution%
21 changes: 21 additions & 0 deletions semester4/WebPageDownloader/WebPageDownloader.Tests/UnitTest1.fs
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
module WebPageDownloader.Tests

open NUnit.Framework
open FsUnit
open Downloader

/// Fetches the akulovka.com cheese blog page and checks the pages it mentions:
/// 27 of them must be downloadable and the first one must be the RSS feed url.
/// Requires network access to the live site.
[<Test>]
let ``Cheeze test`` () =
    let (_, mentioned) = (getAllMentionedPages "https://akulovka.com/blog/syr/").Value
    List.length mentioned |> should equal 27
    fst mentioned.Head |> should equal "https://akulovka.com/blog/rss/"

/// Fetches the GitHub front page and checks the pages it mentions:
/// 53 of them must be downloadable and the first one must be the assets host.
/// Requires network access to the live site.
[<Test>]
let ``Github test`` () =
    let (_, mentioned) = (getAllMentionedPages "https://github.com/").Value
    List.length mentioned |> should equal 53
    fst mentioned.Head |> should equal "https://github.githubassets.com"

/// A url that cannot be downloaded must produce None instead of raising.
[<Test>]
let ``Invalid url test`` () =
    let result = getAllMentionedPages "https://theresnotgingihope.com"
    result |> should equal None
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
<Project Sdk="Microsoft.NET.Sdk">

<!-- Test project: compiles UnitTest1.fs against the WebPageDownloader project referenced below. -->
<PropertyGroup>
<TargetFramework>netcoreapp3.1</TargetFramework>

<IsPackable>false</IsPackable>
<GenerateProgramFile>false</GenerateProgramFile>
</PropertyGroup>

<!-- Test dependencies: FsUnit, NUnit, the NUnit3 test adapter and the .NET test SDK. -->
<ItemGroup>
<PackageReference Include="FsUnit" Version="4.0.1" />
<PackageReference Include="nunit" Version="3.12.0" />
<PackageReference Include="NUnit3TestAdapter" Version="3.15.1" />
<PackageReference Include="Microsoft.NET.Test.Sdk" Version="16.4.0" />
</ItemGroup>

<!-- F# source files of this project. -->
<ItemGroup>
<Compile Include="UnitTest1.fs" />
</ItemGroup>

<!-- The project under test. -->
<ItemGroup>
<ProjectReference Include="..\WebPageDownloader\WebPageDownloader.fsproj" />
</ItemGroup>

</Project>
22 changes: 22 additions & 0 deletions semester4/WebPageDownloader/WebPageDownloader.sln
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@

Microsoft Visual Studio Solution File, Format Version 12.00
Project("{F2A71F9B-5D33-465A-A702-920D77279786}") = "WebPageDownloader", "WebPageDownloader\WebPageDownloader.fsproj", "{1BC317B7-AD15-4A86-B19E-D9F1ACBF11B5}"
EndProject
Project("{F2A71F9B-5D33-465A-A702-920D77279786}") = "WebPageDownloader.Tests", "WebPageDownloader.Tests\WebPageDownloader.Tests.fsproj", "{E0403355-E29F-4F86-914A-E08F63A77FCB}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
Release|Any CPU = Release|Any CPU
EndGlobalSection
GlobalSection(ProjectConfigurationPlatforms) = postSolution
{1BC317B7-AD15-4A86-B19E-D9F1ACBF11B5}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{1BC317B7-AD15-4A86-B19E-D9F1ACBF11B5}.Debug|Any CPU.Build.0 = Debug|Any CPU
{1BC317B7-AD15-4A86-B19E-D9F1ACBF11B5}.Release|Any CPU.ActiveCfg = Release|Any CPU
{1BC317B7-AD15-4A86-B19E-D9F1ACBF11B5}.Release|Any CPU.Build.0 = Release|Any CPU
{E0403355-E29F-4F86-914A-E08F63A77FCB}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{E0403355-E29F-4F86-914A-E08F63A77FCB}.Debug|Any CPU.Build.0 = Debug|Any CPU
{E0403355-E29F-4F86-914A-E08F63A77FCB}.Release|Any CPU.ActiveCfg = Release|Any CPU
{E0403355-E29F-4F86-914A-E08F63A77FCB}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
EndGlobal
50 changes: 50 additions & 0 deletions semester4/WebPageDownloader/WebPageDownloader/Downloader.fs
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
module Downloader

open System.Net
open System.IO
open System.Text.RegularExpressions
open System

/// Regular expression source matching an `href` attribute assignment and
/// capturing its value in group 1: either a value wrapped in single or double
/// quotes (the opening and closing quote are not required to be the same
/// character), or an unquoted run of non-whitespace characters.
/// NOTE(review): the pattern is not anchored to an `<a` tag, so any `href=...`
/// text found on a page is treated as a link.
let expr = @"href\s*=\s*(?:[""'](?<1>[^""']*)[""']|(?<1>\S+))"

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Очень уж разрешающий регэксп. Если просто где-то на странице найдётся, например, href='sdfds", оно подумает, что это ссылка. Правильнее было бы искать начало тэга <a

/// Case-insensitive `Regex` built from `expr`; used by `getAllLinks` to find link targets.
let regex = Regex (expr, RegexOptions.IgnoreCase)

/// Downloads the page located at `url`.
/// Returns an async computation producing `Some (url, page)`, where `page` is
/// the response body read as text, or `None` if anything fails (for example a
/// malformed url, a network error or an unreadable response).
let fetchAsync url =
    async {
        try
            let request = WebRequest.Create(Uri(url))
            use! response = request.AsyncGetResponse()
            use stream = response.GetResponseStream()
            use reader = new StreamReader(stream)
            // Read the body asynchronously: a blocking ReadToEnd here would hold
            // a thread for the whole download and limit how many fetches can
            // overlap when this workflow is run through Async.Parallel.
            let! page = reader.ReadToEndAsync() |> Async.AwaitTask
            return Some (url, page)
        with
        | _ ->
            // Deliberate best-effort: every failure means "page not available".
            return None
    }

/// Extracts every link target found in `page`.
/// Returns, in order of appearance, the text captured by group 1 of `regex`
/// for each match.
let getAllLinks page =
    let target (m : Match) = m.Groups.[1].Value
    regex.Matches(page) |> Seq.map target |> List.ofSeq

/// Takes a list of urls and downloads the page behind each of them in parallel.
/// Blocks until all downloads finish; the result keeps the order of `links`
/// and holds `None` for every url that could not be fetched.
let getAllPages links =
    links
    |> List.map fetchAsync
    |> Async.Parallel
    |> Async.RunSynchronously
    |> List.ofArray

/// Downloads the page located at `url` together with every page it links to.
/// Returns `None` when the page at `url` itself cannot be fetched; otherwise
/// `Some (first, pages)`, where `first` is the `(url, page)` pair of the
/// starting page and `pages` are the `(url, page)` pairs of the mentioned
/// pages that were fetched successfully (failed downloads are dropped).
let getAllMentionedPages url =
    fetchAsync url
    |> Async.RunSynchronously
    |> Option.map (fun ((_, page) as first) ->
        // List.choose id keeps the payload of every Some and discards Nones.
        let pages = page |> getAllLinks |> getAllPages |> List.choose id
        first, pages)

/// Prints a report for the page at `url`: the url and size in characters of
/// the page itself, the number of mentioned pages that could be downloaded,
/// and the url and size of each of them. Prints a notice instead when the
/// starting page cannot be fetched.
let printAllData url =
    match getAllMentionedPages url with
    | Some ((rootUrl, rootPage), mentioned) ->
        printfn "Original url %s --- %i symbols" rootUrl rootPage.Length
        printfn "Found %i valid links" (List.length mentioned)
        for (link, body) in mentioned do
            printfn "Url: %s --- %i symbols" link body.Length
    | None -> printfn "Nothing had been found, seems like url is wrong"
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
<Project Sdk="Microsoft.NET.Sdk">

<!-- Library project built from Downloader.fs. -->
<PropertyGroup>
<OutputType>Library</OutputType>
<TargetFramework>netcoreapp3.1</TargetFramework>
</PropertyGroup>

<!-- F# source files of this project. -->
<ItemGroup>
<Compile Include="Downloader.fs" />
</ItemGroup>

</Project>