Skip to content

Commit

Permalink
✨ add support for remote resource fetching (#281)
Browse files Browse the repository at this point in the history
  • Loading branch information
sebastianMindee authored Dec 9, 2024
1 parent aa1b44d commit 0864fc6
Show file tree
Hide file tree
Showing 9 changed files with 171 additions and 135 deletions.
23 changes: 0 additions & 23 deletions docs/code_samples/carte_vitale_v1.txt

This file was deleted.

16 changes: 16 additions & 0 deletions docs/getting_started.md
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,7 @@ There are a few different ways of loading a document file, depending on your use
* [Path](#path)
* [File Object](#stream-object)
* [Bytes](#bytes)
* [Remote File](#remote-file)

### Path
Load from a file directly from disk. Requires an absolute path, as a string.
Expand Down Expand Up @@ -155,6 +156,21 @@ string fileName = "myfile.pdf";
var inputSource = new LocalInputSource(myFileInBytes, fileName);
```

### Remote File

You can pass a URL to the server through the UrlInputSource class:

```csharp
var remoteInput = new UrlInputSource("https://www.example.com/some/file.ext");
```

If your file is behind a redirect, you can download it and load it as a local input source instead:

```csharp
var remoteInput = new UrlInputSource("https://www.example.com/some/file.ext");
var localInput = await remoteInput.AsLocalInputSource();
```

## Parsing a file
To send a file to the API, we need to specify how to process the document.
This will determine which API endpoint is used and how the API return will be handled internally by the library.
Expand Down
48 changes: 47 additions & 1 deletion src/Mindee/Input/UrlInputSource.cs
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
using System;
using System.IO;
using System.Linq;
using System.Threading.Tasks;
using Mindee.Exceptions;
using RestSharp;
using RestSharp.Authenticators;

namespace Mindee.Input
{
Expand Down Expand Up @@ -44,14 +46,58 @@ private void IsUriValid()
{
throw new MindeeInputException("Local files are not supported, use `LocalInputSource` instead.");
}

if (!FileUrl.IsAbsoluteUri)
{
throw new MindeeInputException("The URI must be absolute.");
}

if (FileUrl.Scheme != "https")
{
throw new MindeeInputException("Only the HTTPS scheme is supported.");
}
}

/// <summary>
/// Downloads the file from the url, and returns a LocalInputSource wrapper object for it.
/// </summary>
/// <param name="filename">
/// Name given to the downloaded file. Defaults to the last segment of the URL path.
/// Must include an extension.
/// </param>
/// <param name="username">Username for HTTP basic authentication; only used together with <paramref name="password"/>.</param>
/// <param name="password">Password for HTTP basic authentication; only used together with <paramref name="username"/>.</param>
/// <param name="token">Token passed to a <c>JwtAuthenticator</c>. Takes precedence over username/password.</param>
/// <param name="maxRedirects">Maximum number of redirects followed by the client created by this method.</param>
/// <param name="restClient">
/// Client used to run the request. When provided it is used as-is: the authentication and
/// redirect options built by this method are not applied to it, and it is not disposed here.
/// </param>
/// <returns>A LocalInputSource.</returns>
/// <exception cref="MindeeInputException">
/// Throws if the filename has no extension, or if the file can't be accessed or downloaded.
/// </exception>
public async Task<LocalInputSource> AsLocalInputSource(
    string filename = null,
    string username = null,
    string password = null,
    string token = null,
    int maxRedirects = 3,
    IRestClient restClient = null)
{
    filename ??= Path.GetFileName(FileUrl.LocalPath);
    if (string.IsNullOrEmpty(filename) || !Path.HasExtension(filename))
    {
        throw new MindeeInputException("Filename must end with an extension.");
    }

    // Only a client created here is owned (and therefore disposed) by this method;
    // a caller-supplied client stays under the caller's control.
    RestClient ownedClient = null;
    if (restClient == null)
    {
        var options = new RestClientOptions(FileUrl) { FollowRedirects = true, MaxRedirects = maxRedirects };

        if (!string.IsNullOrEmpty(token))
        {
            options.Authenticator = new JwtAuthenticator(token);
        }
        else if (!string.IsNullOrEmpty(username) && !string.IsNullOrEmpty(password))
        {
            options.Authenticator = new HttpBasicAuthenticator(username, password);
        }

        ownedClient = new RestClient(options);
        restClient = ownedClient;
    }

    try
    {
        var request = new RestRequest(FileUrl);
        var response = await restClient.ExecuteAsync(request).ConfigureAwait(false);

        // Note: response.IsSuccessful can't be mocked as easily, so this is a better solution at the moment.
        if (!response.IsSuccessStatusCode)
        {
            // ErrorMessage may be empty when the server simply answers with an error status,
            // so fall back to the status code to keep the message informative.
            var detail = string.IsNullOrEmpty(response.ErrorMessage)
                ? $"HTTP {(int)response.StatusCode} {response.StatusDescription}".TrimEnd()
                : response.ErrorMessage;
            throw new MindeeInputException($"Failed to download file: {detail}");
        }

        if (response.RawBytes == null || response.RawBytes.Length == 0)
        {
            throw new MindeeInputException("Failed to download file: the response body is empty.");
        }

        return new LocalInputSource(fileBytes: response.RawBytes, filename: filename);
    }
    finally
    {
        ownedClient?.Dispose();
    }
}
}
}
1 change: 1 addition & 0 deletions src/Mindee/Pdf/PdfCompressor.cs
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ public static byte[] CompressPdf(byte[] pdfData, int imageQuality = 85, bool for
Console.WriteLine(
"MINDEE WARNING: Found text inside of the provided PDF file. Compression operation aborted since disableSourceText is set to 'true'.");
Console.ResetColor();
Console.Write(""); // Flush buffer color from stdout. Come on .NET... I shouldn't have to do this.
return pdfData;
}

Expand Down
20 changes: 0 additions & 20 deletions src/Mindee/Product/Fr/CarteVitale/CarteVitaleV1.cs

This file was deleted.

55 changes: 0 additions & 55 deletions src/Mindee/Product/Fr/CarteVitale/CarteVitaleV1Document.cs

This file was deleted.

21 changes: 21 additions & 0 deletions tests/Mindee.IntegrationTests/Input/UrlInputSourceTest.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
using Mindee.Input;
using Mindee.Product.Invoice;

namespace Mindee.IntegrationTests.Input
{
/// <summary>
/// Integration test: downloads a remote PDF through <c>UrlInputSource</c> and parses it.
/// </summary>
[Trait("Category", "URL loading")]
public class UrlInputSourceTest
{
    /// <summary>
    /// Downloads the sample invoice, checks the derived filename, then parses it as an
    /// <c>InvoiceV4</c> and checks the reported page count.
    /// </summary>
    [Fact]
    public async Task GivenARemoteFile_MustRetrieveResponse()
    {
        // The API key is read from the environment.
        var mindeeClient = TestingUtilities.GetOrGenerateMindeeClient(
            Environment.GetEnvironmentVariable("Mindee__ApiKey"));
        var urlSource = new UrlInputSource(
            "https://github.com/mindee/client-lib-test-data/blob/main/products/invoice_splitter/invoice_5p.pdf?raw=true");

        var downloaded = await urlSource.AsLocalInputSource();
        Assert.Equal("invoice_5p.pdf", downloaded.Filename);

        var parsed = await mindeeClient.ParseAsync<InvoiceV4>(downloaded);
        Assert.Equal(5, parsed.Document.NPages);
    }
}
}
86 changes: 86 additions & 0 deletions tests/Mindee.UnitTests/Input/UrlInputFileTest.cs
Original file line number Diff line number Diff line change
@@ -1,11 +1,16 @@
using System.Net;
using Mindee.Exceptions;
using Mindee.Input;
using Moq;
using RestSharp;

namespace Mindee.UnitTests.Input
{
[Trait("Category", "URL loading")]
public class UrlInputSourceTest
{
private readonly Mock<IRestClient> _mockRestClient = new();

[Fact]
public void Can_Load_Type_String()
{
Expand Down Expand Up @@ -34,5 +39,86 @@ public void DoesNot_Load_InvalidUrl()
Assert.Throws<UriFormatException>(
() => new UrlInputSource("invalid-url"));
}
/// <summary>
/// A successful response from the mocked client must yield a LocalInputSource that
/// carries the URL's filename and the returned bytes.
/// </summary>
[Fact]
public async Task AsLocalInputSource_SuccessfulDownload()
{
    var okResponse = new RestResponse
    {
        StatusCode = HttpStatusCode.OK,
        RawBytes = new byte[] { 1, 2, 3, 4, 5 },
        IsSuccessStatusCode = true
    };
    _mockRestClient
        .Setup(client => client.ExecuteAsync(It.IsAny<RestRequest>(), It.IsAny<CancellationToken>()))
        .ReturnsAsync(okResponse);
    var source = new UrlInputSource("https://example.com/file.pdf");

    var local = await source.AsLocalInputSource(restClient: _mockRestClient.Object);

    Assert.IsType<LocalInputSource>(local);
    Assert.Equal("file.pdf", local.Filename);
    Assert.Equal(5, local.FileBytes.Length);
}

/// <summary>
/// A non-success response from the mocked client must surface as a MindeeInputException.
/// </summary>
[Fact]
public async Task AsLocalInputSource_FailedDownload()
{
    var notFoundResponse = new RestResponse
    {
        StatusCode = HttpStatusCode.NotFound,
        ErrorMessage = "File not found",
        IsSuccessStatusCode = false
    };
    _mockRestClient
        .Setup(client => client.ExecuteAsync(It.IsAny<RestRequest>(), It.IsAny<CancellationToken>()))
        .ReturnsAsync(notFoundResponse);
    var source = new UrlInputSource("https://example.com/nonexistent.pdf");

    Func<Task> download = () => source.AsLocalInputSource(restClient: _mockRestClient.Object);

    await Assert.ThrowsAsync<MindeeInputException>(download);
}

/// <summary>
/// An explicit filename must be used instead of the one derived from the URL.
/// </summary>
[Fact]
public async Task AsLocalInputSource_WithCustomFilename()
{
    var okResponse = new RestResponse
    {
        StatusCode = HttpStatusCode.OK,
        RawBytes = new byte[] { 1, 2, 3, 4, 5 },
        IsSuccessStatusCode = true
    };
    _mockRestClient
        .Setup(client => client.ExecuteAsync(It.IsAny<RestRequest>(), It.IsAny<CancellationToken>()))
        .ReturnsAsync(okResponse);
    var source = new UrlInputSource("https://example.com/file.pdf");

    var local = await source.AsLocalInputSource("custom.pdf", restClient: _mockRestClient.Object);

    Assert.IsType<LocalInputSource>(local);
    Assert.Equal("custom.pdf", local.Filename);
}

/// <summary>
/// Passing a username and password alongside the mocked client must still produce a
/// LocalInputSource named after the URL's file.
/// </summary>
[Fact]
public async Task AsLocalInputSource_WithAuthentication()
{
    var okResponse = new RestResponse
    {
        StatusCode = HttpStatusCode.OK,
        RawBytes = new byte[] { 1, 2, 3, 4, 5 },
        IsSuccessStatusCode = true
    };
    _mockRestClient
        .Setup(client => client.ExecuteAsync(It.IsAny<RestRequest>(), It.IsAny<CancellationToken>()))
        .ReturnsAsync(okResponse);
    var source = new UrlInputSource("https://example.com/file.pdf");

    var local = await source.AsLocalInputSource(
        username: "user",
        password: "pass",
        restClient: _mockRestClient.Object);

    Assert.IsType<LocalInputSource>(local);
    Assert.Equal("file.pdf", local.Filename);
}

/// <summary>
/// A custom filename without an extension must be rejected with a MindeeInputException.
/// </summary>
[Fact]
public async Task AsLocalInputSource_InvalidFilename()
{
    var source = new UrlInputSource("https://example.com/file.pdf");

    Func<Task> download = () => source.AsLocalInputSource("invalid");

    await Assert.ThrowsAsync<MindeeInputException>(download);
}
}
}
36 changes: 0 additions & 36 deletions tests/Mindee.UnitTests/Product/Fr/CarteVitale/CarteVitaleV1Test.cs

This file was deleted.

0 comments on commit 0864fc6

Please sign in to comment.