Skip to content

Commit 2e2a8c3

Browse files
committed
2 parents aabb0ae + 4e5435d commit 2e2a8c3

File tree

6 files changed

+194
-0
lines changed

6 files changed

+194
-0
lines changed
Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
using System.IO;
2+
using System.IO.Compression;
3+
using System.Threading.Tasks;
4+
using System.Text.Json;
5+
using System.Diagnostics;
6+
using Amazon.Lambda.Core;
7+
using Amazon.S3;
8+
using DiscUtils.Iso9660;
9+
10+
// Assembly attribute to enable the Lambda function's JSON input to be converted into a .NET class.
11+
[assembly: LambdaSerializer(typeof(Amazon.Lambda.Serialization.SystemTextJson.DefaultLambdaJsonSerializer))]
12+
13+
namespace Process1GBWith256MBLambda
14+
{
15+
/// <summary>
/// Lambda handlers for the "process 1 GB with 256 MB" example: streams an ISO image
/// from S3, opens a zip archive stored inside it, and counts the lines of one text
/// file inside that archive without buffering the whole object in memory.
/// </summary>
public class Functions
{
    // Location of the ISO image in S3.
    const string BUCKET = "rds.nsrl.nist.gov";
    const string KEY = "RDS/current/RDS_ios.iso";

    // Zip archive inside the ISO, and the text file inside that archive.
    const string ZIPNAME = "NSRLFILE.ZIP";
    const string FILENAME = "NSRLFile.txt";

    public Functions()
    {
    }

    /// <summary>
    /// Reads every line of <c>FILENAME</c> (inside <c>ZIPNAME</c>, inside the ISO at
    /// <c>s3://BUCKET/KEY</c>) and reports how many rows and bytes were processed.
    /// </summary>
    /// <param name="request">The JSON payload of the invocation; not read by this handler.</param>
    /// <param name="context">Lambda context; only its logger is used, for progress messages.</param>
    /// <returns>
    /// An anonymous object with <c>IsoPath</c>, <c>TotalRead</c>, <c>TotalLoaded</c>,
    /// <c>Length</c> (of the zip entry), <c>TotalRows</c> and <c>Status</c>.
    /// </returns>
    /// <exception cref="FileNotFoundException">
    /// The zip archive does not contain an entry named <c>FILENAME</c>.
    /// </exception>
    public async Task<object> Get(JsonDocument request, ILambdaContext context)
    {
        // Declared first so it is disposed last, after every stream that reads through it.
        using var s3 = new AmazonS3Client();

        // easier than doing math on the timestamps in logs
        var timer = Stopwatch.StartNew();

        context.Logger.LogLine($"{timer.Elapsed}: Getting started.");

        // NOTE(review): the last two arguments are presumably the range size (12 MiB) and the
        // number of ranges kept cached -- confirm against SeekableS3Stream.
        using var stream = new Cppl.Utilities.AWS.SeekableS3Stream(s3, BUCKET, KEY, 12 * 1024 * 1024, 5);
        using var iso = new CDReader(stream, true);
        using var embedded = iso.OpenFile(ZIPNAME, FileMode.Open, FileAccess.Read);
        using var zip = new ZipArchive(embedded, ZipArchiveMode.Read);

        // GetEntry returns null (it does not throw) when the entry is missing; fail with a
        // clear message rather than a NullReferenceException on the next line.
        var entry = zip.GetEntry(FILENAME);
        if (entry is null)
        {
            throw new FileNotFoundException($"Entry '{FILENAME}' was not found in '{ZIPNAME}'.", FILENAME);
        }

        using var file = entry.Open();
        using var reader = new StreamReader(file);

        // how soon do we get the first line?
        var first = await reader.ReadLineAsync();
        context.Logger.LogLine($"{timer.Elapsed}: First row received.");

        // read all of the remaining lines (it'll take a while...); an empty file has zero rows
        ulong rows = first is null ? 0UL : 1UL;
        while (await reader.ReadLineAsync() != null)
        {
            ++rows;
        }
        context.Logger.LogLine($"{timer.Elapsed}: Done reading rows.");

        // the total amount read should be close to the total file size, but the amount loaded may be greater than
        // the file size if too few ranges are held in the MRU and end up being loaded multiple times.
        context.Logger.LogLine($"{timer.Elapsed}: {stream.TotalRead:0,000} read {stream.TotalLoaded:0,000} loaded of {stream.Length:0,000} bytes");
        timer.Stop();

        return new
        {
            IsoPath = $"s3://{BUCKET}/{KEY}",
            stream.TotalRead,
            stream.TotalLoaded,
            entry.Length,
            TotalRows = rows,
            Status = "ok"
        };
    }
}
69+
}
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
<Project Sdk="Microsoft.NET.Sdk">
2+
<PropertyGroup>
3+
<TargetFramework>netcoreapp3.1</TargetFramework>
4+
<GenerateRuntimeConfigurationFiles>true</GenerateRuntimeConfigurationFiles>
5+
<AWSProjectType>Lambda</AWSProjectType>
6+
</PropertyGroup>
7+
<ItemGroup>
8+
<PackageReference Include="Amazon.Lambda.Core" Version="1.1.0" />
9+
<PackageReference Include="Amazon.Lambda.Serialization.SystemTextJson" Version="2.0.1" />
10+
<PackageReference Include="AWSSDK.S3" Version="3.5.4.1" />
11+
<PackageReference Include="DiscUtils" Version="0.11.0.2" />
12+
<PackageReference Include="System.IO.Compression" Version="4.3.0" />
13+
<PackageReference Include="System.Text.Json" Version="5.0.0" />
14+
</ItemGroup>
15+
<ItemGroup>
16+
<ProjectReference Include="..\..\SeekableS3Stream\SeekableS3Stream.csproj" />
17+
</ItemGroup>
18+
</Project>
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
# Impossible Lambda
2+
3+
This example demonstrates a Lambda function that uses `SeekableS3Stream` to fully process a file that is much larger than the available RAM. In this case the Lambda is intentionally under-provisioned with only 256 MB of RAM, while the file to be processed is nearly 500 MB and decompresses to around 1 GB of content. Processing such a file with a naive solution that reads it entirely into RAM (likely using a `MemoryStream`) would obviously not work here, so this is a case where `SeekableS3Stream` makes the "impossible" possible.
4+
5+
**NOTE**: Running this Lambda processes a very large amount of data, and will cost you money. Keep that in mind, please.
6+
7+
Beyond unblocking a real use case, the seekable-stream approach shows additional performance benefits. The time between starting the processing and the first line being read is around 1 second -- far less than the time it takes just to copy the file with a "read it all into memory" approach. Additionally, with that head start the overall run time is also substantially less (all other things being equal). Nice.
8+
9+
## Deploying from the Command Line
10+
11+
This application may be deployed using the [Amazon.Lambda.Tools Global Tool](https://github.com/aws/aws-extensions-for-dotnet-cli#aws-lambda-amazonlambdatools) from the command line.
12+
13+
Install Amazon.Lambda.Tools Global Tools if not already installed.
14+
```
15+
dotnet tool install -g Amazon.Lambda.Tools
16+
```
17+
18+
If already installed, check whether a new version is available.
19+
```
20+
dotnet tool update -g Amazon.Lambda.Tools
21+
```
22+
23+
Deploy application (run from the project directory)
24+
```
25+
dotnet lambda deploy-serverless
26+
```
27+
28+
## Results
29+
30+
Here are the log messages and output of a run of the function. In two minutes and eleven seconds, the 9 million lines in the zip file (1.2 GB of text) are read while only 256MB of RAM is used.
31+
32+
```
33+
2020-11-15T21:10:29.717-08:00 START RequestId: 14e3c267-6782-4943-a619-a45c859e7593 Version: $LATEST
34+
2020-11-15T21:10:29.851-08:00 00:00:00.0000017: Getting started.
35+
2020-11-15T21:10:32.554-08:00 00:00:02.7031771: First row received.
36+
2020-11-15T21:12:41.791-08:00 00:02:11.9228511: Done reading rows.
37+
2020-11-15T21:12:41.791-08:00 00:02:11.9231731: 394,072,280 read 415,236,096 loaded of 407,250,944 bytes
38+
2020-11-15T21:12:41.853-08:00 END RequestId: 14e3c267-6782-4943-a619-a45c859e7593
39+
2020-11-15T21:12:41.853-08:00 REPORT RequestId: 14e3c267-6782-4943-a619-a45c859e7593 Duration: 132133.06 ms Billed Duration: 132200 ms Memory Size: 256 MB Max Memory Used: 256 MB
40+
```
41+
42+
```json
43+
{
44+
"IsoPath": "s3://rds.nsrl.nist.gov/RDS/current/RDS_ios.iso",
45+
"TotalRead": 394072280,
46+
"TotalLoaded": 415236096,
47+
"Length": 1202942018,
48+
"TotalRows": 9037375,
49+
"Status": "ok"
50+
}
51+
```
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
{
2+
"Information": [
3+
"This file provides default values for the deployment wizard inside Visual Studio and the AWS Lambda commands added to the .NET Core CLI.",
4+
"To learn more about the Lambda commands with the .NET Core CLI execute the following command at the command line in the project root directory.",
5+
"dotnet lambda help",
6+
"All the command line options for the Lambda command can be specified in this file."
7+
],
8+
"profile": "",
9+
"region": "",
10+
"configuration": "Release",
11+
"framework": "netcoreapp3.1",
12+
"s3-prefix": "Process1GBWith256MBLambda/",
13+
"template": "serverless.template",
14+
"template-parameters": ""
15+
}
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
{
2+
"AWSTemplateFormatVersion": "2010-09-09",
3+
"Transform": "AWS::Serverless-2016-10-31",
4+
"Description": "An AWS Serverless Application.",
5+
"Resources": {
6+
"Processor": {
7+
"Type": "AWS::Serverless::Function",
8+
"Properties": {
9+
"Handler": "Process1GBWith256MBLambda::Process1GBWith256MBLambda.Functions::Get",
10+
"Runtime": "dotnetcore3.1",
11+
"CodeUri": "",
12+
"MemorySize": 256,
13+
"Timeout": 900,
14+
"Role": null,
15+
"Policies": [
16+
"AWSLambdaBasicExecutionRole",
17+
{
18+
"S3ReadPolicy": {
19+
"BucketName": "rds.nsrl.nist.gov"
20+
}
21+
}
22+
]
23+
}
24+
}
25+
}
26+
}

SeekableS3Stream.sln

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "LoadSchemaFromParquet", "Ex
1515
EndProject
1616
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ReadFirstLineOfZipInsideIso", "Examples\ReadFirstLineOfZipInsideIso\ReadFirstLineOfZipInsideIso.csproj", "{88A17C35-69A2-4E87-9AAF-F57288A9225E}"
1717
EndProject
18+
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Process1GBWith256MBLambda", "Examples\Process1GBWith256MBLambda\Process1GBWith256MBLambda.csproj", "{E30D3035-4345-4475-BB96-4C0767B4289B}"
19+
EndProject
1820
Global
1921
GlobalSection(SolutionConfigurationPlatforms) = preSolution
2022
Debug|Any CPU = Debug|Any CPU
@@ -88,11 +90,24 @@ Global
8890
{88A17C35-69A2-4E87-9AAF-F57288A9225E}.Release|x64.Build.0 = Release|Any CPU
8991
{88A17C35-69A2-4E87-9AAF-F57288A9225E}.Release|x86.ActiveCfg = Release|Any CPU
9092
{88A17C35-69A2-4E87-9AAF-F57288A9225E}.Release|x86.Build.0 = Release|Any CPU
93+
{E30D3035-4345-4475-BB96-4C0767B4289B}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
94+
{E30D3035-4345-4475-BB96-4C0767B4289B}.Debug|Any CPU.Build.0 = Debug|Any CPU
95+
{E30D3035-4345-4475-BB96-4C0767B4289B}.Debug|x64.ActiveCfg = Debug|Any CPU
96+
{E30D3035-4345-4475-BB96-4C0767B4289B}.Debug|x64.Build.0 = Debug|Any CPU
97+
{E30D3035-4345-4475-BB96-4C0767B4289B}.Debug|x86.ActiveCfg = Debug|Any CPU
98+
{E30D3035-4345-4475-BB96-4C0767B4289B}.Debug|x86.Build.0 = Debug|Any CPU
99+
{E30D3035-4345-4475-BB96-4C0767B4289B}.Release|Any CPU.ActiveCfg = Release|Any CPU
100+
{E30D3035-4345-4475-BB96-4C0767B4289B}.Release|Any CPU.Build.0 = Release|Any CPU
101+
{E30D3035-4345-4475-BB96-4C0767B4289B}.Release|x64.ActiveCfg = Release|Any CPU
102+
{E30D3035-4345-4475-BB96-4C0767B4289B}.Release|x64.Build.0 = Release|Any CPU
103+
{E30D3035-4345-4475-BB96-4C0767B4289B}.Release|x86.ActiveCfg = Release|Any CPU
104+
{E30D3035-4345-4475-BB96-4C0767B4289B}.Release|x86.Build.0 = Release|Any CPU
91105
EndGlobalSection
92106
GlobalSection(NestedProjects) = preSolution
93107
{B72A9C11-2DBF-4A75-932C-54D61F0C696B} = {497DFE31-D99D-426F-8C99-BC74679447D9}
94108
{D919413C-1011-461E-9EF7-4228F6008725} = {497DFE31-D99D-426F-8C99-BC74679447D9}
95109
{6683488F-8F57-4868-B70F-8DFA4C092F16} = {497DFE31-D99D-426F-8C99-BC74679447D9}
96110
{88A17C35-69A2-4E87-9AAF-F57288A9225E} = {497DFE31-D99D-426F-8C99-BC74679447D9}
111+
{E30D3035-4345-4475-BB96-4C0767B4289B} = {497DFE31-D99D-426F-8C99-BC74679447D9}
97112
EndGlobalSection
98113
EndGlobal

0 commit comments

Comments
 (0)