-
Notifications
You must be signed in to change notification settings - Fork 0
/
TwitterFunction.cs
125 lines (111 loc) · 5.12 KB
/
TwitterFunction.cs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
using System;
using System.IO;
using System.Linq;
using Microsoft.Azure.WebJobs;
using Microsoft.Azure.WebJobs.Extensions.Http;
using Microsoft.AspNetCore.Http;
using Microsoft.Extensions.Logging;
using Newtonsoft.Json;
using System.Collections.Generic;
using System.Threading.Tasks;
using Microsoft.WindowsAzure.Storage;
namespace MCSC
{
/// <summary>
/// Process source tweet list, look for any unique entries that mention missing persons
/// </summary>
/// <remarks>
/// Tweets that were already handled are tracked in a JSON blob. The blob location is read from the
/// "BlobStorageConnectionString", "BlobStorageContainerName" and "BlobStorageBlobName" settings.
/// </remarks>
public static class TwitterFunction
{
    /// <summary>
    /// HTTP-triggered entry point. Reads a tweet container from the request body, keeps the tweets
    /// that match the configured keywords, drops the ones that were already processed and enqueues
    /// the remaining ones.
    /// </summary>
    /// <param name="req">Incoming POST request whose body is a JSON serialized <c>MCSC.V2.ContainerModel</c>.</param>
    /// <param name="queueCollector">Output queue binding that receives every newly seen tweet.</param>
    /// <param name="log">Function logger.</param>
    /// <returns>A task that completes once processing (and the blob upload, if any) has finished.</returns>
    /// <exception cref="InvalidOperationException">
    /// The request body does not deserialize to a container, the storage connection string cannot be
    /// parsed, or the keyword setting is unusable.
    /// </exception>
    [StorageAccount("BlobStorageConnectionString")]
    [FunctionName("TwitterFunction")]
    public static async Task Run(
        [HttpTrigger(AuthorizationLevel.Function, "post", Route = null)] HttpRequest req,
        [Queue("%QueueName_TwitterOutput%")] ICollector<TweetModel> queueCollector,
        ILogger log)
    {
        try
        {
            // Read the body asynchronously: synchronous reads of the request stream are rejected
            // by default on ASP.NET Core 3.0+ and block a thread while waiting on the network.
            string requestBody;
            using (var reader = new StreamReader(req.Body))
            {
                requestBody = await reader.ReadToEndAsync();
            }

            var container = JsonConvert.DeserializeObject<MCSC.V2.ContainerModel>(requestBody);
            if (container == null)
            {
                // An empty or literal "null" body deserializes to null; fail with a clear message
                // instead of a NullReferenceException.
                throw new InvalidOperationException("request body did not contain a tweet container");
            }

            var tweets = FilterMissingTweets(container.ConvertToArchived());
            log.LogInformation($"Number of tweets matching the keywords: {tweets.Count}");
            if (tweets.Count == 0)
            {
                return;
            }

            // order the tweets by date
            tweets.Sort(new TweetDateComparer());

            if (!CloudStorageAccount.TryParse(
                    Environment.GetEnvironmentVariable("BlobStorageConnectionString", EnvironmentVariableTarget.Process),
                    out var storageAccount))
            {
                throw new InvalidOperationException("unable to create storage account connection");
            }

            var blobReference = storageAccount.CreateCloudBlobClient()
                .GetContainerReference(Environment.GetEnvironmentVariable("BlobStorageContainerName", EnvironmentVariableTarget.Process))
                .GetBlockBlobReference(Environment.GetEnvironmentVariable("BlobStorageBlobName", EnvironmentVariableTarget.Process));

            List<TweetModel> tweetsFromStorage = null;
            if (await blobReference.ExistsAsync())
            {
                string jsonString = await blobReference.DownloadTextAsync();
                tweetsFromStorage = JsonConvert.DeserializeObject<List<TweetModel>>(jsonString);
            }

            // A missing blob, or one holding an empty/"null" document, means nothing was processed yet.
            tweetsFromStorage = tweetsFromStorage ?? new List<TweetModel>();

            // Ids of everything already processed; kept in sync with tweetsFromStorage so that
            // duplicates inside the current batch are detected as well.
            var processedIds = tweetsFromStorage.Select(t => t.TweetId).ToHashSet();

            int newTweetsCount = 0;
            foreach (var tweet in tweets)
            {
                // if this is a re-tweet then check to see if we've already processed the original
                if (tweet.OriginalTweet != null && processedIds.Contains(tweet.OriginalTweet.TweetId))
                {
                    continue;
                }

                // skip the tweet if it was already processed (Add returns false for a known id)
                if (!processedIds.Add(tweet.TweetId))
                {
                    continue;
                }

                tweetsFromStorage.Add(tweet);
                queueCollector.Add(tweet);
                newTweetsCount++;
            }

            log.LogInformation($"Duplicate check completed, number of new tweets {newTweetsCount}");
            if (newTweetsCount > 0)
            {
                // Before we upload the processed tweets, let's trim down some old data - anything older than a year.
                // The cut-off is computed once so every entry is compared against the same instant.
                // NOTE(review): uses local time; confirm CreatedAtIso is expressed in the same time base.
                var cutoff = DateTime.Now.AddYears(-1);
                int removedTweets = tweetsFromStorage.RemoveAll(w => w.CreatedAtIso < cutoff);
                log.LogInformation($"Removed {removedTweets} old tweet(s) from the processed tweets.json file.");
                await blobReference.UploadTextAsync(JsonConvert.SerializeObject(tweetsFromStorage));
            }
        }
        catch (Exception e)
        {
            log.LogError(e, "Error in twitter function.");
            throw;
        }
    }

    /// <summary>
    /// Returns the tweets whose text contains at least one of the comma separated keywords found in
    /// the "TweetKeywords" setting (culture-invariant, case-insensitive match).
    /// </summary>
    /// <param name="tweets">Candidate tweets.</param>
    /// <returns>A new list holding the matching tweets, in their original order.</returns>
    /// <exception cref="InvalidOperationException">The setting is missing or holds no usable keyword.</exception>
    private static List<TweetModel> FilterMissingTweets(IEnumerable<TweetModel> tweets)
    {
        var keywordsList = Environment.GetEnvironmentVariable("TweetKeywords", EnvironmentVariableTarget.Process);

        // Blank entries (for example from a trailing comma) are dropped: string.Contains("") is
        // always true, so a blank keyword would let every tweet through the filter.
        var keywords = (keywordsList ?? string.Empty)
            .Split(',')
            .Where(keyword => !string.IsNullOrWhiteSpace(keyword))
            .ToArray();

        if (keywords.Length == 0)
        {
            throw new InvalidOperationException("TweetKeywords setting is missing or contains no keywords");
        }

        // Tweets without text cannot match any keyword and are skipped.
        return tweets
            .Where(tweet => tweet?.TweetText != null &&
                            keywords.Any(keyword => tweet.TweetText.Contains(keyword, StringComparison.InvariantCultureIgnoreCase)))
            .ToList();
    }
}
/// <summary>
/// Orders tweets by their <c>CreatedAtIso</c> value, oldest first.
/// </summary>
/// <remarks>
/// Null references sort before any non-null tweet. Treating null as equal to every element would
/// not be a consistent ordering, and sorting with an inconsistent comparer can misorder the list
/// or make the sort throw.
/// </remarks>
public class TweetDateComparer : IComparer<TweetModel>
{
    /// <summary>
    /// Compares two tweets by creation date.
    /// </summary>
    /// <param name="x">First tweet; may be null.</param>
    /// <param name="y">Second tweet; may be null.</param>
    /// <returns>
    /// A negative value when <paramref name="x"/> sorts before <paramref name="y"/>, zero when they
    /// are equivalent, and a positive value when <paramref name="x"/> sorts after <paramref name="y"/>.
    /// </returns>
    public int Compare(TweetModel x, TweetModel y)
    {
        if (x is null)
        {
            // null equals null and precedes everything else
            return y is null ? 0 : -1;
        }

        if (y is null)
        {
            return 1;
        }

        return x.CreatedAtIso.CompareTo(y.CreatedAtIso);
    }
}
}