-
Notifications
You must be signed in to change notification settings - Fork 0
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
remove receiver, add scraper, few mods #11
base: master
Are you sure you want to change the base?
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
{ | ||
"specversion":"0.2", | ||
"type":"cloudevent.flickr.image", | ||
"source":"scraper", | ||
"id":"my-id", | ||
"contenttype":"application\/json", | ||
"data":{ | ||
"photo_url":"https:\/\/farm2.staticflickr.com\/1051\/1154370504_58bafbc654_c.jpg" | ||
}, | ||
"extensions":{} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,15 +1,16 @@ | ||
import fdk | ||
import ujson | ||
import os | ||
import random | ||
import flickrapi | ||
import ssl | ||
import sys | ||
import random | ||
import requests | ||
from fdk import fixtures | ||
import fdk | ||
|
||
ssl._create_default_https_context = ssl._create_unverified_context | ||
from cloudevents.sdk import converters | ||
from cloudevents.sdk import marshaller | ||
from cloudevents.sdk.converters import structured | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Consider using the binary format instead of the structured one. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Why? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. It is simpler: less effort to parse the event itself. |
||
from cloudevents.sdk.event import v02 | ||
|
||
#ssl._create_default_https_context = ssl._create_unverified_context | ||
|
||
flickr = flickrapi.FlickrAPI( | ||
os.environ.get("FLICKR_API_KEY"), | ||
|
@@ -18,52 +19,41 @@ | |
format='parsed-json' | ||
) | ||
|
||
PHOTO_SOURCE_URL = 'https://farm{0}.staticflickr.com/{1}/{2}_{3}{4}.{5}' | ||
|
||
def get_image_url(photo_dict): | ||
return PHOTO_SOURCE_URL.format( | ||
photo_dict['farm'], photo_dict['server'], | ||
photo_dict['id'], photo_dict['secret'], | ||
'_c', 'jpg' | ||
) | ||
|
||
|
||
def photo_to_payload(body, photo_dict): | ||
return { | ||
"id": photo_dict.get('id'), | ||
"image_url": get_image_url(photo_dict), | ||
"countrycode": body.get("countrycode"), | ||
"bucket": body.get("bucket", "") | ||
} | ||
|
||
|
||
def handler(ctx, data=None, loop=None): | ||
payloads = [] | ||
|
||
# Scrape Flickr | ||
if data and len(data) > 0: | ||
body = ujson.loads(data) | ||
|
||
photos = flickr.photos.search( | ||
text=body.get("query", "baby smile"), | ||
per_page=int(body.get("num", "5")), | ||
page=int(body.get("page", int(random.uniform(1, 50)))), | ||
extras="original_format", | ||
safe_search="1", | ||
content_type="1", | ||
text=body.get("query", "baby smile"), | ||
per_page=int(body.get("num", "5")), | ||
page=int(body.get("page", int(random.uniform(1, 50)))), | ||
extras="original_format", | ||
safe_search="1", | ||
content_type="1", | ||
) | ||
|
||
# For each photo | ||
for p in photos.get('photos', {'photo': {}}).get('photo', []): | ||
payloads.append(photo_to_payload(body, p)) | ||
|
||
for p in payloads: | ||
this_payload = { | ||
"media":[p.get("image_url")], | ||
"event_id": "test_id", | ||
"event_type": "test_type", | ||
"ran_on": "<fn-hostname-or-dns>" | ||
} | ||
process_url = "http://docker.for.mac.localhost:8080/t/cloudevents/image-processor" | ||
r = requests.post(process_url, data=ujson.dumps(this_payload)) | ||
|
||
return {"result": payloads} | ||
photo_url_tpl = 'https://farm{0}.staticflickr.com/{1}/{2}_{3}{4}.{5}' | ||
photo_url = photo_url_tpl.format(p['farm'], p['server'],p['id'], p['secret'], '_c', 'jpg') | ||
|
||
data = {"photo_url": photo_url} | ||
|
||
event = ( | ||
v02.Event(). | ||
SetContentType("application/json"). | ||
SetData(data). | ||
SetEventID("my-id"). | ||
SetSource("scraper"). | ||
SetEventType("cloudevent.flickr.image") | ||
) | ||
event_json = ujson.dumps(event.Properties()) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. That’s why I pointed you to the ToRequest API. Here you’re basically lucky because your event data is a dict; in any other case it wouldn’t work. Here you need to create an instance of a marshaller and then call its ToRequest method, which will turn your event object into a serialized one (headers and a body). |
||
print(event_json) | ||
|
||
process_url = "http://docker.for.mac.localhost:8080/t/cloudevents/image-processor" | ||
r = requests.post(process_url, data=event_json) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. It’s not enough to send a JSON-like object and call it a CloudEvent. You need to set the content type to `application/cloudevents+json`. |
||
|
||
|
||
if __name__ == "__main__": | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,4 @@ | ||
fdk | ||
flickrapi | ||
requests | ||
requests | ||
cloudevents |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Imported but not used
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Well, I presume I need to use those if I want to send the event the right way (not as JSON).