Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

remove receiver, add scraper, few mods #11

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 16 additions & 17 deletions functions/image-processor/func.py
Original file line number Diff line number Diff line change
Expand Up @@ -326,7 +326,6 @@ def process_single_media_file(ctx, sess, media_url, label_map, event_id, event_t


def with_graph(label_map):

sess = tf.Session()
sess.graph.as_default()

Expand All @@ -337,23 +336,23 @@ def fn(ctx, data=None, loop=None):
if data is not None or len(data) !=0:
data = ujson.loads(data)
log.info("incoming data: {0}".format(ujson.dumps(data)))
media = data.get("media", [])
event_id = data.get("event_id")
event_type = data.get("event_type", "")
if event_type.startswith("Microsoft"):
event_type = "Azure"
event_id = event_id.replace("-", "")
ran_on = data.get("ran_on", "Fn Project on Oracle Cloud")

for media_url in media:
img, status = process_single_media_file(
ctx, sess, media_url, label_map,
event_id, event_type, ran_on
)

post_image(ctx, status, media_url, add_fn_logo(img))
else:
log.info("missing data")
# media = data.get("media", [])
event_id = data.get("id")
event_type = data.get("type", "")
photo_url = data.get("data").get("photo_url")

# if event_type.startswith("Microsoft"):
# event_type = "Azure"
# event_id = event_id.replace("-", "")

ran_on = data.get("ran_on", "Fn Project")

img, status = process_single_media_file(
ctx, sess, photo_url, label_map,
event_id, event_type, ran_on
)
post_image(ctx, status, photo_url, add_fn_logo(img))

return fn

Expand Down
2 changes: 1 addition & 1 deletion functions/image-processor/func.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
schema_version: 20180708
name: image-processor
version: 0.0.104
version: 0.0.109
runtime: docker
format: http-stream
memory: 1024
Expand Down
11 changes: 11 additions & 0 deletions functions/image-processor/payload.ce.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
{
"specversion":"0.2",
"type":"cloudevent.flickr.image",
"source":"scraper",
"id":"my-id",
"contenttype":"application\/json",
"data":{
"photo_url":"https:\/\/farm2.staticflickr.com\/1051\/1154370504_58bafbc654_c.jpg"
},
"extensions":{}
}
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,15 @@
"http://survivingchurch.org/wp-content/uploads/2016/10/Donald-Trump-Photos-HD-1024x768.png",
"https://www.dairyqueen.com/Global/Food/Hot-Dogs_8-to-1_470x500.jpg",
"https://pbs.twimg.com/profile_images/625802912887783424/_uzj-IZ4.jpg",
"https://i.ytimg.com/vi/lFarE1hH0ss/maxresdefault.jpg",
"https://i.ytimg.com/vi/lFarE1hH0ss/maxresdefault.jpg"
],
"media_test": [
"https://i.ytimg.com/vi/Lu8lDgKInSM/hqdefault.jpg",
"http://m.step.aero/Burningman_2017_gifting_plane_rides.jpg",
"http://m.step.aero/Burningman_2017_raising_and_rigging.jpg",
"http://m.step.aero/Chad's%20Dog.png"
],
"media_test": [
"https://i.ytimg.com/vi/Lu8lDgKInSM/hqdefault.jpg"
],
"event_id": "test_id",
"event_type": "test_type",
"ran_on": "<fn-hostname-or-dns>"
}
}
80 changes: 35 additions & 45 deletions functions/scraper/func.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,16 @@
import fdk
import ujson
import os
import random
import flickrapi
import ssl
import sys
import random
import requests
from fdk import fixtures
import fdk

ssl._create_default_https_context = ssl._create_unverified_context
from cloudevents.sdk import converters
from cloudevents.sdk import marshaller
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Imported but not used

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Well, I presume I need to use those if I want to send the event the right way (not as JSON).

from cloudevents.sdk.converters import structured
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Consider using binary format instead of structured.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Simpler, and less effort to parse the event itself.

from cloudevents.sdk.event import v02

#ssl._create_default_https_context = ssl._create_unverified_context

flickr = flickrapi.FlickrAPI(
os.environ.get("FLICKR_API_KEY"),
Expand All @@ -18,52 +19,41 @@
format='parsed-json'
)

PHOTO_SOURCE_URL = 'https://farm{0}.staticflickr.com/{1}/{2}_{3}{4}.{5}'

def get_image_url(photo_dict):
return PHOTO_SOURCE_URL.format(
photo_dict['farm'], photo_dict['server'],
photo_dict['id'], photo_dict['secret'],
'_c', 'jpg'
)


def photo_to_payload(body, photo_dict):
return {
"id": photo_dict.get('id'),
"image_url": get_image_url(photo_dict),
"countrycode": body.get("countrycode"),
"bucket": body.get("bucket", "")
}


def handler(ctx, data=None, loop=None):
payloads = []

# Scrape Flickr
if data and len(data) > 0:
body = ujson.loads(data)

photos = flickr.photos.search(
text=body.get("query", "baby smile"),
per_page=int(body.get("num", "5")),
page=int(body.get("page", int(random.uniform(1, 50)))),
extras="original_format",
safe_search="1",
content_type="1",
text=body.get("query", "baby smile"),
per_page=int(body.get("num", "5")),
page=int(body.get("page", int(random.uniform(1, 50)))),
extras="original_format",
safe_search="1",
content_type="1",
)

# For each photo
for p in photos.get('photos', {'photo': {}}).get('photo', []):
payloads.append(photo_to_payload(body, p))

for p in payloads:
this_payload = {
"media":[p.get("image_url")],
"event_id": "test_id",
"event_type": "test_type",
"ran_on": "<fn-hostname-or-dns>"
}
process_url = "http://docker.for.mac.localhost:8080/t/cloudevents/image-processor"
r = requests.post(process_url, data=ujson.dumps(this_payload))

return {"result": payloads}
photo_url_tpl = 'https://farm{0}.staticflickr.com/{1}/{2}_{3}{4}.{5}'
photo_url = photo_url_tpl.format(p['farm'], p['server'],p['id'], p['secret'], '_c', 'jpg')

data = {"photo_url": photo_url}

event = (
v02.Event().
SetContentType("application/json").
SetData(data).
SetEventID("my-id").
SetSource("scraper").
SetEventType("cloudevent.flickr.image")
)
event_json = ujson.dumps(event.Properties())
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That’s why I pointed you to the ToRequest API. Here you’re basically lucky because your event data is a dict; in any other case it wouldn’t work.

Here you need to create an instance of a marshaller and call its ToRequest method, which will turn your event object into a serialized one (headers and a body).

print(event_json)

process_url = "http://docker.for.mac.localhost:8080/t/cloudevents/image-processor"
r = requests.post(process_url, data=event_json)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It’s not enough to send a JSON-like object and call it a CloudEvent. You need to set the content type to application/cloudevents+json.



if __name__ == "__main__":
Expand Down
2 changes: 1 addition & 1 deletion functions/scraper/func.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
schema_version: 20180708
name: scraper
version: 0.0.29
version: 0.0.30
runtime: python3.7
entrypoint: python3 func.py
format: http-stream
Expand Down
3 changes: 2 additions & 1 deletion functions/scraper/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
fdk
flickrapi
requests
requests
cloudevents