-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathExamples-PROC_FCMP_Python-PharmaSUG2023.sas
436 lines (361 loc) · 11.9 KB
/
Examples-PROC_FCMP_Python-PharmaSUG2023.sas
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
* Set directory to save functions;
options cmplib=work.funcs;
* Check environment variables;
/* Reads MAS_M2PATH and MAS_PYPATH with SYSGET and writes each value to
   the SAS log. NOTE(review): these look like the settings PROC FCMP
   needs to locate Python - confirm against the site configuration. */
data _null_;
    MAS_M2PATH = sysget('MAS_M2PATH');
    put MAS_M2PATH=;

    MAS_PYPATH = sysget('MAS_PYPATH');
    put MAS_PYPATH=;
run;
* Input data;
/* Fetch the workbook at the URL below with an HTTP GET and store the
   response body in the file that fileref OUTXLSX points to. */
filename outxlsx 'XXXX\PharmaSUG22\privateschools2122.xlsx';

proc http
    method='get'
    url='https://www.cde.ca.gov/ds/si/ps/documents/privateschools2122.xlsx'
    out=outxlsx
    ;
run; quit;
/* Open the downloaded workbook with the XLSX engine and copy the named
   sheet range into WORK.SCHOOLS_SDS, adding two derived columns:
     cds_code     - CDS_CODE rendered as 14 characters by the Z14. format
                    (built as CDS_CODE_CHAR, then renamed on output)
     full_address - street, city, state and zip joined with ', '      */
libname schools xlsx 'XXXX\PharmaSUG22\privateschools2122.xlsx';

data schools_sds(drop=cds_code rename=(cds_code_char=cds_code));
    /* Declared before SET so the character code is the first column. */
    length cds_code_char $ 14;
    set schools.'2021-22 Private School Data$A3:0'n;

    cds_code_char = put(cds_code, z14.);
    full_address  = catx(', ', street, city, state, zip);
run;

/* Release the workbook once the data has been copied. */
libname schools clear;
/*--------------------------------+
| SAS ONLY "Hello World" examples |
+--------------------------------*/
* 1. SAS hello function;
proc fcmp outlib=work.funcs.sas;
    /* hello_from_sas(name): returns 'Hello, ' followed by NAME.
       The return value is declared as character, length 25. */
    function hello_from_sas(name $) $ 25;
        greeting_text = 'Hello, ' || name;
        return(greeting_text);
    endfunc;
run;
/* Call the function from a DATA step and write the result to the log. */
data _null_;
    message = hello_from_sas('PharmaSUG');
    put message=;
run;

/* The same function called from the macro facility through %SYSFUNC. */
%put %sysfunc(hello_from_sas(PharmaSUG));
* 2. SAS Hello/Goodbye routine;
proc fcmp outlib=work.funcs.sas;
    /* hello_goodbye(name, greeting, farewell)
         name     - input text
         greeting - output, set to 'Hello, ' followed by NAME
         farewell - output, set to NAME followed by ', Goodbye!'
       OUTARGS marks the two arguments the routine writes back to the
       caller. */
    subroutine hello_goodbye(name $, greeting $, farewell $);
        outargs greeting, farewell;

        greeting = 'Hello, ' || name;
        farewell = name || ', Goodbye!';
    endsub;
run;
/* Call the routine and print each returned message on its own log line.
   The receiving variables are given a length before the call. */
data _null_;
    length message1 message2 $ 25;
    call hello_goodbye('PharmaSUG', message1, message2);
    put message1
      / message2;
run;
/*----------------------------------+
| SAS/Python "Hello World" examples |
+----------------------------------*/
* 1. Inline FCMP Python hello;
/* Defines a Python function inside PROC FCMP, publishes and calls it,
   then reads the value back through the result key named in the Python
   docstring. The embedded Python is kept flush-left so that its
   indentation is unambiguous. */
proc fcmp;
    length message $ 25;
    declare object py(python);

    submit into py;
def hello():
    """Output: hello_return_value"""
    return 'Hello'
endsubmit;

    rc = py.publish();
    rc = py.call('hello');
    message = py.results['hello_return_value'];

    /* Route PUT output to the SAS log. */
    file log;
    put message=;
run;
* 2. SAS/Python function hello, with random module;
proc fcmp outlib=work.funcs.python;
    /* greetings_from_python(): returns one of three greetings picked by
       the Python random module, as a character value of length 25.
       The embedded Python is kept flush-left so that its indentation is
       unambiguous. */
    function greetings_from_python() $ 25;
        length message $ 25;
        declare object py(python);

        submit into py;
def greetings():
    """Output: greetings_return_value"""
    import random
    candidates = ['Hello', "What's up", 'How do you do?']
    return random.choice(candidates)
endsubmit;

        rc = py.publish();
        rc = py.call('greetings');
        message = py.results['greetings_return_value'];
        return(message);
    endfunc;
run;
/* Call the Python-backed function once and log the greeting it chose. */
data _null_;
    message = greetings_from_python();
    put message=;
run;
* 3. SAS FCMP Python function hello, with faker;
proc fcmp outlib=work.funcs.python;
    /* personal_greetings_from_python(greeting, name)
         greeting - output, a greeting picked at random in Python
         name     - output, a name produced by the Python faker package
       The Python function returns two values, read back through the two
       result keys listed in its docstring. The embedded Python is kept
       flush-left so that its indentation is unambiguous. */
    subroutine personal_greetings_from_python(greeting $, name $);
        length greeting $ 25 name $ 25;
        outargs greeting, name;
        declare object py(python);

        submit into py;
def personal_greetings():
    """Output: greeting_return_value, name_return_value"""
    import random
    from faker import Faker
    openers = ['Hello,', "What's up,", 'How do you do,']
    chosen_opener = random.choice(openers)
    generated_name = Faker().name()
    return chosen_opener, generated_name
endsubmit;

        rc = py.publish();
        rc = py.call('personal_greetings');
        greeting = py.results['greeting_return_value'];
        name = py.results['name_return_value'];
    endsub;
run;
/* Call the routine once and log the greeting followed by the name.
   Both receiving variables are given a length before the call. */
data _null_;
    length greeting name $ 25;
    call personal_greetings_from_python(greeting, name);
    put greeting name;
run;
* 4. Passing a parameter to Python;
proc fcmp outlib=work.funcs.python;
    /* data_driven_hello_from_python(name): passes NAME to Python and
       returns the text the Python function builds from it
       ('Hello ' + name + '!'), as a character value of length 25.
       The embedded Python is kept flush-left so that its indentation is
       unambiguous. */
    function data_driven_hello_from_python(name $) $ 25;
        length Message $ 25;
        declare object py(python);

        submit into py;
def data_driven_hello(name):
    """Output: hello_return_value"""
    salutation = f'Hello {name}!'
    return salutation
endsubmit;

        rc = py.publish();
        /* Arguments after the function name are passed on to Python. */
        rc = py.call('data_driven_hello', name);
        message = py.results['hello_return_value'];
        return(message);
    endfunc;
run;
/* Build one greeting per row of SASHELP.CLASS from its NAME column,
   then list the generated messages. */
data greetings;
    set sashelp.class;
    message = data_driven_hello_from_python(name);
run;

proc print data=greetings;
    var message;
run;
/*-----------------------------------------------+
| Persisting State Between Python Function Calls |
+-----------------------------------------------*/
proc fcmp outlib=work.funcs.pyfuncs;
    /* is_first_occurrence(value): numeric result taken from the Python
       function below, which returns True the first time it is handed a
       given VALUE and False on every later call with the same VALUE.
       The values already seen are kept in a Python module-level global,
       created on the first call (when the name does not exist yet).
       The embedded Python is kept flush-left so that its indentation is
       unambiguous. */
    function is_first_occurrence(value $);
        declare object py(python);

        submit into py;
def track_first_occurrences(value):
    """Output: return_value"""
    global values_already_encountered
    try:
        values_already_encountered
    except NameError:
        # first call, the global does not exist yet
        # a set gives constant-time membership tests where a list
        # would be scanned from the start on every call
        values_already_encountered = set()
    if value in values_already_encountered:
        return False
    values_already_encountered.add(value)
    return True
endsubmit;

        rc = py.publish();
        rc = py.call("track_first_occurrences", value);
        return_value = py.results["return_value"];
        return(return_value);
    endfunc;
run;
/* Keep only the rows of SASHELP.FISH for which the function reports a
   first occurrence of SPECIES, then list them. */
data first_fish;
    set sashelp.fish;
    if is_first_occurrence(species) then output;
run;

proc print data=first_fish;
run;
/*---------------------------+
| Validating Email Addresses |
+---------------------------*/
proc fcmp outlib=work.funcs.python;
    /* get_normalized_email(email): passes EMAIL to the Python
       email_validator package (deliverability check switched off) and
       returns the normalized address as a character value of length 100.
       When validation raises EmailNotValidError the function returns a
       blank and writes the exception text to the log.
       The embedded Python is kept flush-left so that its indentation is
       unambiguous. */
    function get_normalized_email(email $) $ 100;
        declare object py(python);
        length normalized_email $ 100 Exception_Encountered $ 500;

        submit into py;
def normalize_email(e):
    """Output: normalize_email_return_value, exception"""
    from email_validator import (
        validate_email, EmailNotValidError
    )
    try:
        validated = validate_email(
            e, check_deliverability=False
        )
        # NOTE(review): newer email_validator releases may prefer a
        # differently named attribute for the normalized address - confirm
        return validated.email, ' '
    except EmailNotValidError as validation_error:
        # report the exception itself, the parameter e is only the
        # address that was passed in
        return ' ', repr(validation_error)
endsubmit;

        rc = py.publish();
        rc = py.call('normalize_email', email);

        /* Surface any validation failure in the log. */
        Exception_Encountered = py.results['exception'];
        if not missing(Exception_Encountered) then
            put Exception_Encountered=;

        normalized_email = py.results['normalize_email_return_value'];
        return(normalized_email);
    endfunc;
run;
/* Normalize PRIMARY_EMAIL for every row of SCHOOLS_SDS, then list only
   the rows where the normalized value differs from the original. */
data normalized_emails;
    set schools_sds;
    normalized_email = get_normalized_email(primary_email);
run;

proc print data=normalized_emails;
    where primary_email NE normalized_email;
    var primary_email normalized_email;
run;
/*----------+
| Geocoding |
+----------*/
%let MAPQUEST_API_KEY = 'XXXXXXXXXXXXXXXXXXXXXXXXXXX';
proc fcmp outlib=work.funcs.python;
    /* get_lat_long(address, key, lat, long)
         address - input text handed to the Python geocoder package
         key     - input, passed to geocoder.mapquest as its key argument
         lat     - output, first element of the returned latlng value
         long    - output, second element of the returned latlng value
       The embedded Python is kept flush-left so that its indentation is
       unambiguous. */
    subroutine get_lat_long(address $, key $, lat, long);
        outargs lat, long;
        declare object py(python);

        submit into py;
def geocode(a, k):
    """Output: latitude_return_value, longitude_return_value"""
    import geocoder
    # NOTE(review): nothing here handles a lookup that yields no
    # coordinates, the indexing below would then raise - confirm
    coordinates = geocoder.mapquest(a, key = k).latlng
    return coordinates[0], coordinates[1]
endsubmit;

        rc = py.publish();
        rc = py.call('geocode', address, key);
        lat = py.results['latitude_return_value'];
        long = py.results['longitude_return_value'];
    endsub;
run;
/* Geocode the first 10 rows of SCHOOLS_SDS. The macro variable
   MAPQUEST_API_KEY supplies the key argument; its value already carries
   its own quotation marks. Then list address and coordinates. */
data lat_lng;
    set schools_sds(obs=10);
    call get_lat_long(full_address, &MAPQUEST_API_KEY, lat, long);
run;

proc print data=lat_lng;
    var full_address lat long;
run;
/*-----------------+
| Excel formatting |
+-----------------*/
/* Exports WORK.SCHOOLS_SDS to a formatted Excel workbook.
   The SAS side builds the path of the data set file inside the WORK
   library and hands it to Python. Python reads the file with pandas,
   writes it with the xlsxwriter engine (bold, wrapped, filled header
   row, text format and fixed width on every column, autofilter, frozen
   top row) and returns the path of the workbook, which is written to
   the log. The embedded Python is kept flush-left so that its
   indentation is unambiguous. */
proc fcmp;
    length libpath path outfile $ 500;
    libpath = pathname('work');
    /* NOTE(review): the backslash separator assumes a Windows host. */
    path = catx('\',libpath,'schools_sds.sas7bdat');
    file log;
    declare object py(python);

    submit into py;
def format_excel(datasetpath):
    """Output: output_file"""
    import pandas
    import pathlib
    import xlsxwriter
    # read a SAS dataset
    schools_df = pandas.read_sas(datasetpath, encoding='latin1')
    # output an Excel file
    # raw string, so a backslash in the directory is never read as the
    # start of an escape sequence
    file_path = pathlib.Path(r'XXXX\excel_output')
    file_name = 'example_excel_export.xlsx'
    sheet_name = 'Augmented CDE Data'
    # setup Excel file writer
    with pandas.ExcelWriter(
        pathlib.Path(file_path, file_name), engine='xlsxwriter'
    ) as writer:
        # data starts on the second row, the header row is written
        # separately below so that it can carry its own format
        schools_df.to_excel(
            writer,
            sheet_name=sheet_name,
            index=False,
            startrow=1,
            header=False,
        )
        max_column_index = schools_df.shape[1] - 1
        # setup formatting to be applied below
        workbook = writer.book
        text_format = workbook.add_format({'num_format': '@'})
        # NOTE(review): xlsxwriter vertical centering is usually spelled
        # vcenter - confirm that center gives the intended alignment
        header_format = workbook.add_format({
            'bold': True,
            'text_wrap': True,
            'valign': 'center',
            'num_format': '@',
            'fg_color': '#FFE552',  # Light Gold
            'border': 1,
        })
        # write header row values with formatting
        worksheet = writer.sheets[sheet_name]
        for col_num, value in enumerate(schools_df.columns.values):
            worksheet.write(0, col_num, value, header_format)
        # use fixed column width and use a universal text format
        worksheet.set_column(0, max_column_index, 20, text_format)
        # turn on filtering for top row
        worksheet.autofilter(
            0, 0, schools_df.shape[0], max_column_index
        )
        # turn on freeze panes for top row
        worksheet.freeze_panes(1, 0)
    # the workbook is saved when the with block exits
    return str(pathlib.Path(file_path, file_name))
endsubmit;

    rc = py.publish();
    rc = py.call('format_excel', path);
    outfile = py.results['output_file'];
    put 'Output file: ' outfile;
run;
/*------------+
| Import YAML |
+------------*/
/* Downloads a YAML file, flattens it with pandas and loads six of its
   columns into a SAS data set named LEGISLATORS through saspy.
   The SAS side passes the physical path of the WORK library. Python
   assigns that path as libref OUT in a saspy session, so the table is
   written into the directory behind WORK. The table name is returned
   and logged. A blank is returned when the download does not answer
   with HTTP status 200. The embedded Python is kept flush-left so that
   its indentation is unambiguous. */
proc fcmp;
    length workpath outfile $ 500;
    workpath = pathname('work');
    file log;
    declare object py(python);

    submit into py;
def import_yaml_to_sas(workpath):
    """Output: output_table"""
    import yaml
    import requests
    from pandas import json_normalize
    import sys
    # NOTE(review): presumably saspy queries sys.stdin.isatty and the
    # embedded interpreter needs this stub - confirm
    setattr(sys.stdin, 'isatty', lambda: False)
    from saspy import SASsession
    url = 'https://raw.githubusercontent.com/unitedstates/congress-legislators/main/legislators-current.yaml'
    # a timeout keeps a stalled connection from blocking the SAS step
    # indefinitely
    request_response = requests.get(url, timeout=60)
    if request_response.status_code != 200:
        return ' '
    legislators_list = yaml.safe_load(request_response.text)
    legislators_df = json_normalize(legislators_list)
    legislator_info = legislators_df[[
        'id.cspan',
        'name.first',
        'name.middle',
        'name.last',
        'bio.birthday',
        'bio.gender'
    ]]
    sas = SASsession()
    try:
        sas.saslib(libref='out', path=workpath)
        outds = sas.dataframe2sasdata(
            df=sas.validvarname(legislator_info),
            libref='out',
            table='legislators',
            encode_errors='replace'
        )
        table_name = outds.table
    finally:
        # always end the saspy session, even when the upload fails
        sas.endsas()
    return table_name
endsubmit;

    rc = py.publish();
    rc = py.call('import_yaml_to_sas', workpath);
    outfile = py.results['output_table'];
    if not missing(outfile) then
        put 'Output dataset:' outfile;
run;
/* List the first 10 rows of the LEGISLATORS data set. */
proc print data=legislators(obs=10);
run;