add sample scripts, update manual
kevinlawler committed Aug 28, 2024
1 parent 274e7ae commit 4aa5a60
Showing 77 changed files with 4,143 additions and 0 deletions.
Binary file modified manual/refmanual.pdf
54 changes: 54 additions & 0 deletions scripts/bloomberg.kerf

Large diffs are not rendered by default.

28 changes: 28 additions & 0 deletions scripts/gen-license.kerf
@@ -0,0 +1,28 @@
//do not share
//how to generate kerf-license.dat

if(len(.Argv) < 5) {
out 'Bad #args: need, e.g., ./kerf gen-license.kerf "TheCustomer Name" "3m" "mytempfilename.txt" \n'
exit(1);
}

customer: .Argv[2 + 0]
duration: eval(.Argv[2 + 1])
filename: .Argv[2 + 2]

expiry: now()['date'] + duration

premap: {'customer':customer, 'expiry': expiry}
deliver: premap

premap['kerf']: 82349792832925
verifier: checksum premap

deliver['sign']: verifier

str: '\u00ae\u00af' xor uneval deliver

write_to_path(filename, str)

exit(0)
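For reference, the sign-then-obfuscate flow above (build the map, add the secret field, checksum, attach the signature, XOR the serialization) can be sketched in Python. This is a hypothetical stand-in, not the Kerf implementation: `zlib.crc32` and `repr` substitute for Kerf's `checksum` and `uneval`, whose actual algorithms are not shown here.

```python
import zlib

SECRET = 82349792832925  # the embedded 'kerf' key from gen-license.kerf

def make_license(customer, expiry):
    premap = {"customer": customer, "expiry": expiry}
    deliver = dict(premap)                   # copy taken before the secret is added
    premap["kerf"] = SECRET                  # secret field is checksummed but never shipped
    # zlib.crc32 over a canonical serialization stands in for Kerf's checksum
    verifier = zlib.crc32(repr(sorted(premap.items())).encode())
    deliver["sign"] = verifier
    blob = repr(sorted(deliver.items())).encode()
    key = "\u00ae\u00af".encode("latin-1")   # same two-byte XOR pad the script uses
    return bytes(b ^ key[i % 2] for i, b in enumerate(blob))

def decode_license(data):
    # XOR with the same pad is its own inverse
    key = "\u00ae\u00af".encode("latin-1")
    return bytes(b ^ key[i % 2] for i, b in enumerate(data))
```

A verifier would decode, re-insert the secret field, recompute the checksum, and compare it against the stored `sign`.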

12 changes: 12 additions & 0 deletions scripts/greeks.kerf
@@ -0,0 +1,12 @@

//Normal cumulative distribution function, with mean and deviation as arguments
def .Math.normal_cdf(value, mean, deviation) {
M_SQRT1_2: 1 / sqrt 2
return 0.5 * erfc(-(value - mean) * M_SQRT1_2 / deviation);
}

def .Math.standard_normal_cdf(x) {
.Math.normal_cdf(x, 0, 1)
}
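The same identity, Φ(x) = ½·erfc(−x/√2) generalized with a mean and deviation, can be cross-checked in Python, which has `math.erfc` built in:

```python
import math

def normal_cdf(value, mean=0.0, deviation=1.0):
    # Phi(z) = 0.5 * erfc(-z / sqrt(2)), with z = (value - mean) / deviation
    return 0.5 * math.erfc(-(value - mean) * math.sqrt(0.5) / deviation)
```

For example, `normal_cdf(1.96)` is approximately 0.975, the familiar two-sided 5% quantile.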


6 changes: 6 additions & 0 deletions scripts/math/dojo/README.md
@@ -0,0 +1,6 @@
The material in here is Scott's "Kerf workouts": K and J exercises translated into Kerf.
It could be turned into learning or marketing material.
Since the codebase of "things written in Kerf" is small, even my anemic noodlings
are worth something.


114 changes: 114 additions & 0 deletions scripts/math/dojo/stats.kerf
@@ -0,0 +1,114 @@
// Taken from http://kx.com/a/k/examples/stat.k
// aggregations
// avg:{(+/x)%#x}
average:{[x] add fold x / count x}

def average2(x){
sm: add fold x
sm / count(x)
}

function average3(x){
sum(x) / count(x)
}

average4:{[x] + \/ x / count x}
average5:{[x] sum(x) / count(x)}
average6:{[x] + fold x / count x}

// var:{avg[x*x]-a*a:avg x}
// population variance
variance: {[x] a:avg(x); avg(x*x) - a*a}
def variance2(x) {
a:avg(x)
avg(x*x) - a*a
}



def mstd(n,x) {
a:mavg(n,x);
return sqrt(mavg(n,x*x) - a*a)
}


def mstd(n,x) {
a:mavg(n,x);
norm: sqrt(n/n-1);
return norm * sqrt((mavg(n,x*x) - a*a))
}

mstd2: {[n,x] sqrt mavg(n,x*x) -a*a:mavg(n,x)}

variance3:{[x] avg(x*x) -a*a:avg x}


// dev:{_sqrt var x}
dev:{[x] sqrt var x}

// cov:{avg[x*y]-avg[x]*avg y}
cov:{[x,y] avg(x*y) - (avg(x) * avg(y))}


// cor:{cov[x;y]%dev[x]*dev y}
cor:{[x,y] cov(x,y) / dev(x) * dev(y)}
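For cross-checking, the same aggregations in Python (population variance, as in the K original). One wrinkle: assuming Kerf's K-style right-to-left evaluation, `dev(x) * dev(y)` in `cor` above already groups as the denominator; a left-to-right language needs the parentheses written out.

```python
import math

def avg(x):    return sum(x) / len(x)
def var(x):    a = avg(x); return avg([v * v for v in x]) - a * a  # population variance
def dev(x):    return math.sqrt(var(x))
def cov(x, y): return avg([a * b for a, b in zip(x, y)]) - avg(x) * avg(y)
def cor(x, y): return cov(x, y) / (dev(x) * dev(y))
```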

// ut:{[f;x]:[1=#x;,,1.0;(,1.0,r),(r:f[*x]'y),'_f[f]y:1_ x]}

// / uniform moving window
// avgs:{[n;x]x-(-n)_(((n-1)#0n),0.0),x:+\x%n}

// vars:{[n;x]avgs[n;x*x]-a*a:avgs[n;x]}
// devs:{[n;x]_sqrt vars[n;x]}
// covs:{[n;x;y]avgs[n;x*y]-avgs[n;x]*avgs[n;y]}
// cors:{[n;x;y]covs[n;x;y]%devs[n;x]*devs[n;y]}

// / autocorrelation
// aut:{(+/*':x)%+/x*x-:avg x}

// / weighted average
// wavg:{(+/x*y)%+/x}
wavg:{[x,y] (+ fold x * y) / (+ fold x)}

// / median
// med:{.5*+/x(<x)@_.5*-1 0+#x}

// / x random weighted by y
// skew:{(+\y%+/y)_binl x _draw 0}

// pi:3.14159265358979323846

// / normal distribution, e.g. nor[10]
// nor:{[n]if[n!2;:-1__f n+1];t:_sqrt-2*_log *u:2 -1#n _draw 0;(t*_sin x),t*_cos x:(2*pi)**|u}

// / normal cumulative Abramowitz & Stegun 26.2.17
// nc:{_abs(x>0)-(_exp[-.5*x*x]%_sqrt 2*pi)*t*.31938153+t*-0.356563782+t*1.781477937+t*-1.821255978+1.330274429*t:%1+.2316419*_abs x}
nc:{[x]
pi: 3.14159265358979323846
t: 1/ 1 + 0.2316419 * abs x
abs(x>0) - (exp(-.5*x*x)/sqrt 2*pi)*t*.31938153+t*-0.356563782+t*1.781477937+t*-1.821255978+1.330274429*t
}
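A Python transcription of the same Abramowitz & Stegun 26.2.17 polynomial, checked against `math.erf`; the `abs((x>0) - ...)` trick folds the tail probability Q(|x|) into Φ(x) for both signs:

```python
import math

def nc(x):
    # Abramowitz & Stegun 26.2.17: Phi(x) via a degree-5 polynomial in t,
    # stated accuracy about 7.5e-8
    t = 1.0 / (1.0 + 0.2316419 * abs(x))
    z = math.exp(-0.5 * x * x) / math.sqrt(2.0 * math.pi)
    poly = t * (0.31938153 + t * (-0.356563782 + t * (1.781477937
           + t * (-1.821255978 + t * 1.330274429))))
    return abs((1.0 if x > 0 else 0.0) - z * poly)
```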

// / black scholes (- v's for puts)
// bs:{[s;x;t;r;v](s*nc h+v)-x*_exp[-r]*nc h:((_log[s%x]+r*:t)%v)-.5*v*:_sqrt t}
// s:60;x:65;t:.25;r:.08;v:.3
// bs[s;x;t;r;-v]
// \ not right yet; doesn't work in K either.
black_scholes:{[s,x,t,r,v]
h:((log(s/x)+r*:t)/v)-.5*v*:sqrt t
(s*erf( h+v))-x*exp[-r]*erf( h)
}
def black_scholes(s,k,r,tau,sigma) {
d1: (log(s/k) + (r + 0.5*sigma*sigma)*tau)/(sigma * sqrt(tau))
d2: d1 - sigma * sqrt(tau)
s * erf(d1) - k * exp(-r*tau) * erf(d2)
}
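The "not right yet" note above is expected: erf(x) is not the standard normal CDF. Substituting Φ(x) = ½·(1 + erf(x/√2)) gives the standard call formula, and the textbook parameters from the comment (s:60, x:65, t:.25, r:.08, v:.3) then price to about 2.13. A Python sketch:

```python
import math

def phi(x):
    # standard normal CDF; erf alone is not a CDF, hence the "doesn't work" note
    return 0.5 * (1.0 + math.erf(x / math.sqrt(2.0)))

def black_scholes_call(s, k, r, tau, sigma):
    d1 = (math.log(s / k) + (r + 0.5 * sigma * sigma) * tau) / (sigma * math.sqrt(tau))
    d2 = d1 - sigma * math.sqrt(tau)
    return s * phi(d1) - k * math.exp(-r * tau) * phi(d2)
```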

// / spearman
// spear:{1-(6*+/d*d:(<x)-<y)%(n^3)-n:#x}
// / spearman matrix, e.g. sm 100 22 _draw 0
// sm:{1-(6*+/''d*d:x-\:/:x:<:'x)%(n^3)-n:#*x}

// / great circle nautical miles (68.7 to 69.4 miles per degree)
// gcm:{rm:(%m:180*60%pi)*60 _sv;m*_acos(_sin[x]*_sin y)+_cos[x:rm x 0]*_cos[y:rm y 0]*_cos(rm x 1)-rm y 1}
// gcm[lax:(33 57;118 24)]jfk:(40 38;73 47)
77 changes: 77 additions & 0 deletions scripts/math/wavelets.kerf
@@ -0,0 +1,77 @@
HighPass2: {[x] { reverse(pow(-1,range(0,len(x))) * x)}} // why doesn't this work? likely because the inner {...} makes a second function, so HighPass2 returns a lambda rather than the filtered vector
HighPass: {[x] reverse x * (-1)**range(0,len(x))}
// drot =: ((i.(#y)) +/ (0,((_1 * #y) + }.i.(#{.x)))) { y'
Drot: {[x, y] range(count(y)) +/ (0,(-1 * count(y)) + INSERTVB range(count(x)) car take y)}
// 13 : '|: |. ((2 %~ (#x)),2) $ |. y'
filtRot: {[x, y] transpose reverse (count(x)) }
// 13 : ' (2 # (x drot y)) * ((2*#y)$x)'
reducer: {[x, y] (flatten transpose drot(x y)) * repeat((2 * count y),x) }
// oddx=: ] {~ ([: (] #~ 0 1 $~ #) [: i. [: # ]) -/ [: i. [: # [
oddx: {[x, y] x} //todo

function dwt(x,y) {
lpf: wdict x
hpf: HighPass lpf
yvals: oddx hpf y
level: yvals mmul hpf
wav: yvals mmul lpf
{wav:wav,lvl:level}
}

function idwt(x,y) {
wv: y['wav']
yl: y['lvl']
+/ reducer(lpf yl) + reducer(hpf wv)
}
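A single-level Haar transform makes the dwt/idwt pair above concrete: low-pass taps (1/√2, 1/√2) as in `wdict['haar']`, high-pass obtained by sign flipping as in `HighPass`. This is a Python sketch of the idea, not a port of the Kerf code (which handles general filters and the decimation via `oddx`/`reducer`):

```python
import math

R = 1.0 / math.sqrt(2.0)

def haar_dwt(x):
    # split into low-pass approximation ('wav') and high-pass detail ('lvl') halves
    wav = [(x[i] + x[i + 1]) * R for i in range(0, len(x), 2)]
    lvl = [(x[i] - x[i + 1]) * R for i in range(0, len(x), 2)]
    return {"wav": wav, "lvl": lvl}

def haar_idwt(d):
    # interleave the upsampled reconstructions; exact inverse of haar_dwt
    out = []
    for a, b in zip(d["wav"], d["lvl"]):
        out += [(a + b) * R, (a - b) * R]
    return out
```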

def mp(x,y) {
dot(x,y) \>
}

def dot(x,y) {
sum x * y
}


wdict: {
haar: 0.707107 0.707107,
w4: -0.125 0.375 -0.375 0.125,
mb4: 0.4801755 0.8372545 0.2269312 -0.1301477,
mb8: -0.1673619 0.01847751 0.5725771 0.7351331 0.2947855 -0.1108673 0.007106015 0.06436345,
mb16:-0.0130277 0.02173677 0.1136116 -0.0577657 -0.2278359 0.1188725 0.6349228 0.6701646 0.2345342 -0.05656657 -0.01987986 0.05474628 -0.02483876 -0.04984698 0.009620427 0.005765899,
mb24: -2.132706e-05 0.0004745736 0.0007456041 -0.004879053 -0.001482995 0.04199576 -0.002658282 -0.006559513 0.1019512 0.1689456 0.1243531 0.1949147 0.4581101 0.6176385 0.2556731 -0.3091111 -0.3622424 -0.004575448 0.1479342 0.01027154 -0.01644859 -0.002062335 0.001193006 5.361301e-05,
db4: 0.482962913144534 0.836516303737808 0.224143868042013 -0.12940952255126,
db6: 0.332670552950083 0.806891509311093 0.459877502118491 -0.135011020010255 -0.0854412738820267 0.0352262918857096,
db8: 0.230377813307443 0.714846570548406 0.630880767935879 -0.0279837694166834 -0.187034811717913 0.0308413818353661 0.0328830116666778 -0.0105974017850021,
db16: 0.0544158422431049 0.312871590914303 0.67563073629729 0.585354683654191 -0.0158291052563816 -0.28401554296157 0.0004724845739124 0.128747426620484 -0.0173693010018083 -0.0440882539307952 0.0139810279173995 0.0087460940474061 -0.0048703529934518 -0.000391740373377 0.0006754494064506 -0.0001174767841248,
fk4: 0.653927555569765 0.753272492839487 0.0531792287790598 -0.0461657148152177,
fk6: 0.42791503242231 0.812919643136907 0.356369511070187 -0.146438681272577 -0.0771777574069701 0.0406258144232379,
fk8: 0.3492381118638 0.782683620384065 0.475265135079471 -0.0996833284505732 -0.15997809743403 0.0431066681065162 0.0425816316775818 -0.0190001788537359,
fk14: 0.260371769291396 0.686891477239599 0.611554653959511 0.0514216541421191 -0.245613928162192 -0.0485753390858553 0.124282560921513 0.0222267396224631 -0.0639973730391417 -0.00507437254997285 0.029779711590379 -0.00329747915270872 -0.00927061337444824 0.00351410097043596,
la8: -0.0757657147893567 -0.0296355276459604 0.497618667632563 0.803738751805386 0.297857795605605 -0.0992195435769564 -0.0126039672622638 0.0322231006040782,
la16: 0.0544158422431049 0.312871590914303 0.67563073629729 0.585354683654191 -0.0158291052563816 -0.28401554296157 0.0004724845739124 0.128747426620484 -0.0173693010018083 -0.0440882539307952 0.0139810279173995 0.0087460940474061 -0.0048703529934518 -0.000391740373377 0.0006754494064506 -0.0001174767841248,
la20: 0.000770159809103 9.56326707837e-05 -0.0086412992759401 -0.0014653825833465 0.0459272392237649 0.0116098939129724 -0.159494278857531 -0.0708805358108615 0.471690666842659 0.769510037014339 0.383826761225382 -0.0355367403054689 -0.0319900568281631 0.049994972079156 0.0057649120455518 -0.020354939803946 -0.000804358934537 0.0045931735836703 5.7036084339e-05 -0.0004593294205481,
}


def dot(x,y) {
+/(transpose x)*y
}
a dot b
r: m1 dot/:\: +m2 / dot product with the transpose.


a: [[1 2], [3 4]]
b: [[5 6], [7 8]]

c: [[1 2 3],[4 5 6], [7 8 9]]

minv([1 2, 3 4]) - [-2 1, 1.5 -0.5]

a:[1 2, 3 4]; b:[5 6, 7 8]; a mmul b
[19 22, 43 50]
a:[1 2 3, 4 5 6]; b:[7 8, 9 10, 11 12]; a mmul b
[58 64, 139 154]
TEST_EQUAL("a: [6 2, 2 3]; b: 22 10; sum(abs minus(lsq(a,b), [23/7, 8/7])) < pow(10, -9)","1");
TEST_EQUAL("a: [6 2, 2 3]; b: 22 10; sum(abs minus( a\\b, [23/7, 8/7])) < pow(10, -9)","1");
TEST_EQUAL("pow(10, -9) > sum fold minv([1 2, 3 4]) - [-2 1, 1.5 -0.5]", "1");
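The mmul/minv expectations above can be cross-checked with a plain-Python sketch (no numpy assumed; `mmul` and `minv2` are illustrative names, not Kerf's builtins):

```python
def mmul(a, b):
    # naive matrix product, a is m x n, b is n x p
    return [[sum(a[i][k] * b[k][j] for k in range(len(b)))
             for j in range(len(b[0]))] for i in range(len(a))]

def minv2(m):
    # closed-form inverse of a 2x2 matrix
    (a, b), (c, d) = m
    det = a * d - b * c
    return [[d / det, -b / det], [-c / det, a / det]]
```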
108 changes: 108 additions & 0 deletions scripts/perfdemo/fintime/README.md
@@ -0,0 +1,108 @@
I was originally going to use some futures data I had kicking around, and the parse/load
code for that exists, but Dennis Shasha's FinTime benchmark is vastly superior:
it can be distributed and jury-rigged to do other things, and Shasha, a professor
who helped invent KDB, knows what a TSDB benchmark should look like better than I do.

http://cs.nyu.edu/shasha/fintime.html

It's an ancient benchmark, but it looks quite useful, and is related to the TPC-D/H
benchmarks somehow.

Ultimately, what I'd like to do here is set this up for Kerf, then several other common
things, and demonstrate how Kerf beats the pants off of these other tools.

We suggest 3 scale factors, namely, 50,000 securities, 100,000 securities, and 1,000,000 securities, all for 4,000 days. These roughly correspond to all equity securities in the US, all equity securities in the G7 countries and all equity securities in the world.

./histgen 2000 5000
/*2000 equities 5000 days*/

tickgen n 20 t 20 d 30
/*20 scale 20 ticks 30 days*/


1. Get the closing price of a set of 10 stocks for a 10-year period
and group into weekly, monthly and yearly aggregates. For each
aggregate period determine the low, high and average closing price
value. The output should be sorted by id and trade date.


2. Adjust all prices and volumes (prices are multiplied by the split
factor and volumes are divided by the split factor) for a set of
1000 stocks to reflect the split events during a specified 300 day
period, assuming that events occur before the first trade of the
split date. These are called split-adjusted prices and volumes.


3. For each stock in a specified list of 1000 stocks, find the
differences between the daily high and daily low on the day of each
split event during a specified period.


4. Calculate the value of the S&P500 and Russell 2000 index for a
specified day using unadjusted prices and the index composition of
the 2 indexes (see appendix for spec) on the specified day


5. Find the 21-day and 5-day moving average price for a specified list
of 1000 stocks during a 6-month period. (Use split adjusted prices)

6. (Based on the previous query) Find the points (specific days) when
the 5-month moving average intersects the 21-day moving average for
these stocks. The output is to be sorted by id and date.


7. Determine the value of $100,000 now if 1 year ago it was invested
equally in 10 specified stocks (i.e. allocation for each stock is
$10,000). The trading strategy is: When the 20-day moving average
crosses over the 5-month moving average the complete allocation for
that stock is invested and when the 20-day moving average crosses
below the 5-month moving average the entire position is sold. The
trades happen on the closing price of the trading day.
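Queries 5-7 all hinge on moving averages and their crossing points. A Python sketch of the two pieces (the helper names `moving_avg`/`crossings` and the test data are illustrative, not the benchmark's reference implementation):

```python
def moving_avg(xs, n):
    # simple n-day moving average; the first n-1 positions are undefined (None)
    out, s = [], 0.0
    for i, v in enumerate(xs):
        s += v
        if i >= n:
            s -= xs[i - n]
        out.append(s / n if i >= n - 1 else None)
    return out

def crossings(short, long_):
    # indices where the short-window series crosses the long-window series
    idx = []
    for i in range(1, len(short)):
        if None in (short[i - 1], long_[i - 1], short[i], long_[i]):
            continue
        if (short[i - 1] - long_[i - 1]) * (short[i] - long_[i]) < 0:
            idx.append(i)
    return idx
```

Query 6 is then `crossings(moving_avg(px, 5), moving_avg(px, 21))` per stock; query 7 alternates buys and sells at those indices.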



8. Find the pair-wise coefficients of correlation in a set of 10
securities for a 2 year period. Sort the securities by the
coefficient of correlation, indicating the pair of securities
corresponding to that row. [Note: coefficient of correlation
defined in appendix]



9. Determine the yearly dividends and annual yield (dividends/average
closing price) for the past 3 years for all the stocks in the
Russell 2000 index that did not split during that period. Use
unadjusted prices since there were no splits to adjust for.


appendix: S&P/Russell indices. The composition of these indexes
can be generated by randomly selecting 500 and 2000 distinct
securities at random uniformly and without replacement from the
universe of securities.

For the tick piece:
1. Get all ticks for a specified set of 100 securities for a specified
three hour time period on a specified trade date.

2. Determine the volume weighted price of a security considering only
the ticks in a specified three hour interval
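The volume-weighted price in query 2 is just Σ(price·size) / Σ(size) over the chosen ticks; in Python:

```python
def vwap(trades):
    # trades: iterable of (price, size) pairs within the chosen interval
    notional = sum(p * q for p, q in trades)
    volume = sum(q for _, q in trades)
    return notional / volume
```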


3. Determine the top 10 percentage losers for the specified date on
the specified exchanges sorted by percentage loss. The loss is
calculated as a percentage of the last trade price of the previous
day.

4. Determine the top 10 most active stocks for a specified date sorted
by cumulative trade volume by considering all trades

5. Find the most active stocks in the "COMPUTER" industry (use SIC code)

6. Find the 10 stocks with the highest percentage spreads. Spread is
the difference between the last ask-price and the last
bid-price. Percentage spread is calculated as a percentage of the
mid-point price (average of ask and bid price).


