-
Notifications
You must be signed in to change notification settings - Fork 23
/
bloom.class.php
483 lines (434 loc) · 10 KB
/
bloom.class.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
<?php
/**
* Bloom filter php
*
* This is Bloom's filter php implementation
*
* @author Spartak Kagramanayan <mr.spartak[at]rambler.ru>
* @version 0.7
*/
/**
* Main BloomClass
* Use cases:
* -to cooldown HDD usage
* -to speedup checking of object excistance (with counter 40% slower, but still more faster than native)
* -to save memory
*
* When to use:
* -Get/Set operations are more than 0.001
* -You need a fast check with big set, more than 100 000 entries
*/
class Bloom
{
/**
* Bloom object container
* @var mixed
*/
public $set;
/**
* Array of Hashes objects
* @var array
*/
public $hashes;
/**
* Error chance (0;1)
* @var float
*/
public $error_chance;
/**
* Size of set variable
* @var int
*/
public $set_size;
/**
* Number of different hash objects
* @var int
*/
public $hash_count;
/**
* Number of current entries
* @var int
*/
public $entries_count;
/**
* Number of entries max
* @var int
*/
public $entries_max;
/**
* Use counter or not (for remove ability)
* @var boolean
*/
public $counter;
/**
* Alphabet for counter
* @var string
*/
public $alphabet = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ';
/**
* Map of user setup parameters
* @access private
* @var boolean
*/
private $map = array(
'entries_max' => array(
'type' => 'integer',
'min' => 0
),
'error_chance' => array(
'type' => 'double',
'min' => 0,
'max' => 1
),
'set_size' => array(
'type' => 'integer',
'min' => 100
),
'hash_count' => array(
'type' => 'integer',
'min' => 1
),
'counter' => array(
'type' => 'boolean'
),
'hash' => array(
'strtolower' => array(
'type' => 'boolean'
)
)
);
/**
* Class initiation
*
* @param array
* @return BloomObject
*/
public function __construct($setup = null)
{
/**
* Default parameters
*/
$params = array(
'entries_max' => 100,
'error_chance' => 0.001,
'counter' => false,
'hash' => array(
'strtolower' => true
)
);
/**
* Applying income user parameters
*/
$params = Map::apply($this->map, $params, $setup);
/**
* Setting every parameters as properties
*/
foreach($params as $key => $value)
$this->$key = $value;
/**
* Calculating size of set using maximum number of entries and error chance
*/
if(!$this->set_size)
$this->set_size = -round( ( $this->entries_max * log($this->error_chance) ) / pow(log(2), 2) );
/**
* Calculating number of hashes using maximum number of entries and set size
*/
if(!$this->hash_count)
$this->hash_count = round($this->set_size * log(2) / $this->entries_max);
/**
* Setting up HashObjects to hashes property
*/
for($i = 0; $i < $this->hash_count; $i++)
$this->hashes[] = new Hash($params['hash'], $this->hashes);
/**
* Initiation set
*/
$this->set = str_repeat('0', $this->set_size);
return $this;
}
/**
* For serializing
*
* @return object for serializing
*/
public function __sleep() {
foreach($this as $key => $attr)
$result[] = $key;
if($this->entries_count == 0)
unset($result['set']);
return $result;
}
/**
* For unserializing
*
* @return object unserialized object
*/
public function __wakeup() {
if($this->entries_count == 0)
$this->set = str_repeat('0', $this->set_size);
}
/**
* Set value to Bloom filter
*
* @param mixed
* @return BloomObject
*/
public function set($mixed) {
/**
* In case of array given, no matter how depp it is,
* method calls itself recursively with each array element
*/
if( is_array($mixed) )
foreach($mixed as $arg)
$this->set($arg);
/**
* Otherwise method set mark into set property by using every HashObject
*/
else
for($i=0; $i < $this->hash_count; $i++) {
if($this->counter === false)
$this->set[ $this->hashes[$i]->crc($mixed, $this->set_size) ] = 1;
else
$this->counter( $this->hashes[$i]->crc($mixed, $this->set_size), 1 );
$this->entries_count++;
}
return $this;
}
/**
* Unset value from Bloom filter
*
* @param mixed
* @return mixed (boolean) or (array)
*/
public function delete($mixed) {
if($this->counter === false)
return false;
/**
* In case of array given, no matter how depp it is,
* method calls itself recursively with each array element
*/
if( is_array($mixed) ) {
foreach($mixed as $key => $arg)
$result[$key] = $this->delete($arg);
return $result;
}
/**
* Otherwise method decrements mark if element exists
*/
else
if($this->has($mixed)) {
for($i=0; $i < $this->hash_count; $i++) {
$this->counter($this->hashes[$i]->crc($mixed, $this->set_size), -1);
$this->entries_count--;
}
return true;
}
else
return false;
}
/**
* Works with special string in counter mode
*
* @param int position
* @param int number to add
* @param boolean return value or setup set
* @return mixed
*/
public function counter($position, $add = 0, $get = false) {
/**
* Return value or recalculate with alphabet
*/
if($get === true)
return $this->set[$position];
else {
$in_a = strpos($this->alphabet, $this->set[$position]);
$this->set[$position] = ($this->alphabet[$in_a + $add] != null) ? $this->alphabet[$in_a + $add] : $this->set[$position];
}
}
/**
* Test set with given array or string, to check it existance
*
* @param mixed (array) or (string)
* @param boolean
* @return mixed (array) or (boolean) or (float)
*/
public function has($mixed, $boolean = true) {
/**
* In case of array given will be returned array,
* and method call's itself recursively with ararray's alements
*/
if( is_array($mixed) ) {
foreach($mixed as $key => $arg)
$result[$key] = $this->has($arg, $boolean);
return $result;
} else {
$c = 0;
for($i=0; $i < $this->hash_count; $i++) {
if($this->counter === false)
$value = $this->set[ $this->hashes[$i]->crc($mixed, $this->set_size) ];
else
$value = $this->counter($this->hashes[$i]->crc($mixed, $this->set_size), 0, true);
/**
* $boolean parameter allows to choose what to return
* boolean or the procent of entries pass
*/
if($boolean && !$value)
return false;
elseif($boolean === false)
$c += ($value) ? 1 : 0;
}
return ($boolean === true) ? true : $c/$this->hash_count;
}
}
}
/**
* MapClass
* Use cases:
* -check user's setup parameters and merge it with default one
*
* Map view:
* paramskey => arrayMap
*
* ArrayMap view
* -null (boolean): checks required parameter
* -type (string): checks for type of parameter (values from gettype())
* -min, max (float, int): allowed range of number value
*
* Throws Ecpetion with message
*/
class Map {
/**
* Main method, applie's default and given parameters with map
*
* @param array map
* @param array default parameters
* @param array given parameters
* @return array merged result parameters
*/
static public function apply($map, $initial, $setup) {
self::circl($map, $setup);
return array_merge($initial, (array) $setup);
}
/**
* Recursively follows map
*
* @param array map
* @param array given parameters
*/
static private function circl($map, $rabbit) {
foreach($map as $k => $element) {
if (is_array($element) && (!array_key_exists('type', $element) || !$element['type']) && array_key_exists($k, $rabbit)) {
unset($rabbit[$k]);
self::circl($element, $rabbit[$k]);
} else if (array_key_exists($k, $rabbit)) {
self::check($element, $rabbit[$k]);
unset($rabbit[$k]);
}
}
if($rabbit)
throw new Exception('Unexpected array arguments. '.json_encode( $rabbit ));
}
/**
* Check map rules for given object
*
* @param array map
* @param mixed given parameters element
*/
static private function check($map, $rabbit) {
/**
* required statement check
*/
if (array_key_exists('null', $map) && $map['null'] === false && !$rabbit)
throw new Exception('Must be not NULL');
/**
* If no element exists, exit
*/
if(!$rabbit)
return true;
/**
* Check for type
*/
if (array_key_exists('type', $map) && $map['type'] !== gettype($rabbit) && $map['type'])
throw new Exception('Wrong type '.gettype($rabbit).'! Must be '.$map['type']);
/**
* Check for minimal range
*/
if (array_key_exists('min', $map) && $map['min'] > $rabbit && $map['min'] !== null)
throw new Exception('Interval overflow by '.$rabbit.'! Must be '.$map['min']);
/**
* Check for maximal range
*/
if (array_key_exists('min', $map) && $map['min'] > $rabbit && $map['min'] !== null)
throw new Exception('Interval overflow by '.$rabbit.'! Must be '.$map['max']);
}
}
/**
* HashClass
* Use cases:
* -creates random hash generator
*/
class Hash {
/**
* Seed for unification every HashObject
* @var array
*/
public $seed;
/**
* Parameters
* @var array
*/
public $params;
/**
* Map of user setup parameters
* @access private
* @var boolean
*/
private $map = array(
'strtolower' => array(
'type' => 'boolean'
)
);
/**
* Initialization
*
* @param array parameters
* @return object HashObject
*/
public function __construct($setup = null, $hashes = null) {
/**
* Default parameters
*/
$params = array(
'strtolower' => true
);
/**
* Applying income user parameters
*/
$params = Map::apply($this->map, $params, $setup);
$this->params = $params;
/**
* Creating unique seed
*/
$seeds = array();
if($hashes)
foreach($hashes as $hash)
$seeds = array_merge( (array) $seeds, (array) $hash->seed );
do {
$hash = substr(str_shuffle("0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"), 0, 6);
} while( in_array($hash, $seeds) );
$this->seed[] = $hash;
}
/**
* Hash use's crc32 and md5 algorithms to get number less than $size parameter
*
* @param mixed object to hash
* @param int max number to return
* @return int
*/
public function crc($string, $size) {
$string = strval($string);
if($this->params['strtolower'] === true)
$string = mb_strtolower($string, 'UTF-8');
return abs( crc32( md5($this->seed[0] . $string) ) ) % $size;
}
}