-
Notifications
You must be signed in to change notification settings - Fork 3
/
SymmetricAllocatorImplementation.hpp
1559 lines (1351 loc) · 64.1 KB
/
SymmetricAllocatorImplementation.hpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
#pragma once
////////////////////////////////////////////////////////////////////////
// Copyright (c) 2010-2015, University of Washington and Battelle
// Memorial Institute. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above
// copyright notice, this list of conditions and the following
// disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following
// disclaimer in the documentation and/or other materials
// provided with the distribution.
// * Neither the name of the University of Washington, Battelle
// Memorial Institute, or the names of their contributors may be
// used to endorse or promote products derived from this
// software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
// UNIVERSITY OF WASHINGTON OR BATTELLE MEMORIAL INSTITUTE BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
// OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
// BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
// USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
// DAMAGE.
////////////////////////////////////////////////////////////////////////
/// This is a version of dlmalloc modified to do symmetric allocation
/// across Grappa cores.
// CURRENT LIMITATION: only works in SPMD mode, outside of run()/on_all_cores()
// TODO: support spmd usage in on_all_cores()
// TODO: support single-core usage in run(), etc.
// TODO: add Grappa locks to cover remote calls (set USE_LOCKS and define lock functions)
#include "MPIConnection.hpp"
#include <unordered_map>
#include <cstdlib>
namespace Grappa {
namespace impl {
///
/// Grappa symmetric MORECORE provider
///
// Note: this describes the design we're moving towards; lots of the
// existing Grappa code uses 15 core bits and 48 address bits. We're
// moving away from that.
// Grappa global addresses are 64 bits, composed of a core part and an
// address part.
// * Core field is at least 20 bits
// * Address field uses remaining bits, so it is 44 bits for 20 core bits.
// * Grappa global addresses can only refer to user addresses, not system addresses: this means that the top 17 bits of raw virtual addresses will always be 0.
// idea is to split virtual address into 4 regions differentiated by the top two bits:
// local: 0x00000'000'0000'0000ULL
// symmetric: 0x00000'400'0000'0000ULL
// and 0x00000'800'0000'0000ULL (for now; maybe use for something else later)
// stack: 0x00000'C00'0000'0000ULL to 0x00000'fff'ffff'ffffULL
// (sign-extended to 47 bits, since it usually starts at 7fff'ffff'ffff)
// set personality() to disable aslr? personality(ADDR_NO_RANDOMIZE);
// do we need to exec() to pick this up, or will it apply to shared libraries now?
#define GRAPPA_CORE_BITS (20)
#define GRAPPA_ADDR_BITS (64 - GRAPPA_CORE_BITS)
class Morecore {
private:
static char * const alloc_min_;
static char * const alloc_max_;
size_t alloc_count_;
char * next_alloc_base_;
std::unordered_map< void *, size_t > alloc_sizes_;
public:
Morecore()
: alloc_count_( 0 )
, next_alloc_base_( alloc_min_ )
, alloc_sizes_()
{ }
void * morecore( intptr_t size );
void cleanup();
};
/// storage for Grappa morecore widget, used in allocator
extern Morecore global_morecore;
extern MPIConnection * global_mpi_connection_p;
///
/// dlmalloc configuration
///
// This allocator has the following properties (see "Vital statistics"
// below for more details):
// * pointers are 8 bytes (defined by sizeof(size_t))
// * allocated memory is 16-byte aligned
// * overhead per allocated chunk is 16 bytes
// * minimum allocated size is 32 bytes
// * errors will call abort()
// * not pthread-safe (Grappa-thread safety is a TODO)
// * trim not supported; memory is never returned to the OS until the process exits
///
/// dlmalloc configuration for Grappa:
///
/// Use Grappa's custom symmetric MORECORE in place of system sbrk/MORECORE.
#define HAVE_MORECORE 1
#define MORECORE Grappa::impl::global_morecore.morecore
#define MORECORE_CONTIGUOUS 1
#define MORECORE_CANNOT_TRIM
/// do not use mmap, only use symmetric MORECORE
#define HAVE_MMAP 0
/// name functions dlmalloc/dlfree, not malloc/free
#define USE_DL_PREFIX
///
/// header info for dlmalloc
///
/*
This is a version (aka dlmalloc) of malloc/free/realloc written by
Doug Lea and released to the public domain, as explained at
http://creativecommons.org/publicdomain/zero/1.0/ Send questions,
comments, complaints, performance data, etc to dl@cs.oswego.edu
* Version 2.8.6 Wed Aug 29 06:57:58 2012 Doug Lea
Note: There may be an updated version of this malloc obtainable at
ftp://gee.cs.oswego.edu/pub/misc/malloc.c
Check before installing!
* Quickstart
This library is all in one file to simplify the most common usage:
ftp it, compile it (-O3), and link it into another program. All of
the compile-time options default to reasonable values for use on
most platforms. You might later want to step through various
compile-time and dynamic tuning options.
For convenience, an include file for code using this malloc is at:
ftp://gee.cs.oswego.edu/pub/misc/malloc-2.8.6.h
You don't really need this .h file unless you call functions not
defined in your system include files. The .h file contains only the
excerpts from this file needed for using this malloc on ANSI C/C++
systems, so long as you haven't changed compile-time options about
naming and tuning parameters. If you do, then you can create your
own malloc.h that does include all settings by cutting at the point
indicated below. Note that you may already by default be using a C
library containing a malloc that is based on some version of this
malloc (for example in linux). You might still want to use the one
in this file to customize settings or to avoid overheads associated
with library versions.
* Vital statistics:
Supported pointer/size_t representation: 4 or 8 bytes
size_t MUST be an unsigned type of the same width as
pointers. (If you are using an ancient system that declares
size_t as a signed type, or need it to be a different width
than pointers, you can use a previous release of this malloc
(e.g. 2.7.2) supporting these.)
Alignment: 8 bytes (minimum)
This suffices for nearly all current machines and C compilers.
However, you can define MALLOC_ALIGNMENT to be wider than this
if necessary (up to 128bytes), at the expense of using more space.
Minimum overhead per allocated chunk: 4 or 8 bytes (if 4byte sizes)
8 or 16 bytes (if 8byte sizes)
Each malloced chunk has a hidden word of overhead holding size
and status information, and additional cross-check word
if FOOTERS is defined.
Minimum allocated size: 4-byte ptrs: 16 bytes (including overhead)
8-byte ptrs: 32 bytes (including overhead)
Even a request for zero bytes (i.e., malloc(0)) returns a
pointer to something of the minimum allocatable size.
The maximum overhead wastage (i.e., number of extra bytes
allocated than were requested in malloc) is less than or equal
to the minimum size, except for requests >= mmap_threshold that
are serviced via mmap(), where the worst case wastage is about
32 bytes plus the remainder from a system page (the minimal
mmap unit); typically 4096 or 8192 bytes.
Security: static-safe; optionally more or less
The "security" of malloc refers to the ability of malicious
code to accentuate the effects of errors (for example, freeing
space that is not currently malloc'ed or overwriting past the
ends of chunks) in code that calls malloc. This malloc
guarantees not to modify any memory locations below the base of
heap, i.e., static variables, even in the presence of usage
errors. The routines additionally detect most improper frees
and reallocs. All this holds as long as the static bookkeeping
for malloc itself is not corrupted by some other means. This
is only one aspect of security -- these checks do not, and
cannot, detect all possible programming errors.
If FOOTERS is defined nonzero, then each allocated chunk
carries an additional check word to verify that it was malloced
from its space. These check words are the same within each
execution of a program using malloc, but differ across
executions, so externally crafted fake chunks cannot be
freed. This improves security by rejecting frees/reallocs that
could corrupt heap memory, in addition to the checks preventing
writes to statics that are always on. This may further improve
security at the expense of time and space overhead. (Note that
FOOTERS may also be worth using with MSPACES.)
By default detected errors cause the program to abort (calling
"abort()"). You can override this to instead proceed past
errors by defining PROCEED_ON_ERROR. In this case, a bad free
has no effect, and a malloc that encounters a bad address
caused by user overwrites will ignore the bad address by
dropping pointers and indices to all known memory. This may
be appropriate for programs that should continue if at all
possible in the face of programming errors, although they may
run out of memory because dropped memory is never reclaimed.
If you don't like either of these options, you can define
CORRUPTION_ERROR_ACTION and USAGE_ERROR_ACTION to do anything
else. And if if you are sure that your program using malloc has
no errors or vulnerabilities, you can define INSECURE to 1,
which might (or might not) provide a small performance improvement.
It is also possible to limit the maximum total allocatable
space, using malloc_set_footprint_limit. This is not
designed as a security feature in itself (calls to set limits
are not screened or privileged), but may be useful as one
aspect of a secure implementation.
Thread-safety: NOT thread-safe unless USE_LOCKS defined non-zero
When USE_LOCKS is defined, each public call to malloc, free,
etc is surrounded with a lock. By default, this uses a plain
pthread mutex, win32 critical section, or a spin-lock if if
available for the platform and not disabled by setting
USE_SPIN_LOCKS=0. However, if USE_RECURSIVE_LOCKS is defined,
recursive versions are used instead (which are not required for
base functionality but may be needed in layered extensions).
Using a global lock is not especially fast, and can be a major
bottleneck. It is designed only to provide minimal protection
in concurrent environments, and to provide a basis for
extensions. If you are using malloc in a concurrent program,
consider instead using nedmalloc
(http://www.nedprod.com/programs/portable/nedmalloc/) or
ptmalloc (See http://www.malloc.de), which are derived from
versions of this malloc.
System requirements: Any combination of MORECORE and/or MMAP/MUNMAP
This malloc can use unix sbrk or any emulation (invoked using
the CALL_MORECORE macro) and/or mmap/munmap or any emulation
(invoked using CALL_MMAP/CALL_MUNMAP) to get and release system
memory. On most unix systems, it tends to work best if both
MORECORE and MMAP are enabled. On Win32, it uses emulations
based on VirtualAlloc. It also uses common C library functions
like memset.
Compliance: I believe it is compliant with the Single Unix Specification
(See http://www.unix.org). Also SVID/XPG, ANSI C, and probably
others as well.
* Overview of algorithms
This is not the fastest, most space-conserving, most portable, or
most tunable malloc ever written. However it is among the fastest
while also being among the most space-conserving, portable and
tunable. Consistent balance across these factors results in a good
general-purpose allocator for malloc-intensive programs.
In most ways, this malloc is a best-fit allocator. Generally, it
chooses the best-fitting existing chunk for a request, with ties
broken in approximately least-recently-used order. (This strategy
normally maintains low fragmentation.) However, for requests less
than 256bytes, it deviates from best-fit when there is not an
exactly fitting available chunk by preferring to use space adjacent
to that used for the previous small request, as well as by breaking
ties in approximately most-recently-used order. (These enhance
locality of series of small allocations.) And for very large requests
(>= 256Kb by default), it relies on system memory mapping
facilities, if supported. (This helps avoid carrying around and
possibly fragmenting memory used only for large chunks.)
All operations (except malloc_stats and mallinfo) have execution
times that are bounded by a constant factor of the number of bits in
a size_t, not counting any clearing in calloc or copying in realloc,
or actions surrounding MORECORE and MMAP that have times
proportional to the number of non-contiguous regions returned by
system allocation routines, which is often just 1. In real-time
applications, you can optionally suppress segment traversals using
NO_SEGMENT_TRAVERSAL, which assures bounded execution even when
system allocators return non-contiguous spaces, at the typical
expense of carrying around more memory and increased fragmentation.
The implementation is not very modular and seriously overuses
macros. Perhaps someday all C compilers will do as good a job
inlining modular code as can now be done by brute-force expansion,
but now, enough of them seem not to.
Some compilers issue a lot of warnings about code that is
dead/unreachable only on some platforms, and also about intentional
uses of negation on unsigned types. All known cases of each can be
ignored.
For a longer but out of date high-level description, see
http://gee.cs.oswego.edu/dl/html/malloc.html
* MSPACES
If MSPACES is defined, then in addition to malloc, free, etc.,
this file also defines mspace_malloc, mspace_free, etc. These
are versions of malloc routines that take an "mspace" argument
obtained using create_mspace, to control all internal bookkeeping.
If ONLY_MSPACES is defined, only these versions are compiled.
So if you would like to use this allocator for only some allocations,
and your system malloc for others, you can compile with
ONLY_MSPACES and then do something like...
static mspace mymspace = create_mspace(0,0); // for example
#define mymalloc(bytes) mspace_malloc(mymspace, bytes)
(Note: If you only need one instance of an mspace, you can instead
use "USE_DL_PREFIX" to relabel the global malloc.)
You can similarly create thread-local allocators by storing
mspaces as thread-locals. For example:
static __thread mspace tlms = 0;
void* tlmalloc(size_t bytes) {
if (tlms == 0) tlms = create_mspace(0, 0);
return mspace_malloc(tlms, bytes);
}
void tlfree(void* mem) { mspace_free(tlms, mem); }
Unless FOOTERS is defined, each mspace is completely independent.
You cannot allocate from one and free to another (although
conformance is only weakly checked, so usage errors are not always
caught). If FOOTERS is defined, then each chunk carries around a tag
indicating its originating mspace, and frees are directed to their
originating spaces. Normally, this requires use of locks.
------------------------- Compile-time options ---------------------------
Be careful in setting #define values for numerical constants of type
size_t. On some systems, literal values are not automatically extended
to size_t precision unless they are explicitly casted. You can also
use the symbolic values MAX_SIZE_T, SIZE_T_ONE, etc below.
WIN32 default: defined if _WIN32 defined
Defining WIN32 sets up defaults for MS environment and compilers.
Otherwise defaults are for unix. Beware that there seem to be some
cases where this malloc might not be a pure drop-in replacement for
Win32 malloc: Random-looking failures from Win32 GDI API's (eg;
SetDIBits()) may be due to bugs in some video driver implementations
when pixel buffers are malloc()ed, and the region spans more than
one VirtualAlloc()ed region. Because dlmalloc uses a small (64Kb)
default granularity, pixel buffers may straddle virtual allocation
regions more often than when using the Microsoft allocator. You can
avoid this by using VirtualAlloc() and VirtualFree() for all pixel
buffers rather than using malloc(). If this is not possible,
recompile this malloc with a larger DEFAULT_GRANULARITY. Note:
in cases where MSC and gcc (cygwin) are known to differ on WIN32,
conditions use _MSC_VER to distinguish them.
DLMALLOC_EXPORT default: extern
Defines how public APIs are declared. If you want to export via a
Windows DLL, you might define this as
#define DLMALLOC_EXPORT extern __declspec(dllexport)
If you want a POSIX ELF shared object, you might use
#define DLMALLOC_EXPORT extern __attribute__((visibility("default")))
MALLOC_ALIGNMENT default: (size_t)(2 * sizeof(void *))
Controls the minimum alignment for malloc'ed chunks. It must be a
power of two and at least 8, even on machines for which smaller
alignments would suffice. It may be defined as larger than this
though. Note however that code and data structures are optimized for
the case of 8-byte alignment.
MSPACES default: 0 (false)
If true, compile in support for independent allocation spaces.
This is only supported if HAVE_MMAP is true.
ONLY_MSPACES default: 0 (false)
If true, only compile in mspace versions, not regular versions.
USE_LOCKS default: 0 (false)
Causes each call to each public routine to be surrounded with
pthread or WIN32 mutex lock/unlock. (If set true, this can be
overridden on a per-mspace basis for mspace versions.) If set to a
non-zero value other than 1, locks are used, but their
implementation is left out, so lock functions must be supplied manually,
as described below.
USE_SPIN_LOCKS default: 1 iff USE_LOCKS and spin locks available
If true, uses custom spin locks for locking. This is currently
supported only gcc >= 4.1, older gccs on x86 platforms, and recent
MS compilers. Otherwise, posix locks or win32 critical sections are
used.
USE_RECURSIVE_LOCKS default: not defined
If defined nonzero, uses recursive (aka reentrant) locks, otherwise
uses plain mutexes. This is not required for malloc proper, but may
be needed for layered allocators such as nedmalloc.
LOCK_AT_FORK default: not defined
If defined nonzero, performs pthread_atfork upon initialization
to initialize child lock while holding parent lock. The implementation
assumes that pthread locks (not custom locks) are being used. In other
cases, you may need to customize the implementation.
FOOTERS default: 0
If true, provide extra checking and dispatching by placing
information in the footers of allocated chunks. This adds
space and time overhead.
INSECURE default: 0
If true, omit checks for usage errors and heap space overwrites.
USE_DL_PREFIX default: NOT defined
Causes compiler to prefix all public routines with the string 'dl'.
This can be useful when you only want to use this malloc in one part
of a program, using your regular system malloc elsewhere.
MALLOC_INSPECT_ALL default: NOT defined
If defined, compiles malloc_inspect_all and mspace_inspect_all, that
perform traversal of all heap space. Unless access to these
functions is otherwise restricted, you probably do not want to
include them in secure implementations.
ABORT default: defined as abort()
Defines how to abort on failed checks. On most systems, a failed
check cannot die with an "assert" or even print an informative
message, because the underlying print routines in turn call malloc,
which will fail again. Generally, the best policy is to simply call
abort(). It's not very useful to do more than this because many
errors due to overwriting will show up as address faults (null, odd
addresses etc) rather than malloc-triggered checks, so will also
abort. Also, most compilers know that abort() does not return, so
can better optimize code conditionally calling it.
PROCEED_ON_ERROR default: defined as 0 (false)
Controls whether detected bad addresses cause them to bypassed
rather than aborting. If set, detected bad arguments to free and
realloc are ignored. And all bookkeeping information is zeroed out
upon a detected overwrite of freed heap space, thus losing the
ability to ever return it from malloc again, but enabling the
application to proceed. If PROCEED_ON_ERROR is defined, the
static variable malloc_corruption_error_count is compiled in
and can be examined to see if errors have occurred. This option
generates slower code than the default abort policy.
DEBUG default: NOT defined
The DEBUG setting is mainly intended for people trying to modify
this code or diagnose problems when porting to new platforms.
However, it may also be able to better isolate user errors than just
using runtime checks. The assertions in the check routines spell
out in more detail the assumptions and invariants underlying the
algorithms. The checking is fairly extensive, and will slow down
execution noticeably. Calling malloc_stats or mallinfo with DEBUG
set will attempt to check every non-mmapped allocated and free chunk
in the course of computing the summaries.
ABORT_ON_ASSERT_FAILURE default: defined as 1 (true)
Debugging assertion failures can be nearly impossible if your
version of the assert macro causes malloc to be called, which will
lead to a cascade of further failures, blowing the runtime stack.
ABORT_ON_ASSERT_FAILURE cause assertions failures to call abort(),
which will usually make debugging easier.
MALLOC_FAILURE_ACTION default: sets errno to ENOMEM, or no-op on win32
The action to take before "return 0" when malloc fails to be able to
return memory because there is none available.
HAVE_MORECORE default: 1 (true) unless win32 or ONLY_MSPACES
True if this system supports sbrk or an emulation of it.
MORECORE default: sbrk
The name of the sbrk-style system routine to call to obtain more
memory. See below for guidance on writing custom MORECORE
functions. The type of the argument to sbrk/MORECORE varies across
systems. It cannot be size_t, because it supports negative
arguments, so it is normally the signed type of the same width as
size_t (sometimes declared as "intptr_t"). It doesn't much matter
though. Internally, we only call it with arguments less than half
the max value of a size_t, which should work across all reasonable
possibilities, although sometimes generating compiler warnings.
MORECORE_CONTIGUOUS default: 1 (true) if HAVE_MORECORE
If true, take advantage of fact that consecutive calls to MORECORE
with positive arguments always return contiguous increasing
addresses. This is true of unix sbrk. It does not hurt too much to
set it true anyway, since malloc copes with non-contiguities.
Setting it false when definitely non-contiguous saves time
and possibly wasted space it would take to discover this though.
MORECORE_CANNOT_TRIM default: NOT defined
True if MORECORE cannot release space back to the system when given
negative arguments. This is generally necessary only if you are
using a hand-crafted MORECORE function that cannot handle negative
arguments.
NO_SEGMENT_TRAVERSAL default: 0
If non-zero, suppresses traversals of memory segments
returned by either MORECORE or CALL_MMAP. This disables
merging of segments that are contiguous, and selectively
releasing them to the OS if unused, but bounds execution times.
HAVE_MMAP default: 1 (true)
True if this system supports mmap or an emulation of it. If so, and
HAVE_MORECORE is not true, MMAP is used for all system
allocation. If set and HAVE_MORECORE is true as well, MMAP is
primarily used to directly allocate very large blocks. It is also
used as a backup strategy in cases where MORECORE fails to provide
space from system. Note: A single call to MUNMAP is assumed to be
able to unmap memory that may have be allocated using multiple calls
to MMAP, so long as they are adjacent.
HAVE_MREMAP default: 1 on linux, else 0
If true realloc() uses mremap() to re-allocate large blocks and
extend or shrink allocation spaces.
MMAP_CLEARS default: 1 except on WINCE.
True if mmap clears memory so calloc doesn't need to. This is true
for standard unix mmap using /dev/zero and on WIN32 except for WINCE.
USE_BUILTIN_FFS default: 0 (i.e., not used)
Causes malloc to use the builtin ffs() function to compute indices.
Some compilers may recognize and intrinsify ffs to be faster than the
supplied C version. Also, the case of x86 using gcc is special-cased
to an asm instruction, so is already as fast as it can be, and so
this setting has no effect. Similarly for Win32 under recent MS compilers.
(On most x86s, the asm version is only slightly faster than the C version.)
malloc_getpagesize default: derive from system includes, or 4096.
The system page size. To the extent possible, this malloc manages
memory from the system in page-size units. This may be (and
usually is) a function rather than a constant. This is ignored
if WIN32, where page size is determined using getSystemInfo during
initialization.
USE_DEV_RANDOM default: 0 (i.e., not used)
Causes malloc to use /dev/random to initialize secure magic seed for
stamping footers. Otherwise, the current time is used.
NO_MALLINFO default: 0
If defined, don't compile "mallinfo". This can be a simple way
of dealing with mismatches between system declarations and
those in this file.
MALLINFO_FIELD_TYPE default: size_t
The type of the fields in the mallinfo struct. This was originally
defined as "int" in SVID etc, but is more usefully defined as
size_t. The value is used only if HAVE_USR_INCLUDE_MALLOC_H is not set
NO_MALLOC_STATS default: 0
If defined, don't compile "malloc_stats". This avoids calls to
fprintf and bringing in stdio dependencies you might not want.
REALLOC_ZERO_BYTES_FREES default: not defined
This should be set if a call to realloc with zero bytes should
be the same as a call to free. Some people think it should. Otherwise,
since this malloc returns a unique pointer for malloc(0), so does
realloc(p, 0).
LACKS_UNISTD_H, LACKS_FCNTL_H, LACKS_SYS_PARAM_H, LACKS_SYS_MMAN_H
LACKS_STRINGS_H, LACKS_STRING_H, LACKS_SYS_TYPES_H, LACKS_ERRNO_H
LACKS_STDLIB_H LACKS_SCHED_H LACKS_TIME_H default: NOT defined unless on WIN32
Define these if your system does not have these header files.
You might need to manually insert some of the declarations they provide.
DEFAULT_GRANULARITY default: page size if MORECORE_CONTIGUOUS,
system_info.dwAllocationGranularity in WIN32,
otherwise 64K.
Also settable using mallopt(M_GRANULARITY, x)
The unit for allocating and deallocating memory from the system. On
most systems with contiguous MORECORE, there is no reason to
make this more than a page. However, systems with MMAP tend to
either require or encourage larger granularities. You can increase
this value to prevent system allocation functions to be called so
often, especially if they are slow. The value must be at least one
page and must be a power of two. Setting to 0 causes initialization
to either page size or win32 region size. (Note: In previous
versions of malloc, the equivalent of this option was called
"TOP_PAD")
DEFAULT_TRIM_THRESHOLD default: 2MB
Also settable using mallopt(M_TRIM_THRESHOLD, x)
The maximum amount of unused top-most memory to keep before
releasing via malloc_trim in free(). Automatic trimming is mainly
useful in long-lived programs using contiguous MORECORE. Because
trimming via sbrk can be slow on some systems, and can sometimes be
wasteful (in cases where programs immediately afterward allocate
more large chunks) the value should be high enough so that your
overall system performance would improve by releasing this much
memory. As a rough guide, you might set to a value close to the
average size of a process (program) running on your system.
Releasing this much memory would allow such a process to run in
memory. Generally, it is worth tuning trim thresholds when a
program undergoes phases where several large chunks are allocated
and released in ways that can reuse each other's storage, perhaps
mixed with phases where there are no such chunks at all. The trim
value must be greater than page size to have any useful effect. To
disable trimming completely, you can set to MAX_SIZE_T. Note that the trick
some people use of mallocing a huge space and then freeing it at
program startup, in an attempt to reserve system memory, doesn't
have the intended effect under automatic trimming, since that memory
will immediately be returned to the system.
DEFAULT_MMAP_THRESHOLD default: 256K
Also settable using mallopt(M_MMAP_THRESHOLD, x)
The request size threshold for using MMAP to directly service a
request. Requests of at least this size that cannot be allocated
using already-existing space will be serviced via mmap. (If enough
normal freed space already exists it is used instead.) Using mmap
segregates relatively large chunks of memory so that they can be
individually obtained and released from the host system. A request
serviced through mmap is never reused by any other request (at least
not directly; the system may just so happen to remap successive
requests to the same locations). Segregating space in this way has
the benefits that: Mmapped space can always be individually released
back to the system, which helps keep the system level memory demands
of a long-lived program low. Also, mapped memory doesn't become
`locked' between other chunks, as can happen with normally allocated
chunks, which means that even trimming via malloc_trim would not
release them. However, it has the disadvantage that the space
cannot be reclaimed, consolidated, and then used to service later
requests, as happens with normal chunks. The advantages of mmap
nearly always outweigh disadvantages for "large" chunks, but the
value of "large" may vary across systems. The default is an
empirically derived value that works well in most systems. You can
disable mmap by setting to MAX_SIZE_T.
MAX_RELEASE_CHECK_RATE default: 4095 unless not HAVE_MMAP
The number of consolidated frees between checks to release
unused segments when freeing. When using non-contiguous segments,
especially with multiple mspaces, checking only for topmost space
doesn't always suffice to trigger trimming. To compensate for this,
free() will, with a period of MAX_RELEASE_CHECK_RATE (or the
current number of segments, if greater) try to release unused
segments to the OS when freeing chunks that result in
consolidation. The best value for this parameter is a compromise
between slowing down frees with relatively costly checks that
rarely trigger versus holding on to unused memory. To effectively
disable, set to MAX_SIZE_T. This may lead to a very slight speed
improvement at the expense of carrying around more memory.
*/
/* Version identifier to allow people to support multiple versions */
#ifndef DLMALLOC_VERSION
#define DLMALLOC_VERSION 20806
#endif /* DLMALLOC_VERSION */
#ifndef DLMALLOC_EXPORT
#define DLMALLOC_EXPORT extern
#endif
#ifndef WIN32
#ifdef _WIN32
#define WIN32 1
#endif /* _WIN32 */
#ifdef _WIN32_WCE
#define LACKS_FCNTL_H
#define WIN32 1
#endif /* _WIN32_WCE */
#endif /* WIN32 */
#ifdef WIN32
#define WIN32_LEAN_AND_MEAN
#include <windows.h>
#include <tchar.h>
#define HAVE_MMAP 1
#define HAVE_MORECORE 0
#define LACKS_UNISTD_H
#define LACKS_SYS_PARAM_H
#define LACKS_SYS_MMAN_H
#define LACKS_STRING_H
#define LACKS_STRINGS_H
#define LACKS_SYS_TYPES_H
#define LACKS_ERRNO_H
#define LACKS_SCHED_H
#ifndef MALLOC_FAILURE_ACTION
#define MALLOC_FAILURE_ACTION
#endif /* MALLOC_FAILURE_ACTION */
#ifndef MMAP_CLEARS
#ifdef _WIN32_WCE /* WINCE reportedly does not clear */
#define MMAP_CLEARS 0
#else
#define MMAP_CLEARS 1
#endif /* _WIN32_WCE */
#endif /*MMAP_CLEARS */
#endif /* WIN32 */
#if defined(DARWIN) || defined(_DARWIN)
/* Mac OSX docs advise not to use sbrk; it seems better to use mmap */
#ifndef HAVE_MORECORE
#define HAVE_MORECORE 0
#define HAVE_MMAP 1
/* OSX allocators provide 16 byte alignment */
#ifndef MALLOC_ALIGNMENT
#define MALLOC_ALIGNMENT ((size_t)16U)
#endif
#endif /* HAVE_MORECORE */
#endif /* DARWIN */
#ifndef LACKS_SYS_TYPES_H
#include <sys/types.h> /* For size_t */
#endif /* LACKS_SYS_TYPES_H */
/* The maximum possible size_t value has all bits set */
#define MAX_SIZE_T (~(size_t)0)
#ifndef USE_LOCKS /* ensure true if spin or recursive locks set */
#define USE_LOCKS ((defined(USE_SPIN_LOCKS) && USE_SPIN_LOCKS != 0) || \
(defined(USE_RECURSIVE_LOCKS) && USE_RECURSIVE_LOCKS != 0))
#endif /* USE_LOCKS */
#if USE_LOCKS /* Spin locks for gcc >= 4.1, older gcc on x86, MSC >= 1310 */
#if ((defined(__GNUC__) && \
((__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 1)) || \
defined(__i386__) || defined(__x86_64__))) || \
(defined(_MSC_VER) && _MSC_VER>=1310))
#ifndef USE_SPIN_LOCKS
#define USE_SPIN_LOCKS 1
#endif /* USE_SPIN_LOCKS */
#elif USE_SPIN_LOCKS
#error "USE_SPIN_LOCKS defined without implementation"
#endif /* ... locks available... */
#elif !defined(USE_SPIN_LOCKS)
#define USE_SPIN_LOCKS 0
#endif /* USE_LOCKS */
#ifndef ONLY_MSPACES
#define ONLY_MSPACES 0
#endif /* ONLY_MSPACES */
#ifndef MSPACES
#if ONLY_MSPACES
#define MSPACES 1
#else /* ONLY_MSPACES */
#define MSPACES 0
#endif /* ONLY_MSPACES */
#endif /* MSPACES */
#ifndef MALLOC_ALIGNMENT
#define MALLOC_ALIGNMENT ((size_t)(2 * sizeof(void *)))
#endif /* MALLOC_ALIGNMENT */
#ifndef FOOTERS
#define FOOTERS 0
#endif /* FOOTERS */
#ifndef ABORT
#define ABORT abort()
#endif /* ABORT */
#ifndef ABORT_ON_ASSERT_FAILURE
#define ABORT_ON_ASSERT_FAILURE 1
#endif /* ABORT_ON_ASSERT_FAILURE */
#ifndef PROCEED_ON_ERROR
#define PROCEED_ON_ERROR 0
#endif /* PROCEED_ON_ERROR */
#ifndef INSECURE
#define INSECURE 0
#endif /* INSECURE */
#ifndef MALLOC_INSPECT_ALL
#define MALLOC_INSPECT_ALL 0
#endif /* MALLOC_INSPECT_ALL */
#ifndef HAVE_MMAP
#define HAVE_MMAP 1
#endif /* HAVE_MMAP */
#ifndef MMAP_CLEARS
#define MMAP_CLEARS 1
#endif /* MMAP_CLEARS */
#ifndef HAVE_MREMAP
#ifdef linux
#define HAVE_MREMAP 1
#define _GNU_SOURCE /* Turns on mremap() definition */
#else /* linux */
#define HAVE_MREMAP 0
#endif /* linux */
#endif /* HAVE_MREMAP */
#ifndef MALLOC_FAILURE_ACTION
#define MALLOC_FAILURE_ACTION errno = ENOMEM;
#endif /* MALLOC_FAILURE_ACTION */
#ifndef HAVE_MORECORE
#if ONLY_MSPACES
#define HAVE_MORECORE 0
#else /* ONLY_MSPACES */
#define HAVE_MORECORE 1
#endif /* ONLY_MSPACES */
#endif /* HAVE_MORECORE */
#if !HAVE_MORECORE
#define MORECORE_CONTIGUOUS 0
#else /* !HAVE_MORECORE */
#define MORECORE_DEFAULT sbrk
#ifndef MORECORE_CONTIGUOUS
#define MORECORE_CONTIGUOUS 1
#endif /* MORECORE_CONTIGUOUS */
#endif /* HAVE_MORECORE */
#ifndef DEFAULT_GRANULARITY
#if (MORECORE_CONTIGUOUS || defined(WIN32))
#define DEFAULT_GRANULARITY (0) /* 0 means to compute in init_mparams */
#else /* MORECORE_CONTIGUOUS */
#define DEFAULT_GRANULARITY ((size_t)64U * (size_t)1024U)
#endif /* MORECORE_CONTIGUOUS */
#endif /* DEFAULT_GRANULARITY */
#ifndef DEFAULT_TRIM_THRESHOLD
#ifndef MORECORE_CANNOT_TRIM
#define DEFAULT_TRIM_THRESHOLD ((size_t)2U * (size_t)1024U * (size_t)1024U)
#else /* MORECORE_CANNOT_TRIM */
#define DEFAULT_TRIM_THRESHOLD MAX_SIZE_T
#endif /* MORECORE_CANNOT_TRIM */
#endif /* DEFAULT_TRIM_THRESHOLD */
#ifndef DEFAULT_MMAP_THRESHOLD
#if HAVE_MMAP
#define DEFAULT_MMAP_THRESHOLD ((size_t)256U * (size_t)1024U)
#else /* HAVE_MMAP */
#define DEFAULT_MMAP_THRESHOLD MAX_SIZE_T
#endif /* HAVE_MMAP */
#endif /* DEFAULT_MMAP_THRESHOLD */
#ifndef MAX_RELEASE_CHECK_RATE
#if HAVE_MMAP
#define MAX_RELEASE_CHECK_RATE 4095
#else
#define MAX_RELEASE_CHECK_RATE MAX_SIZE_T
#endif /* HAVE_MMAP */
#endif /* MAX_RELEASE_CHECK_RATE */
#ifndef USE_BUILTIN_FFS
#define USE_BUILTIN_FFS 0
#endif /* USE_BUILTIN_FFS */
#ifndef USE_DEV_RANDOM
#define USE_DEV_RANDOM 0
#endif /* USE_DEV_RANDOM */
#ifndef NO_MALLINFO
#define NO_MALLINFO 0
#endif /* NO_MALLINFO */
#ifndef MALLINFO_FIELD_TYPE
#define MALLINFO_FIELD_TYPE size_t
#endif /* MALLINFO_FIELD_TYPE */
#ifndef NO_MALLOC_STATS
#define NO_MALLOC_STATS 0
#endif /* NO_MALLOC_STATS */
#ifndef NO_SEGMENT_TRAVERSAL
#define NO_SEGMENT_TRAVERSAL 0
#endif /* NO_SEGMENT_TRAVERSAL */
/*
mallopt tuning options. SVID/XPG defines four standard parameter
numbers for mallopt, normally defined in malloc.h. None of these
are used in this malloc, so setting them has no effect. But this
malloc does support the following options.
*/
#define M_TRIM_THRESHOLD (-1)
#define M_GRANULARITY (-2)
#define M_MMAP_THRESHOLD (-3)
/* ------------------------ Mallinfo declarations ------------------------ */
#if !NO_MALLINFO
/*
This version of malloc supports the standard SVID/XPG mallinfo
routine that returns a struct containing usage properties and
statistics. It should work on any system that has a
/usr/include/malloc.h defining struct mallinfo. The main
declaration needed is the mallinfo struct that is returned (by-copy)
by mallinfo(). The malloinfo struct contains a bunch of fields that
are not even meaningful in this version of malloc. These fields are
are instead filled by mallinfo() with other numbers that might be of
interest.
HAVE_USR_INCLUDE_MALLOC_H should be set if you have a
/usr/include/malloc.h file that includes a declaration of struct
mallinfo. If so, it is included; else a compliant version is
declared below. These must be precisely the same for mallinfo() to
work. The original SVID version of this struct, defined on most
systems with mallinfo, declares all fields as ints. But some others
define as unsigned long. If your system defines the fields using a
type of different width than listed here, you MUST #include your
system version and #define HAVE_USR_INCLUDE_MALLOC_H.
*/
/* #define HAVE_USR_INCLUDE_MALLOC_H */
#ifdef HAVE_USR_INCLUDE_MALLOC_H
#include "/usr/include/malloc.h"
#else /* HAVE_USR_INCLUDE_MALLOC_H */
#ifndef STRUCT_MALLINFO_DECLARED
/* HP-UX (and others?) redefines mallinfo unless _STRUCT_MALLINFO is defined */
#define _STRUCT_MALLINFO
#define STRUCT_MALLINFO_DECLARED 1
struct mallinfo {
MALLINFO_FIELD_TYPE arena; /* non-mmapped space allocated from system */
MALLINFO_FIELD_TYPE ordblks; /* number of free chunks */
MALLINFO_FIELD_TYPE smblks; /* always 0 */
MALLINFO_FIELD_TYPE hblks; /* always 0 */
MALLINFO_FIELD_TYPE hblkhd; /* space in mmapped regions */
MALLINFO_FIELD_TYPE usmblks; /* maximum total allocated space */
MALLINFO_FIELD_TYPE fsmblks; /* always 0 */
MALLINFO_FIELD_TYPE uordblks; /* total allocated space */
MALLINFO_FIELD_TYPE fordblks; /* total free space */
MALLINFO_FIELD_TYPE keepcost; /* releasable (via malloc_trim) space */
};
#endif /* STRUCT_MALLINFO_DECLARED */
#endif /* HAVE_USR_INCLUDE_MALLOC_H */
#endif /* NO_MALLINFO */
/*
Try to persuade compilers to inline. The most critical functions for
inlining are defined as macros, so these aren't used for them.
*/
#ifndef FORCEINLINE
#if defined(__GNUC__)
#define FORCEINLINE __inline __attribute__ ((always_inline))
#elif defined(_MSC_VER)
#define FORCEINLINE __forceinline
#endif
#endif
#ifndef NOINLINE
#if defined(__GNUC__)
#define NOINLINE __attribute__ ((noinline))
#elif defined(_MSC_VER)
#define NOINLINE __declspec(noinline)
#else
#define NOINLINE
#endif
#endif
#ifdef __cplusplus
extern "C" {
#ifndef FORCEINLINE
#define FORCEINLINE inline
#endif
#endif /* __cplusplus */
#ifndef FORCEINLINE
#define FORCEINLINE
#endif
#if !ONLY_MSPACES
/* ------------------- Declarations of public routines ------------------- */
#ifndef USE_DL_PREFIX
#define dlcalloc calloc
#define dlfree free
#define dlmalloc malloc
#define dlmemalign memalign
#define dlposix_memalign posix_memalign
#define dlrealloc realloc
#define dlrealloc_in_place realloc_in_place
#define dlvalloc valloc
#define dlpvalloc pvalloc
#define dlmallinfo mallinfo
#define dlmallopt mallopt
#define dlmalloc_trim malloc_trim
#define dlmalloc_stats malloc_stats
#define dlmalloc_usable_size malloc_usable_size
#define dlmalloc_footprint malloc_footprint
#define dlmalloc_max_footprint malloc_max_footprint
#define dlmalloc_footprint_limit malloc_footprint_limit
#define dlmalloc_set_footprint_limit malloc_set_footprint_limit
#define dlmalloc_inspect_all malloc_inspect_all
#define dlindependent_calloc independent_calloc
#define dlindependent_comalloc independent_comalloc
#define dlbulk_free bulk_free
#endif /* USE_DL_PREFIX */
/*
malloc(size_t n)
Returns a pointer to a newly allocated chunk of at least n bytes, or
null if no space is available, in which case errno is set to ENOMEM
on ANSI C systems.
If n is zero, malloc returns a minimum-sized chunk. (The minimum
size is 16 bytes on most 32bit systems, and 32 bytes on 64bit
systems.) Note that size_t is an unsigned type, so calls with
arguments that would be negative if signed are interpreted as
requests for huge amounts of space, which will often fail. The
maximum supported value of n differs across systems, but is in all
cases less than the maximum representable value of a size_t.
*/
DLMALLOC_EXPORT void* dlmalloc(size_t);
/*
free(void* p)
Releases the chunk of memory pointed to by p, that had been previously
allocated using malloc or a related routine such as realloc.
It has no effect if p is null. If p was not malloced or already
freed, free(p) will by default cause the current program to abort.
*/