-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcore_gzip.d
150 lines (119 loc) · 4.98 KB
/
core_gzip.d
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
module d_glat.core_gzip;
import d_glat.lib_tmpfilename;
import std.array;
import std.conv;
import std.file;
import std.process;
import std.range;
import std.stdio;
import std.zlib;
/**
 * Decompress a gzip-formatted buffer.
 *
 * Params:
 *   data = the gzip-compressed bytes
 *
 * Returns: the decompressed bytes (streamed output followed by
 * whatever the decompressor still holds when flushed).
 *
 * History: an earlier version called the free function
 * `uncompress( zdata_0, 0, 47 )`; the `47` was needed because the zlib
 * and gzip formats have different headers, see
 * http://www.digitalmars.com/d/archives/digitalmars/D/Trouble_with_std.zlib_140855.html
 * An `UnCompress` instance created with `HeaderFormat.gzip` is used
 * instead now.
 */
ubyte[] gunzip( in ubyte[] data )
{
    scope auto inflater = new UnCompress( HeaderFormat.gzip );
    scope auto sink     = appender!(ubyte[]);

    // Main chunk first...
    const(void)[] inflated = inflater.uncompress( data );
    sink.put( cast( ubyte[] )( inflated ) );

    // ...then the remainder still buffered inside the decompressor.
    const(void)[] remainder = inflater.flush();
    sink.put( cast( ubyte[] )( remainder ) );

    return sink.data;
}
/**
 * Convenience overload: decompress gzip data held in any type `T` that
 * can be cast to and from `ubyte[]`.
 *
 * Params:
 *   data = gzip-compressed input, reinterpreted as `ubyte[]` by a cast
 *
 * Returns: the decompressed bytes, cast back to `T`.
 */
T gunzip(T)( in T data )
{
    auto raw      = cast( ubyte[] )( data );
    auto unpacked = gunzip( raw );
    return cast(T)( unpacked );
}
// Consider using `std.string.representation` in some use cases.
// State used by `gzip` to decide between the external `gzip` command
// and the in-process std.zlib implementation:
//
//   _tried_shell        - set once an attempt at the external command
//                         has finished, whether it succeeded or failed
//   _use_shell          - whether the external command should be used
//                         for large inputs
//   _checking_stability - true while `gzip` runs its determinism probe
//                         (it calls itself recursively during that probe)
//
// NOTE(review): these are declared without `shared` / `__gshared`, so in
// D they are thread-local - every thread keeps its own copy and performs
// its own probe. Confirm that this is intended.
private bool _tried_shell = false, _use_shell = false, _checking_stability = false;
// Monitor object for the `synchronized` blocks in `gzip` that update the
// flags above.
class _GzipLock {}; shared auto _gzipLock = new _GzipLock;
/**
 * Gzip-compress `data` and return the compressed bytes.
 *
 * With `also_disk == true` (the default), inputs of at least 100 KiB are
 * written to a temporary file and compressed with the external
 * `gzip -n` command. For big data the disk route is often faster, but
 * rather not when multiprocessing/multithreading - pass
 * `also_disk == false` in that case to always compress in memory.
 *
 * The first time the external command is considered, a probe buffer is
 * compressed twice and both outputs are compared, to make sure the
 * command is deterministic (useful for hashing etc.). If the command
 * fails or is not deterministic, this function falls back - for all
 * later calls too - onto the in-process std.zlib implementation.
 *
 * Params:
 *   also_disk = compile-time switch enabling the external command path
 *   data      = the bytes to compress
 *
 * Returns: the gzip-formatted compressed bytes.
 *
 * Failures of the external command are reported on stderr and are not
 * propagated to the caller.
 */
ubyte[] gzip(bool also_disk = true)( in ubyte[] data )
{
    static if (also_disk)
    {
        immutable MIN_LENGTH_FOR_SHELL_GZIP = 100 * 1024L;

        if (MIN_LENGTH_FOR_SHELL_GZIP <= data.length && (_use_shell || !_tried_shell))
        {
            if (!_tried_shell && !_checking_stability)
            {
                _checking_stability = true;
                // Always leave probing mode, even if something below throws.
                scope(exit) _checking_stability = false;

                // First time, check as well that always the same output
                // (useful for hashing etc.). Normally the '-n' option of
                // gzip should be enough for that, but checking is safer.
                immutable s = "A la claire fontaine, m'en allant promener, j'ai trouve l'eau si claire, que je m'y suis baigne.";

                scope const probe =
                    cast(ubyte[])( s.replicate( 2 + (MIN_LENGTH_FOR_SHELL_GZIP / s.length) ) );
                assert( MIN_LENGTH_FOR_SHELL_GZIP < probe.length );

                // While `_checking_stability` is set, the recursive calls
                // skip this probe and go straight to the external command.
                scope const out_1 = gzip!also_disk( probe );

                if (_tried_shell && _use_shell)
                {
                    // The first run worked: compress the very same input
                    // again and compare.
                    scope const out_2 = gzip!also_disk( probe );

                    _tried_shell = true;
                    _use_shell   = out_1 == out_2;

                    if (!_use_shell)
                    {
                        stderr.writeln( "For some reason, gzip() output non deterministic - even if we passed the '-n' option. Falling back onto the (likely slower) D implementation." );
                        stderr.flush;
                    }
                }
            }

            // Tried with pipeShell, but blocking on big files, hence the
            // present solution using a temporary file.
            if (_tried_shell && _use_shell || _checking_stability)
            {
                try
                {
                    immutable tmpfn = get_tmpfilename( "d_glat.core_gzip" );
                    immutable outfn = tmpfn ~ ".gz";

                    // Whatever happens, do not leave temporary files behind:
                    // on success gzip itself removes `tmpfn`, but on failure
                    // it stays, and `outfn` must go once it has been read.
                    scope(exit)
                    {
                        foreach (leftover; [tmpfn, outfn])
                        {
                            try
                            {
                                if (std.file.exists( leftover ))
                                    std.file.remove( leftover );
                            }
                            catch (Exception) {} // best-effort cleanup
                        }
                    }

                    std.file.write( tmpfn, data );

                    // -n important: do not save the tmpfn into the file, to
                    // try to guarantee always same output.
                    // `escapeShellCommand` takes care of quoting the name.
                    scope auto tmp = executeShell( escapeShellCommand( "gzip", "-n", tmpfn ) );

                    // Detect errors.
                    if (0 != tmp.status)
                    {
                        throw new Exception( "gzip exited with status " ~ to!string( tmp.status )
                                             ~ ": " ~ to!string( tmp.output ) );
                    }

                    // Read output
                    auto ret = cast(ubyte[])( std.file.read( outfn ) );

                    synchronized( _gzipLock )
                    {
                        if (!_tried_shell)
                        {
                            _tried_shell = true;
                            _use_shell   = true;
                        }
                    }

                    return ret; // Success. Done!
                }
                catch ( Exception e )
                {
                    // Only `Exception`s are handled here: `Error`s (assert
                    // failures, out of memory...) are left to propagate.
                    stderr.writeln( "d_glat.core_gzip.gzip failed to use the shell. Falling back onto slower D implementation. Cause: " ~ e.msg );
                    stderr.flush;

                    synchronized( _gzipLock )
                    {
                        _tried_shell = true;
                        _use_shell   = false;
                    }
                }
            }
        }
    }

    // In-process implementation (std.zlib), maximum compression level.
    scope auto app = appender!(ubyte[]);
    scope auto C   = new Compress( 9, HeaderFormat.gzip );

    app.put( cast( ubyte[] )( C.compress( data ) ) );
    app.put( cast( ubyte[] )( C.flush() ) );

    return app.data;
}
/**
 * Convenience overload: gzip-compress data held in any type `T` that can
 * be cast to and from `ubyte[]`.
 *
 * Params:
 *   also_disk = forwarded unchanged to the `ubyte[]` overload
 *   data      = input to compress, reinterpreted as `ubyte[]` by a cast
 *
 * Returns: the compressed bytes, cast back to `T`.
 */
T gzip(bool also_disk = true, T)( T data )
{
    auto raw    = cast( ubyte[] )( data );
    auto packed = gzip!also_disk( raw );
    return cast(T)( packed );
}