Skip to content

Commit 1ecfc8c

Browse files
committed
Hint compiler about memory aliasing restrictions
1 parent c927a9e commit 1ecfc8c

File tree

9 files changed

+72
-75
lines changed

9 files changed

+72
-75
lines changed

README.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ libraries (except original Qualcomm libraries) were compiled with Clang version
6262
| aptxHD100 | — | — | 1m21.950s | 0.89616 |
6363
| [libopenaptx-0.2.0][2] | 1m22.090s | 0.89062 | 1m25.730s | 0.85429 |
6464

65-
[1]: ./archive "Archive with Qualcomm apt-X encoding libraries"
65+
[1]: archive/aarch64 "Archive with Qualcomm apt-X encoding libraries"
6666
[2]: https://github.com/pali/libopenaptx "The apt-X encoder/decoder based on FFmpeg code"
6767

6868
## Resources

src/aptx422/processor.c

+16-15
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,9 @@ void aptX_prediction_filtering(int32_t a, aptX_prediction_filter_422 * f) {
3434
int32_t tmp1 = a + f->unk8;
3535
clamp_int24_t(tmp1);
3636

37-
int32_t tmp2 = ((int64_t)tmp1 * f->unk2 + (int64_t)f->unk3 * f->unk6) >> 22;
37+
int64_t x1 = (int64_t)f->unk3 * f->unk6;
38+
int64_t x2 = (int64_t)tmp1 * f->unk2;
39+
int32_t tmp2 = (x1 + x2) >> 22;
3840
clamp_int24_t(tmp2);
3941

4042
int32_t v1 = 128;
@@ -44,24 +46,21 @@ void aptX_prediction_filtering(int32_t a, aptX_prediction_filter_422 * f) {
4446
v2 = ((a >> 31) & 0xFF000000) + 8388736;
4547
}
4648

47-
int32_t * q = &f->arr2[f->i + f->width];
49+
size_t q = f->i + f->width;
4850
int64_t sum = 0;
49-
int32_t c = a;
50-
51-
f->i = (f->i + 1) % f->width;
52-
f->subband_param_unk3_3 = a;
51+
int64_t c = a;
5352

5453
for (size_t i = 0; i < (size_t)f->width; i++, q--) {
5554

5655
int32_t tmp;
57-
if (*q >= 0)
56+
if (f->arr2[q] >= 0)
5857
tmp = v2 - f->arr1[i];
5958
else
6059
tmp = v1 - f->arr1[i];
6160

6261
f->arr1[i] += (tmp >> 8) - (((uint32_t)tmp) << 23 == 0x80000000);
63-
sum += (int64_t)f->arr1[i] * c;
64-
c = *q;
62+
sum += c * f->arr1[i];
63+
c = f->arr2[q];
6564
}
6665

6766
f->unk6 = tmp1;
@@ -70,6 +69,8 @@ void aptX_prediction_filtering(int32_t a, aptX_prediction_filter_422 * f) {
7069
f->unk8 = f->unk7 + tmp2;
7170
clamp_int24_t(f->unk8);
7271

72+
f->i = (f->i + 1) % f->width;
73+
7374
f->arr2[f->i] = a;
7475
f->arr2[f->i + f->width] = a;
7576
}
@@ -88,15 +89,15 @@ void aptX_process_subband(int32_t a, int32_t dither, aptX_prediction_filter_422
8889
f->sign2 = f->sign1;
8990
f->sign1 = -1;
9091
}
91-
if (tmp == 0) {
92-
sign1 *= 0;
93-
sign2 *= 0;
92+
else if (tmp > 0) {
93+
sign1 *= 1;
94+
sign2 *= 1;
9495
f->sign2 = f->sign1;
9596
f->sign1 = 1;
9697
}
97-
if (tmp > 0) {
98-
sign1 *= 1;
99-
sign2 *= 1;
98+
else {
99+
sign1 *= 0;
100+
sign2 *= 0;
100101
f->sign2 = f->sign1;
101102
f->sign1 = 1;
102103
}

src/aptx422/qmf.c

+2-1
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,8 @@ void aptX_QMF_conv_inner(const int32_t s1[16], const int32_t s2[16], int32_t * o
6161
*out_b = r2;
6262
}
6363

64-
void aptX_QMF_analysis(aptX_QMF_analyzer_422 * qmf, const int32_t samples[4], const int32_t refs[4], int32_t diff[4]) {
64+
void aptX_QMF_analysis(aptX_QMF_analyzer_422 * restrict qmf, const int32_t samples[restrict 4],
65+
const int32_t refs[restrict 4], int32_t diff[restrict 4]) {
6566

6667
int32_t a, b, c, d;
6768
int32_t tmp[4];

src/aptx422/search.c

+4-8
Original file line numberDiff line numberDiff line change
@@ -16,15 +16,11 @@ static size_t aptX_search_quant_coeff(uint32_t a, int32_t x, const int32_t * dat
1616
* integer space. The search is done using a simple binary search algorithm. */
1717

1818
int64_t aa = (int64_t)a << 32;
19+
int64_t xx = x << 8;
1920
size_t i = 0;
20-
size_t n;
21-
22-
for (n = size / 2; n > 0; n /= 2)
23-
/* XXX: There might be a potential error during calculation, because it
24-
* seems that the subtraction is performed as an unsigned operation.
25-
* Anyway, this algorithm and the original one (from the apt-X lib)
26-
* have been stress-tested and both return the same values. */
27-
if ((int64_t)data[i + n] * (x << 8) - aa <= 0)
21+
22+
for (size_t n = size / 2; n > 0; n /= 2)
23+
if (xx * data[i + n] <= aa)
2824
i += n;
2925

3026
return i;

src/aptxhd100/processor.c

+36-37
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@
99
*/
1010

1111
#include "processor.h"
12-
#include <stdio.h>
1312

1413
#include "mathex.h"
1514

@@ -21,7 +20,7 @@ void aptXHD_invert_quantization(int32_t a, int32_t dither, aptXHD_inverter_100 *
2120
int64_t tmp = (int64_t)dither * i->subband_param_dith16_sf1[i_];
2221
tmp = rshift32(((int64_t)sl1 << 31) + tmp);
2322
clamp_int24_t(tmp);
24-
i->unk11 = (i->unk9 * tmp) >> 19;
23+
i->unk11 = (tmp * i->unk9) >> 19;
2524
clamp_int24_t(i->unk11);
2625

2726
i->unk10 = rshift15(32620 * i->unk10 + (i->subband_param_incr16[i_] << 15));
@@ -33,82 +32,82 @@ void aptXHD_invert_quantization(int32_t a, int32_t dither, aptXHD_inverter_100 *
3332

3433
void aptXHD_prediction_filtering(int32_t a, aptXHD_prediction_filter_100 * f) {
3534

36-
uint64_t x1 = (unsigned)f->unk6 * (uint64_t)(unsigned)f->unk3;
37-
x1 += (uint64_t)(f->unk6 * (f->unk3 >> 31) + f->unk3 * (f->unk6 >> 31)) << 32;
35+
int32_t tmp1 = a + f->unk8;
36+
clamp_int24_t(tmp1);
3837

39-
f->unk6 = a + f->unk8;
40-
clamp_int24_t(f->unk6);
38+
int64_t x1 = (int64_t)f->unk3 * f->unk6;
39+
int64_t x2 = (int64_t)tmp1 * f->unk2;
40+
int32_t tmp2 = (x1 + x2) >> 22;
41+
clamp_int24_t(tmp2);
4142

42-
uint64_t x2 = (unsigned)f->unk2 * (uint64_t)(unsigned)f->unk6;
43-
x2 += (uint64_t)(f->unk6 * (f->unk2 >> 31) + f->unk2 * (f->unk6 >> 31)) << 32;
44-
45-
f->unk8 = (x1 + x2) >> 22;
46-
clamp_int24_t(f->unk8);
47-
48-
int32_t v1 = 0x80;
49-
int32_t v2 = 0x80;
43+
int32_t v1 = 128;
44+
int32_t v2 = 128;
5045
if (a) {
51-
v1 = ((a >> 31) & 0x01000000) - 0x7FFF80;
52-
v2 = ((a >> 31) & 0xFF000000) + 0x800080;
46+
v1 = ((a >> 31) & 0x01000000) - 8388480;
47+
v2 = ((a >> 31) & 0xFF000000) + 8388736;
5348
}
5449

50+
size_t q = f->i + f->width;
5551
int64_t sum = 0;
5652
int64_t c = a;
5753

58-
for (size_t i = 0; i < (size_t)f->width; i++) {
54+
for (size_t i = 0; i < (size_t)f->width; i++, q--) {
5955

6056
int32_t tmp;
61-
if (f->arr2[f->i + f->width - i] >= 0)
57+
if (f->arr2[q] >= 0)
6258
tmp = v2 - f->arr1[i];
6359
else
6460
tmp = v1 - f->arr1[i];
6561

6662
f->arr1[i] += (tmp >> 8) - (((uint32_t)tmp) << 23 == 0x80000000);
67-
6863
sum += c * f->arr1[i];
69-
c = f->arr2[f->i + f->width - i];
64+
c = f->arr2[q];
7065
}
7166

67+
f->unk6 = tmp1;
7268
f->unk7 = sum >> 22;
7369
clamp_int24_t(f->unk7);
74-
f->unk8 = f->unk7 + f->unk8;
70+
f->unk8 = f->unk7 + tmp2;
7571
clamp_int24_t(f->unk8);
7672

7773
f->i = (f->i + 1) % f->width;
74+
7875
f->arr2[f->i] = a;
7976
f->arr2[f->i + f->width] = a;
80-
f->subband_param_unk3_3 = a;
8177
}
8278

8379
void aptXHD_process_subband(int32_t a, int32_t dither, aptXHD_prediction_filter_100 * f, aptXHD_inverter_100 * i) {
8480

8581
aptXHD_invert_quantization(a, dither, i);
8682

83+
int32_t sign1 = f->sign1;
84+
int32_t sign2 = f->sign2;
85+
8786
int32_t tmp = f->unk7 + i->unk11;
88-
int sign1 = f->sign1;
89-
int sign2 = f->sign2;
90-
if (tmp > 0) {
91-
f->sign1 = 1;
92-
f->sign2 = sign1;
93-
} else if (tmp < 0) {
94-
f->sign1 = -1;
95-
f->sign2 = sign1;
87+
if (tmp < 0) {
9688
sign1 *= -1;
9789
sign2 *= -1;
98-
} else {
90+
f->sign2 = f->sign1;
91+
f->sign1 = -1;
92+
}
93+
else if (tmp > 0) {
94+
sign1 *= 1;
95+
sign2 *= 1;
96+
f->sign2 = f->sign1;
97+
f->sign1 = 1;
98+
}
99+
else {
100+
sign1 *= 0;
101+
sign2 *= 0;
102+
f->sign2 = f->sign1;
99103
f->sign1 = 1;
100-
f->sign2 = sign1;
101-
sign1 = 0;
102-
sign2 = 0;
103104
}
104105

105106
tmp = -1 * f->unk2 * sign1;
106107
tmp = ((tmp + 1) >> 1) - ((tmp & 3) == 1);
107-
108-
tmp = tmp + 0x80000 * sign2;
109108
clip_range(tmp, -0x100000, 0x100000);
110109

111-
f->unk3 = 254 * f->unk3 + (tmp >> 4 << 8);
110+
f->unk3 = 254 * f->unk3 + 0x800000 * sign2 + (tmp >> 4 << 8);
112111
f->unk3 = rshift8(f->unk3);
113112
clip_range(f->unk3, -0x300000, 0x300000);
114113

src/aptxhd100/qmf.c

+6-5
Original file line numberDiff line numberDiff line change
@@ -61,10 +61,11 @@ void aptXHD_QMF_conv_inner(const int32_t s1[16], const int32_t s2[16], int32_t *
6161
*out_b = r2;
6262
}
6363

64-
void aptXHD_QMF_analysis(aptXHD_QMF_analyzer_100 * qmf, const int32_t samples[4], const int32_t refs[4],
65-
int32_t diff[4]) {
64+
void aptXHD_QMF_analysis(aptXHD_QMF_analyzer_100 * restrict qmf, const int32_t samples[restrict 4],
65+
const int32_t refs[restrict 4], int32_t diff[restrict 4]) {
6666

6767
int32_t a, b, c, d;
68+
int32_t tmp[4];
6869

6970
qmf->outer[0][qmf->i_outer + 0] = samples[0];
7071
qmf->outer[0][qmf->i_outer + 16] = samples[0];
@@ -96,12 +97,12 @@ void aptXHD_QMF_analysis(aptXHD_QMF_analyzer_100 * qmf, const int32_t samples[4]
9697

9798
qmf->i_inner = (qmf->i_inner + 1) % 16;
9899

99-
aptXHD_QMF_conv_inner(&qmf->inner[2][qmf->i_inner + 15], &qmf->inner[0][qmf->i_inner], &diff[0], &diff[1]);
100+
aptXHD_QMF_conv_inner(&qmf->inner[2][qmf->i_inner + 15], &qmf->inner[0][qmf->i_inner], &tmp[0], &tmp[1]);
100101

101-
aptXHD_QMF_conv_inner(&qmf->inner[1][qmf->i_inner + 15], &qmf->inner[3][qmf->i_inner], &diff[2], &diff[3]);
102+
aptXHD_QMF_conv_inner(&qmf->inner[1][qmf->i_inner + 15], &qmf->inner[3][qmf->i_inner], &tmp[2], &tmp[3]);
102103

103104
for (size_t i = 0; i < 4; i++)
104-
diff[i] -= refs[i];
105+
diff[i] = tmp[i] - refs[i];
105106
for (size_t i = 0; i < 4; i++)
106107
clamp_int24_t(diff[i]);
107108
}

src/aptxhd100/quantizer.c

+3-3
Original file line numberDiff line numberDiff line change
@@ -29,10 +29,10 @@ static void aptXHD_quantize_difference(int32_t diff, int32_t dither, int32_t qua
2929
int absdiff = abs32(diff);
3030
clamp_int24_t(absdiff);
3131

32-
int64_t v3 = v2 * 16 * (int64_t)(quant * -256);
33-
q->unk3 = rshift3((v3 >> 32) + absdiff);
32+
int32_t v3 = rshift32((int64_t)(v2 << 4) * (quant * -1 << 8)) + absdiff;
33+
q->unk3 = ((v3 + 4) >> 3) - ((uint8_t)(v3 << 5) == 0x80);
3434

35-
if (absdiff + (v3 >> 32) < 0) {
35+
if (q->unk3 < 0) {
3636
q->unk2 = q->unk1;
3737
q->unk1 = q->unk1 - 1;
3838
q->unk3 = -q->unk3;

src/aptxhd100/search.c

+1-2
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,8 @@ static size_t aptXHD_search_quant_coeff(uint32_t a, int32_t x, const int32_t * d
1515
int64_t aa = (int64_t)a << 32;
1616
int64_t xx = x << 8;
1717
size_t i = 0;
18-
size_t n;
1918

20-
for (n = size / 2; n > 0; n /= 2)
19+
for (size_t n = size / 2; n > 0; n /= 2)
2120
if (xx * data[i + n] <= aa)
2221
i += n;
2322

test/heval-hd100.c

+3-3
Original file line numberDiff line numberDiff line change
@@ -67,9 +67,9 @@ static int eval_init(size_t nloops, bool errstop) {
6767
aptxhdbtenc_init(&enc_100, endian);
6868
aptXHD_init(&enc_new, endian);
6969

70-
int c, b, ret = 0;
71-
for (c = 0; c < APTXHD_CHANNELS; c++)
72-
for (b = 0; b < APTXHD_SUBBANDS; b++) {
70+
int ret = 0;
71+
for (size_t c = 0; c < APTXHD_CHANNELS; c++)
72+
for (size_t b = 0; b < APTXHD_SUBBANDS; b++) {
7373
for (size_t i = 0; i < param_sizes[b]; i++)
7474
ret |= diffint("bit16", enc_new.encoder[c].processor[b].inverter.subband_param_bit16_sl1[i],
7575
enc_100.encoder[c].processor[b].inverter.subband_param_bit16_sl1[i]);

0 commit comments

Comments
 (0)