Skip to content

Commit 1d55ca1

Browse files
committed
Implemented dtof in assembly
1 parent 9e250a3 commit 1d55ca1

File tree

8 files changed

+406
-248
lines changed

8 files changed

+406
-248
lines changed

src/crt/dtof.src

Lines changed: 219 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,219 @@
1-
assume adl=1
2-
3-
section .text
4-
5-
public __dtof
6-
7-
__dtof:
8-
; f64_ret_f32
9-
push af, iy, bc, de, hl
10-
call ___f64_to_f32
11-
pop af
12-
ld a, e
13-
pop de
14-
ld e, a
15-
pop bc, iy, af
16-
ret
17-
18-
extern ___f64_to_f32
1+
assume adl=1
2+
3+
section .text
4+
5+
public __dtof
6+
7+
private __dtof_helper
8+
__dtof_helper:
9+
; Placing this block of code before __dtof ensures that
10+
; __dtof.ret_copysign stays within jr range on every path.
11+
.overflow:
12+
; carry is set here
13+
pop hl
14+
; A = $10
15+
add a, c ; attempts to overflow the low 4 bits of the exponent
16+
rl b ; (0x7F << 1) | 1 if the input is inf/NaN
17+
inc b ; B will only be zero if the input was inf/NaN
18+
jr nz, .not_inf_nan
19+
20+
; carry is cleared
21+
adc hl, hl
22+
jr nz, .has_payload
23+
ld a, e
24+
rla
25+
and a, $3F
26+
jr z, .no_payload
27+
.has_payload:
28+
set 5, e ; ensure that NaN stays NaN
29+
.no_payload:
30+
ld a, c
31+
push de
32+
pop bc
33+
ld l, 5
34+
call __lshru
35+
push bc
36+
pop hl
37+
.finish_inf_nan:
38+
ld a, $7F
39+
jr __dtof.ret_copysign
40+
.not_inf_nan:
41+
; return infinity
42+
ld hl, $800000
43+
jr .finish_inf_nan
44+
45+
; Convert BC:UDE:UHL F64 to E:UHL F32
46+
; Rounding: round to nearest with ties to even
47+
; Behaviour:
48+
; Underflow: Returns signed zero. No signals raised.
49+
; Subnormal: No signals raised.
50+
; Rounded to Infinity: No signals raised.
51+
; Overflow: Returns signed infinity. No signals raised.
52+
; Signaling NaN: Quiet bit preserved. No signals raised.
53+
; Quiet NaN: Quiet bit preserved. No signals raised.
54+
; NaN Payloads: Copies the most significant payload bits. The LSB of the mantissa is set if any of the discarded (truncated) payload bits were non-zero.
55+
__dtof:
56+
bit 7, b
57+
push af ; preserve A and signbit
58+
push bc
59+
push de
60+
push hl
61+
res 7, b
62+
ld hl, -$3810
63+
add.s hl, bc
64+
jr nc, .maybe_subnormal
65+
ld hl, -$47F0 ; $FFB810
66+
ld a, l ; ld a, $10
67+
add.s hl, bc
68+
jr c, __dtof_helper.overflow
69+
; result is normal or rounds to infinity
70+
; calculate new exponent
71+
; we only need the low 8 bits of the exponent
72+
add hl, hl
73+
add hl, hl
74+
add hl, hl
75+
add hl, hl
76+
; offset = -$380 - -$47F = $FF = -1 ; therefore decrement
77+
; H = exponent + 1
78+
ld l, 29 ; f64_mant_bits - f32_mant_bits = 52 - 23 = 29
79+
ex (sp), hl ; (SP) = exponent/shift, HL = lo24
80+
81+
; clear exponent
82+
dec a ; ld a, $0F
83+
and a, c
84+
ld c, a
85+
xor a, a
86+
ld b, a
87+
; test round bit
88+
bit 4, e
89+
jr z, .round_down
90+
; test guard bit (the LSB of the truncated result, used to break ties to even)
91+
ld a, e
92+
and a, $20
93+
jr nz, .round_up
94+
; test sticky bits
95+
inc a ; make A non-zero
96+
adc hl, hl
97+
jr nz, .round_up
98+
ld a, e
99+
rla
100+
and a, $1F
101+
.round_up:
102+
.round_down:
103+
call __llshru
104+
; B, C, and UDE are zero here
105+
or a, a
106+
jr z, .no_round
107+
inc hl ; does not overflow
108+
.no_round:
109+
pop af ; a = exponent + 1, flags = 29 = %00011101 = ---H3V-C (carry set)
110+
sbc a, b ; decrement exponent and clear carry
111+
rra
112+
jr nc, .even_exponent
113+
ld bc, $800000
114+
add hl, bc ; the result might be rounded to infinity here
115+
adc a, c ; adc a, 0 ; won't overflow
116+
.even_exponent:
117+
.subnormal_no_round:
118+
.ret_copysign:
119+
pop de
120+
ld e, a
121+
pop bc
122+
pop af
123+
ret z
124+
set 7, e
125+
ret
126+
127+
.ret_zero:
128+
; carry is cleared
129+
pop hl
130+
xor a, a
131+
sbc hl, hl
132+
jr .ret_copysign
133+
134+
.maybe_subnormal:
135+
ld hl, -$3690
136+
add.s hl, bc
137+
jr nc, .ret_zero
138+
; calculate shift
139+
; A = (uint8_t)((BC - $3690) >> 4)
140+
; A = (uint8_t)((HL << 4) >> 8)
141+
add hl, hl
142+
add hl, hl
143+
add hl, hl
144+
add hl, hl
145+
; Shift = -A + 4 + 24
146+
ld a, 4 + 24
147+
sub a, h
148+
; maximum shift = 24 + 4 + 25 = 24 + 29 = 53
149+
; minimum shift = 24 + 4 + 1 = 24 + 5 = 29
150+
ld b, a
151+
ld e, a ; store shift amount
152+
xor a, a
153+
; calculate sticky bits
154+
sbc hl, hl
155+
inc hl ; ld hl, 1
156+
.shift_loop:
157+
add hl, hl
158+
adc a, a
159+
djnz .shift_loop
160+
; carry won't be set
161+
; set C:UDE to A:UHL
162+
; shift by an additional 24 bits
163+
dec hl
164+
jr z, .the_set_bit_is_in_hl
165+
dec a
166+
.the_set_bit_is_in_hl:
167+
ld c, a
168+
ld a, e ; restore shift amount
169+
ex de, hl
170+
scf
171+
sbc hl, hl
172+
; BC:UDE:UHL = 1 << shift
173+
; (SP) = X
174+
call __lland
175+
call __llcmpzero
176+
pop hl
177+
; DE and BC are swapped here
178+
pop bc
179+
pop de
180+
push de
181+
push bc
182+
183+
; clear exponent and include the implicit mantissa bit
184+
ld d, 0
185+
jr z, .no_sticky_bits
186+
inc d
187+
.no_sticky_bits:
188+
189+
ld l, a ; L = shift
190+
ld a, e
191+
and a, $0F
192+
or a, $10
193+
194+
call __lshru
195+
xor a, a ; subnormal exponent
196+
; HL = BC >> 1
197+
scf
198+
sbc hl, hl ; ld hl, -1
199+
add hl, sp
200+
push bc
201+
srl (hl)
202+
pop hl
203+
rr h
204+
rr l ; round bit shifted out
205+
206+
jr nc, .subnormal_no_round
207+
dec d
208+
jr z, .subnormal_round_up
209+
bit 0, l
210+
jr z, .subnormal_no_round
211+
.subnormal_round_up:
212+
inc hl ; won't overflow, but may become FLT_MIN
213+
; .subnormal_no_round:
214+
jr .ret_copysign
215+
216+
extern __lland
217+
extern __llcmpzero
218+
extern __llshru
219+
extern __lshru

src/softfloat/f64_to_f32.c

Lines changed: 0 additions & 88 deletions
This file was deleted.

src/softfloat/include/specialize.h

Lines changed: 0 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -145,15 +145,7 @@ void softfloat_f32UIToCommonNaN( uint_fast32_t uiA, struct commonNaN *zPtr );
145145
| Converts the common NaN pointed to by 'aPtr' into a 32-bit floating-point
146146
| NaN, and returns the bit pattern of this value as an unsigned integer.
147147
*----------------------------------------------------------------------------*/
148-
#if 0
149148
uint_fast32_t softfloat_commonNaNToF32UI( const struct commonNaN *aPtr );
150-
#else
151-
/** only used by f64_to_f32 currently */
152-
static inline uint_fast32_t softfloat_commonNaNToF32UI( const struct commonNaN *aPtr )
153-
{
154-
return (uint_fast32_t) aPtr->sign<<31 | 0x7FC00000 | aPtr->v64>>41;
155-
}
156-
#endif
157149

158150
/*----------------------------------------------------------------------------
159151
| Interpreting 'uiA' and 'uiB' as the bit patterns of two 32-bit floating-
@@ -186,20 +178,7 @@ bool softfloat_isSigNaNF64UI(uint64_t a) __attribute__((__const__, __nothrow__,
186178
| location pointed to by 'zPtr'. If the NaN is a signaling NaN, the invalid
187179
| exception is raised.
188180
*----------------------------------------------------------------------------*/
189-
#if 0
190181
void softfloat_f64UIToCommonNaN( uint_fast64_t uiA, struct commonNaN *zPtr );
191-
#else
192-
/** only used by f64_to_f32 currently */
193-
static inline void softfloat_f64UIToCommonNaN( uint_fast64_t uiA, struct commonNaN *zPtr )
194-
{
195-
if ( softfloat_isSigNaNF64UI( uiA ) ) {
196-
softfloat_raiseFlags( softfloat_flag_invalid );
197-
}
198-
zPtr->sign = uiA>>63;
199-
zPtr->v64 = uiA<<12;
200-
zPtr->v0 = 0;
201-
}
202-
#endif
203182

204183
/*----------------------------------------------------------------------------
205184
| Converts the common NaN pointed to by 'aPtr' into a 64-bit floating-point

0 commit comments

Comments
 (0)