-
Notifications
You must be signed in to change notification settings - Fork 1
/
tinfo.py
882 lines (738 loc) · 42.7 KB
/
tinfo.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
from .basetypes import *
import math
@singleton
class TypeVarInt15(Construct):
r"""
construct adapter that handles (de)serialization of tinfo_t dt types (see get_dt)
"""
def _parse(self, stream, context, path):
b = byte2int(stream_read(stream, 1, path))
if b & 0b10000000: #we are in 2 bytes range
b = (b & 0b01111111) + (stream_read(stream, 1, path)[0] << 7)
#its ""somewhat"" little endian
return b - 1
#TODO test
def _build(self, obj, stream, context, path):
if not isinstance(obj, integertypes):
raise IntegerError("value is not an integer", path=path)
if obj < 0:
raise IntegerError("cannot build from negative number: %r" % (obj,), path=path)
if obj > 0x7FFE:
raise IntegerError("cannot build from number above short range: %r" % (obj,), path=path)
if obj > 0x7F:
payload = bytes([obj / 0x80, obj % 0x80 + 1])
stream_write(stream, payload, len(payload), path)
return obj
@singleton
class TypeVarInt32(Construct):
r"""
construct adapter that handles (de)serialization of tinfo_t de types (see get_de)
"""
def _parse(self, stream, context, path):
val = 0
while True:
curr = stream_read(stream, 1, path)[0]
if curr & 0b10000000:
val = val << 7 | curr & 0b01111111
else: #last byte has 0b01000000
val = val << 6 | curr & 0b00111111
break
return val
#TODO test
def _build(self, obj, stream, context, path):
if not isinstance(obj, integertypes):
raise IntegerError("value is not an integer", path=path)
if obj < 0:
raise IntegerError("cannot build from negative number: %r" % (obj,), path=path)
if obj > 0xFFFFFFFF:
raise IntegerError("cannot build from number above 32 bit: %r" % (obj,), path=path)
b = []
cascade = False #once the highest bit location has been found we dont care about whether the lower bits are zero or not - we always build
if obj >> 27:
b.append(((obj >> 27) & 0b01111111) | 0b10000000)
cascade = True
if cascade or obj >> 20:
b.append(((obj >> 20) & 0b01111111) | 0b10000000)
cascade = True
if cascade or obj >> 13:
b.append(((obj >> 13) & 0b01111111) | 0b10000000)
cascade = True
if cascade or obj >> 6:
b.append(((obj >> 6) & 0b01111111) | 0b10000000)
#always craft last byte
b.append((obj & 0b00111111) | 0b01000000)
stream_write(stream, bytes(b), len(b), path)
return obj
@singleton
class TypeArrayData(Construct):
r"""
construct adapter that handles (de)serialization of tinfo_t da types (see get_da)
40bit base / 40bit num_elems
"""
def _parse(self, stream, context, path):
data = stream_read(stream, 9, path)
bb, bne = data[:4], data[5:]
#similar to big endian?
bv = 0
for b in bb:
bv = bv << 7 | (b & 0b01111111)
nev = 0
for b in bne:
nev = nev << 7 | (b & 0b01111111)
return con.Container(base=(bv << 4) | (data[4] & 0xF), num_elems=(data[4] >> 4) | nev)
#TODO test
def _build(self, obj, stream, context, path):
if not isinstance(obj, integertypes):
raise IntegerError("value is not an integer", path=path)
if obj < 0:
raise IntegerError("cannot build from negative number: %r" % (obj,), path=path)
if obj > 0x7FFE:
raise IntegerError("cannot build from number above 0x7FFE: %r" % (obj,), path=path)
bv, nev = obj['base'], obj['num_elems']
bb, bne = [], []
while bv:
bb.append(bv & 0b01111111)
bv = bv >> 7
while nev:
bne.append(nev & 0b01111111)
nev = nev >> 7
stream_write(stream, bb[:4] + bytes([bb[5] & 0xF | bne << 4]) + bne[1:], 9, path)
return obj
# ref p_string
TypeInfoString = con.PascalString(TypeVarInt15, "utf8")
RESERVED_BYTE = 0xFF #multipurpose(?)
# Common Types
# IDs for common types
CommonTypes = con.Enum(con.Byte,
STI_PCHAR = 0x0, #< char *
STI_PUCHAR = 0x1, #< uint8 *
STI_PCCHAR = 0x2, #< const char *
STI_PCUCHAR = 0x3, #< const uint8 *
STI_PBYTE = 0x4, #< _BYTE *
STI_PINT = 0x5, #< int *
STI_PUINT = 0x6, #< unsigned int *
STI_PVOID = 0x7, #< void *
STI_PPVOID = 0x8, #< void **
STI_PCVOID = 0x9, #< const void *
STI_ACHAR = 0xA, #< char[]
STI_AUCHAR = 0xB, #< uint8[]
STI_ACCHAR = 0xC, #< const char[]
STI_ACUCHAR = 0xD, #< const uint8[]
STI_FPURGING = 0xE, #< void __userpurge(int)
STI_FDELOP = 0xF, #< void __cdecl(void *)
STI_MSGSEND = 0x10, #< void *(void *, const char *, ...)
STI_AEABI_LCMP = 0x11, #< int __fastcall(int64 x, int64 y)
STI_AEABI_ULCMP = 0x12, #< int __fastcall(uint64 x, uint64 y)
STI_DONT_USE = 0x13, #< unused stock type id; should not be used
STI_SIZE_T = 0x14, #< size_t
STI_SSIZE_T = 0x15, #< ssize_t
STI_AEABI_MEMCPY = 0x16, #< void __fastcall(void *, const void *, size_t)
STI_AEABI_MEMSET = 0x17, #< void __fastcall(void *, size_t, int)
STI_AEABI_MEMCLR = 0x18, #< void __fastcall(void *, size_t)
STI_RTC_CHECK_2 = 0x19, #< int16 __fastcall(int16 x)
STI_RTC_CHECK_4 = 0x1A, #< int32 __fastcall(int32 x)
STI_RTC_CHECK_8 = 0x1B, #< int64 __fastcall(int64 x)
STI_LAST = 0x1C,
)
# type_t Definitions
# ref tf_mask
TYPE_SIZE = con.BitsInteger(4)
TYPE_BASE_MASK = 0x0F
FLAGS_SIZE = con.BitsInteger(2)
TYPE_FLAGS_MASK = 0x30
MOD_SIZE = con.BitsInteger(2)
TYPE_MODIF_MASK = 0xC0
# ref tf
BaseTypes = con.Enum(TYPE_SIZE,
BT_UNK = 0x00, #< unknown
BT_VOID = 0x01, #< void
BT_INT8 = 0x02, #< __int8
BT_INT16 = 0x03, #< __int16
BT_INT32 = 0x04, #< __int32
BT_INT64 = 0x05, #< __int64
BT_INT128 = 0x06, #< __int128 (for alpha & future use)
BT_INT = 0x07, #< natural int. (size provided by idp module)
BT_BOOL = 0x08, #< bool
BT_FLOAT = 0x09, #< float
BT_PTR = 0x0A, #< pointer.
BT_ARRAY = 0x0B, #< array
BT_FUNC = 0x0C, #< function.
BT_COMPLEX = 0x0D, #< struct/union/enum/typedef.
BT_BITFIELD = 0x0E, #< bitfield (only in struct)
BT_RESERVED = 0x0F, #< RESERVED
)
#BT_INT might be a bit of a pain to support across archs
# start basic type flags
# ref tf_unk
VoidFlags = con.Enum(FLAGS_SIZE,
BTMT_SIZE0 = 0x0, #< ::BT_VOID - normal void; ::BT_UNK - don't use
BTMT_SIZE12 = 0x1, #< size = 1 byte if ::BT_VOID; 2 if ::BT_UNK
BTMT_SIZE48 = 0x2, #< size = 4 bytes if ::BT_VOID; 8 if ::BT_UNK
BTMT_SIZE128 = 0x3, #< size = 16 bytes if ::BT_VOID; unknown if ::BT_UNK
#< (IN struct alignment - see below)
)
# ref tf_int
IntFlags = con.Enum(FLAGS_SIZE,
BTMT_UNKSIGN = 0x0, #< unknown signedness
BTMT_SIGNED = 0x1, #< signed
BTMT_USIGNED = 0x2, #< unsigned
BTMT_CHAR = 0x3, #< specify char or segment register
#< - ::BT_INT8 - char
#< - ::BT_INT - segment register
#< - other BT_INT... - don't use
)
# ref tf_bool
BoolFlags = con.Enum(FLAGS_SIZE,
BTMT_DEFBOOL = 0x0, #< size is model specific or unknown(?)
BTMT_BOOL1 = 0x1, #< size 1byte
BTMT_BOOL2OR8= 0x2, #< size 2bytes - !inf_is_64bit(); size 8bytes - inf_is_64bit()
BTMT_BOOL4 = 0x3, #< size 4bytes
)
#pain another arch dependent flag
# ref tf_float
FloatFlags = con.Enum(FLAGS_SIZE,
BTMT_FLOAT = 0x0, #< float (4 bytes)
BTMT_DOUBLE = 0x1, #< double (8 bytes)
BTMT_LNGDBL = 0x2, #< long double (compiler specific)
BTMT_SPECFLT = 0x3, #< float (variable size).
#< if \ph{use_tbyte()} then use \ph{tbyte_size},
#< otherwise 2 bytes
)
# end basic type flags (ref tf_last_basic)
# ref tf_ptr
PtrFlags = con.Enum(FLAGS_SIZE,
BTMT_DEFPTR = 0x0, #< default for model
BTMT_NEAR = 0x1, #< near
BTMT_FAR = 0x2, #< far
BTMT_CLOSURE = 0x3, #< closure.
#< - if ptr to ::BT_FUNC - __closure.
#< in this case next byte MUST be
#< #RESERVED_BYTE, and after it ::BT_FUNC
#< - else the next byte contains sizeof(ptr)
#< allowed values are 1 - \varmem{ph,processor_t,max_ptr_size}
#< - if value is bigger than \varmem{ph,processor_t,max_ptr_size},
#< based_ptr_name_and_size() is called to
#< find out the typeinfo
)
# ref tf_array
ArrayFlags = con.Enum(FLAGS_SIZE,
BTMT_NONE = 0x0, #(non-IDA) signifies no flags for array
BTMT_NONBASED = 0x1, #< \code
# if set
# array base==0
# format: dt num_elem; [tah-typeattrs]; type_t...
# if num_elem==0 then the array size is unknown
# else
# format: da num_elem, base; [tah-typeattrs]; type_t... \endcode
# used only for serialization
BTMT_ARRESERV = 0x2, #< reserved bit
)
# ref tf_func
FuncFlags = con.Enum(FLAGS_SIZE,
BTMT_DEFCALL = 0x0, #< call method - default for model or unknown
BTMT_NEARCALL = 0x1, #< function returns by retn
BTMT_FARCALL = 0x2, #< function returns by retf
BTMT_INTCALL = 0x3, #< function returns by iret
#< in this case cc MUST be 'unknown'
)
# ref tf_complex
ComplexFlags = con.Enum(FLAGS_SIZE,
BTMT_STRUCT = 0x0, #< struct:
#< MCNT records: type_t; [sdacl-typeattrs];
BTMT_UNION = 0x1, #< union:
#< MCNT records: type_t...
BTMT_ENUM = 0x2, #< enum:
#< next byte bte_t (see below)
#< N records: de delta(s)
#< OR
#< blocks (see below)
BTMT_TYPEDEF = 0x3, #< named reference
#< always p_string name
)
BitFieldFlags = con.Enum(FLAGS_SIZE,
BTMT_BFLDI8 = 0x0, #< __int8
BTMT_BFLDI16 = 0x1, #< __int16
BTMT_BFLDI32 = 0x2, #< __int32
BTMT_BFLDI64 = 0x3, #< __int64
)
# end flags
FlagsMapping = con.Switch(con.this.basetype, {
BaseTypes.BT_VOID : VoidFlags,
BaseTypes.BT_INT8 : IntFlags,
BaseTypes.BT_INT16 : IntFlags,
BaseTypes.BT_INT32 : IntFlags,
BaseTypes.BT_INT64 : IntFlags,
BaseTypes.BT_INT128 : IntFlags,
BaseTypes.BT_INT : IntFlags,
BaseTypes.BT_BOOL : BoolFlags,
BaseTypes.BT_FLOAT : FloatFlags,
BaseTypes.BT_PTR : PtrFlags,
BaseTypes.BT_ARRAY : ArrayFlags,
BaseTypes.BT_FUNC : FuncFlags,
BaseTypes.BT_COMPLEX : ComplexFlags,
BaseTypes.BT_BITFIELD: BitFieldFlags,
}, default=FLAGS_SIZE) #leave it as bits
# ref tf_modifiers
# mutually exclusive; only applies for BT_ARRAY and BT_VOID?
Modifiers = con.Enum(MOD_SIZE,
BTM_NONE = 0x0, #(non-IDA) signifies no modifiers
BTM_CONST = 0x1, #< const
BTM_VOLATILE = 0x2, #< volatile
)
#typedef uchar type_t - 4/2/2 bits, see ref tf
type_t = con.Restreamed(con.Struct(
"basetype" / BaseTypes,
"flags" / FlagsMapping,
"modifiers" / Modifiers,
), lambda data: (s:=con.bytes2bits(data))[4:]+s[2:4]+s[:2], 1
, lambda data: con.bits2bytes(data[:2]+data[2:4]+data[:4]), 8, 1) #we know type_t is 1 byte, so no need for a lambda
#reorders the bitfields so FlagsMapping can parse correctly
# Calling Convention definitions
#ref CM_
CM_SIZE = con.BitsInteger(2) #const cm_t CM_MASK = 0x03;
CM_M_SIZE = con.BitsInteger(2) #const cm_t CM_M_MASK = 0x0C;
CM_CC_SIZE = con.BitsInteger(4) #const cm_t CM_CC_MASK = 0xF0;
#ref CM_ptr
CallingPtrSize = con.Enum(CM_SIZE,
CM_UNKNOWN = 0x00, #< unknown
CM_N8_F16 = 0x01, #< if sizeof(int)<=2: near 1 byte, far 2 bytes
#< if sizeof(int)>2: near 8 bytes, far 8 bytes
CM_N16_F32 = 0x02, #< near 2 bytes, far 4 bytes
CM_N32_F48 = 0x03, #< near 4 bytes, far 6 bytes
)
#ref CM_M_
CallingModel = con.Enum(CM_M_SIZE,
CM_M_NN = 0x00, #< small: code=near, data=near (or unknown if CM_UNKNOWN)
CM_M_FF = 0x01, #< large: code=far, data=far
CM_M_NF = 0x02, #< compact: code=near, data=far
CM_M_FN = 0x03, #< medium: code=far, data=near
)
#ref CM_CC_
CallingConvention = con.Enum(CM_CC_SIZE,
CM_CC_INVALID = 0x00, #< this value is invalid
CM_CC_UNKNOWN = 0x01, #< unknown calling convention
CM_CC_VOIDARG = 0x02, #< function without arguments
#< if has other cc and argnum == 0,
#< represent as f() - unknown list
CM_CC_CDECL = 0x03, #< stack
CM_CC_ELLIPSIS = 0x04, #< cdecl + ellipsis
CM_CC_STDCALL = 0x05, #< stack, purged
CM_CC_PASCAL = 0x06, #< stack, purged, reverse order of args
CM_CC_FASTCALL = 0x07, #< stack, purged (x86), first args are in regs (compiler-dependent)
CM_CC_THISCALL = 0x08, #< stack, purged (x86), first arg is in reg (compiler-dependent)
CM_CC_MANUAL = 0x09, #< special case for compiler specific (not used)
CM_CC_SPOILED = 0x0A, #< This is NOT a cc! Mark of __spoil record
#< the low nibble is count and after n {spoilreg_t}
#< present real cm_t byte. if n == BFA_FUNC_MARKER,
#< the next byte is the function attribute byte.
CM_CC_RESERVE4 = 0x0B,
CM_CC_RESERVE3 = 0x0C,
CM_CC_SPECIALE = 0x0D, #< ::CM_CC_SPECIAL with ellipsis
CM_CC_SPECIALP = 0x0E, #< Equal to ::CM_CC_SPECIAL, but with purged stack
CM_CC_SPECIAL = 0x0F, #< usercall: locations of all arguments
#< and the return value are explicitly specified
)
cm_t = con.BitStruct(
'convention' / CallingConvention,
'model' / CallingModel,
'ptrsize' / CallingPtrSize,
)
#ref BFA_ Function attribute byte
BFA_FUNC_MARKER = 0x0F #< This is NOT a cc! (used internally as a marker)
BfaByte = con.FlagsEnum(con.Byte,
BFA_NORET = 0x01, #< __noreturn
BFA_PURE = 0x02, #< __pure
BFA_HIGH = 0x04, #< high level prototype (with possibly hidden args)
BFA_STATIC = 0x08, #< static
BFA_VIRTUAL = 0x10, #< virtual
BFA_FUNC_EXT_FORMAT = 0x80, #< This is NOT a real attribute (used internally as marker for extended format)
)
#ref FTI_ Function type data property bits
FtiFlags = con.FlagsEnum(con.BitsInteger(6),
FTI_SPOILED = 0x0001, #< information about spoiled registers is present
FTI_NORET = 0x0002, #< noreturn
FTI_PURE = 0x0004, #< __pure
FTI_HIGH = 0x0008, #< high level prototype (with possibly hidden args)
FTI_STATIC = 0x0010, #< static
FTI_VIRTUAL = 0x0020, #< virtual
)
#FTI_CALLTYPE = 0x00C0, #< mask for FTI_*CALL
FtiCallType = con.FlagsEnum(con.BitsInteger(2), #i think its mutually exclusive(?) but ill leave it as flags for now
FTI_DEFCALL = 0x0000, #< default call
FTI_NEARCALL = 0x0040, #< near call
FTI_FARCALL = 0x0080, #< far call
FTI_INTCALL = 0x00C0, #< interrupt call
)
#FTI_ALL = 0x01FF, #< all defined bits
fti_vals = con.BitStruct(
con.Padding(7), #(non-IDA) unnamed padding for the unused 0xFE bits in first byte
'arglocs' / con.Flag, #< info about argument locations has been calculated (FTI_ARGLOCS = 0x100)
#< (stkargs and retloc too)
'calltype' / FtiCallType,
'flags' / FtiFlags,
)
#con.Bitwise does not read enough if the struct does not consume an amount divisible by 8 (integer division)
#not to be directly used just like AttrMapping - should be bundled with get_de here
#ref FAI_ Function argument property bits (funcarg_t::flags)
FuncArgFlags = con.FlagsEnum(TypeVarInt32,
FAI_HIDDEN = 0x0001, #< hidden argument
FAI_RETPTR = 0x0002, #< pointer to return value. implies hidden
FAI_STRUCT = 0x0004, #< was initially a structure
FAI_ARRAY = 0x0008, #< was initially an array
#< see "__org_typedef" or "__org_arrdim" type attributes
#< to determine the original typ
)
# Enum flags definitions
#ref tf_enum
OutputStyle = con.Enum(con.BitsInteger(2),
BTE_HEX = 0b00, #< hex
BTE_CHAR = 0b01, #< char or hex
BTE_SDEC = 0b10, #< signed decimal
BTE_UDEC = 0b11, #< unsigned decimal
)
bte_t = con.BitStruct(
"always" / con.ExprValidator(con.Flag, con.obj_), #< this bit MUST be present (failfast)
"style" / OutputStyle, #< output style mask
"bitfield" / con.Flag, #< 'subarrays'. In this case ANY record
#< has the following format:
#< - 'de' mask (has name)
#< - 'dt' cnt
#< - cnt records of 'de' values
#< (cnt CAN be 0)
#< \note delta for ALL subsegment is ONE
"reserved" / con.ExprValidator(con.Flag, not con.obj_), #< must be 0, in order to distinguish
#< from a tah-byte
"size" / con.ExprAdapter(con.BitsInteger(3), #< storage size.
lambda n,_: 1 << (n-1) if n else -1, #< - if == 0 then inf_get_cc_size_e()
lambda n,_: int(math.log(n,2)) + 1), #< - else 1 << (n -1) = 1,2,4...64
#-1 sigifies default (cc_size_e)
)
# TAH (type attribute header) definitions
#not to be directly used (it requires preprocessing of values - see check_tah_or_sdacl)
TAFLAGS_SIZE = con.BytesInteger(2) #type attribute flags size (ref IDA 7.5, TAH_ALL = 0x01F0 #< all defined bits)
TAH_BYTE = 0xFE #< type attribute header byte
FAH_BYTE = 0xFF #< function argument attribute header byte
TAH_HASATTRS = 0x0010 #< has extended attributes
# ref tattr_udt (Type attributes for udts) (udt = struct || union, complex type)
UdtAttrFlags = con.FlagsEnum(TAFLAGS_SIZE,
TAUDT_UNALIGNED = 0x0040, #< struct: unaligned struct
TAUDT_MSSTRUCT = 0x0020, #< struct: gcc msstruct attribute
TAUDT_CPPOBJ = 0x0080, #< struct: a c++ object, not simple pod type
TAUDT_VFTABLE = 0x0100, #< struct: is virtual function table
)
# ref tattr_field (Type attributes for udt fields)
UdtFieldAttrFlags = con.FlagsEnum(TAFLAGS_SIZE,
TAFLD_BASECLASS = 0x0020, #< field: do not include but inherit from the current field
TAFLD_UNALIGNED = 0x0040, #< field: unaligned field
TAFLD_VIRTBASE = 0x0080, #< field: virtual base (not supported yet)
TAFLD_VFTABLE = 0x0100, #< field: ptr to virtual function table
)
# ref tattr_ptr (Type attributes for pointers)
PtrAttrFlags = con.FlagsEnum(TAFLAGS_SIZE,
TAPTR_PTR32 = 0x0020, #< ptr: __ptr32
TAPTR_PTR64 = 0x0040, #< ptr: __ptr64
TAPTR_RESTRICT = 0x0060, #< ptr: __restrict
TAPTR_SHIFTED = 0x0080, #< ptr: __shifted(parent_struct, delta)
)
# ref tattr_enum (Type attributes for enums)
EnumAttrFlags = con.FlagsEnum(TAFLAGS_SIZE,
TAENUM_64BIT = 0x0020, #< enum: store 64-bit values
TAENUM_UNSIGNED = 0x0040, #< enum: unsigned
TAENUM_SIGNED = 0x0080, #< enum: signed
)
#i believe these are the only ones that are implemented right now, but basically all types' format aside from bitfields have optional tah-typeattrs fields
AttrMapping = con.Switch(con.this.type, {
BaseTypes.BT_PTR : PtrAttrFlags,
BaseTypes.BT_COMPLEX : UdtAttrFlags, #expects BT_COMPLEX to mean struct | union only
ComplexFlags.BTMT_ENUM: EnumAttrFlags,
None : UdtFieldAttrFlags, #None signifies any types that are fields of a udt type (currently unused by this parser)
}, default=TAFLAGS_SIZE)
ArglocType = con.Enum(con.BytesInteger(1),
ALOC_NONE = 0, #< none
ALOC_STACK = 1, #< stack offset
ALOC_DIST = 2, #< distributed (scattered)
ALOC_REG1 = 3, #< one register (and offset within it)
ALOC_REG2 = 4, #< register pair
ALOC_RREL = 5, #< register relative
ALOC_STATIC = 6, #< global address
ALOC_CUSTOM = 7, #< custom argloc (7 or higher)
)
@singleton
class ArgLoc(Construct):
r"""
construct adapter that handles (de)serialization of argloc_t
"""
#only guaranteed field to exist is type - please check type before accessing fields
def _parse(self, stream, context, path):
#TODO test more thoroughly, esp the more complicated arglocs (pure reversed code again woohoo)
#default case - is_badloc() == true
data = con.Container(type=ArglocType.ALOC_NONE)
if (b:=stream_read(stream, 1, path)[0]) != 0xFF:
#simple serializations (basically most cases in Lumina)
val = (b & 0x7F) - 1
if b & 0b10000000:
if b == 0b10000000: #0x80 == ALOC_STACK, unless retval which is forbidden (which we dont check here)
data = con.Container(
type = ArglocType.ALOC_STACK,
stkoff = 0
)
else:
# high 16 bit reg2 / low 16 bit reg1 (main register number only, register size is to be computed by argument size)
data = con.Container(
type = ArglocType.ALOC_REG2,
reginfo = con.Container(reg1=val, reg2=stream_read(stream, 1, path)[0] - 1)
)
else:
data = con.Container(
type = ArglocType.ALOC_REG1,
reginfo = con.Container(reg=val, off=0),
)
else:
#TODO test complex argloc serializations
#a LOT of assumptions were made wrt value consumption - somehow all of the de/dt deserializations are inlined here, so these assumptions might be wrong
val = TypeVarInt15.parse_stream(stream)
if val & 0b1000000: #0x40, probably some sort of internal flag
#special case - seems to be just handling ALOC_DIST
n = val & ~0b1000000
parts = []
for _ in range(n):
parts.append(con.Container(
argloc = ArgLoc.parse_stream(stream),
off = TypeVarInt15.parse_stream(stream),
size = TypeVarInt15.parse_stream(stream),
))
data = con.Container(
type = ArglocType.DIST,
parts = con.ListContainer(parts)
)
else:
#other serializations (honestly a lot of these are overlapping, it's just that the namings are different)
type = val + 1 #for some reason it has to be +1'd (aka directly using the byte value instead) - makes sense since all types fit in a byte normally but still weird
if type == ArglocType.ALOC_STACK.intvalue:
#seems like even though sval_t -> adiff_t -> dependent on arch size when it comes to argloc_t it is always 64 bit
data = con.Container(
type = ArglocType.ALOC_STACK,
stkoff = TypeVarInt32.parse_stream(stream) | (TypeVarInt32.parse_stream(stream) << 32)
)
#somehow ALOC_DIST is missing in this context? it just returns without setting anything, not even signifying a fail (ret == 1)
#ALOC_DIST probably needs to be with the special case instead, and the good just means nothing broke (but argloc is still bad (ALOC_NONE == 0, which is default value after C alloc))
elif type in [ArglocType.ALOC_REG1.intvalue, ArglocType.ALOC_REG2.intvalue]:
pair = type == ArglocType.ALOC_REG2.intvalue
data = con.Container(
type = ArglocType.ALOC_REG2 if pair else ArglocType.ALOC_REG1,
reginfo = con.Container(**{ #different (conditional) naming for keywords, so we do some funny business instead
"reg1" if pair else "reg" : TypeVarInt15.parse_stream(stream),
"reg2" if pair else "off" : TypeVarInt15.parse_stream(stream)
})
)
elif type == ArglocType.ALOC_RREL.intvalue:
data = con.Container(
type = ArglocType.ALOC_RREL,
rrel = con.Container(
reg = TypeVarInt15.parse_stream(stream),
off = TypeVarInt32.parse_stream(stream) | (TypeVarInt32.parse_stream(stream) << 32)
)
)
elif type == ArglocType.ALOC_STATIC.intvalue:
data = con.Container(
type = ArglocType.ALOC_STATIC,
ea = TypeVarInt32.parse_stream(stream) | (TypeVarInt32.parse_stream(stream) << 32)
)
#i dont think extract_argloc supports ALOC_CUSTOM
return data
def _build(self, obj, stream, context, path):
return obj
@singleton
class TypeInfo(Construct):
r"""
construct adapter that handles (de)serialization of tinfo_t
"""
#sdacl's check routine should be exactly the same as tah
#it's very likely that cross-disasm support can be implemented without the full feature set like tah/sdacl so the build counterpart of this is probably not gonna be implemented
#but if it will be, then this method should move to a separate construct class
#TODO test sdacl and hasattrs (all these code is from translating pure reversed code so take it with a huge grain of salt lmao)
def check_tah_or_sdacl(self, stream, type, path, sdacl = False):
byte = stream.read(1) #can't peek with all streams, implement it the dumb way instead
if byte: #do something only when we read something
#sdacl can clash with other bytes - only parse as sdacl when we expect it (aka sdacl is True)
if byte[0] == TAH_BYTE or (sdacl and (((byte[0] & ~TYPE_FLAGS_MASK) ^ TYPE_MODIF_MASK) <= BaseTypes.BT_VOID.intvalue)): #is TAH or SDACL headers
sdacl_variable_bits = (byte[0] & 1 | (byte[0] >> 3) & 6)
val = 0
if byte[0] == TAH_BYTE or sdacl_variable_bits == 0b111:
bit = 0
#assumes this is a valid serialization of tah bytes, otherwise probably returns garbage (checks IDA implemented is not implemented here)
while True: #read it similar to da, but little endian and with variable size
b = stream_read(stream, 1, path)[0]
val |= (b & 0b01111111) << bit
if b & 0b10000000 == 0:
break
bit += 7
else: #not new headers, handle it the legacy way(?) and treat the variable bits as the value directly
val = sdacl_variable_bits + 1
attrs = []
if val & TAH_HASATTRS: #has attributes, otherwise only has flags
n = TypeVarInt15.parse_stream(stream)
for _ in range(n):
name = TypeInfoString.parse_stream(stream)
#TODO logic on 0xAE/0xAC/0xFD checking? probably not
#TODO figure out how this maps to the AttrMapping flags
data = stream_read(stream, TypeVarInt15.parse_stream(stream), path)
attrs.append(con.Container(name=name, data=data))
return con.Container(flags=AttrMapping.parse(val.to_bytes(TAFLAGS_SIZE.sizeof(), byteorder='big'), type=type), attrs=con.ListContainer(attrs))
else:
#likely not TAH or SDACL, reset file pointer and leave
stream.seek(stream.tell()-1)
return None
#it's likely that we won't ever use ordinal outside of parsing (or even in Lumina at all since replace_ordinal_typerefs is always called) so not making another construct for this
def parse_ordinal_or_name(self, stream, path):
size = TypeVarInt15.parse_stream(stream) #not used in ordinals, but we need to read it like a string anyway
byte = stream.read(1) #can't peek with all streams, implement it the dumb way instead
if byte: #do something only when we read something
if byte == b'#': #ordinal
return '#' + str(TypeVarInt32.parse_stream(stream))
else:
stream.seek(stream.tell()-1) #go back since it's not an ordinal
return stream_read(stream, size, path).decode() #it's easier to just do this instead of using TypeInfoString since size is already consumed
def _parse(self, stream, context, path):
typedef = type_t.parse_stream(stream)
data, rest, tah = None, b'', None
if typedef.basetype.intvalue > BaseTypes.BT_FLOAT.intvalue: #only process advanced types; basic types have no special logic aside from optional tah
if typedef.basetype == BaseTypes.BT_PTR:
#TODO test closure somehow
#db sizeof(ptr) seems to be exactly behaving like a byte? i guess this makes sense since allowed values are from 1 onwards
size = stream_read(stream, 1, path) #even if we dont have size we must still have at least 1 byte left
if typedef.flags != PtrFlags.BTMT_CLOSURE: #if not closure theres no ptr size
stream.seek(stream.tell()-1)
size = None
tah = self.check_tah_or_sdacl(stream, typedef.basetype, path)
#if closure and size == RESERVED_BYTE, implicitly expects the parsed tinfo is of BT_FUNC, and size is ignored
data = con.Container(ptrsize=size, type=TypeInfo.parse_stream(stream))
elif typedef.basetype == BaseTypes.BT_ARRAY:
if typedef.flags == ArrayFlags.BTMT_NONBASED:
base = 0
size = TypeVarInt15.parse_stream(stream)
else:
base, size = TypeArrayData.parse_stream(stream).values()
tah = self.check_tah_or_sdacl(stream, typedef.basetype, path)
data = con.Container(base=base, num_elems=size, type=TypeInfo.parse_stream(stream))
elif typedef.basetype == BaseTypes.BT_FUNC:
cm = cm_t.parse_stream(stream)
spoiled = None
if cm.convention == CallingConvention.CM_CC_SPOILED: #not actually a cm_t yet, we need to handle spoiled regs
#TODO figure out when this would be used in IDA and test (a large portion of this is again from pure reversed code)
regsize = cm_t.build(cm)[0] & 0xF #rebuild it into a byte to get the lower nibble
bfa, ftiflags = None, None
if regsize == BFA_FUNC_MARKER:
bfa = BfaByte.parse_stream(stream)
if bfa.BFA_FUNC_EXT_FORMAT:
#we only care about the lower bits (flags)
ftiflags = fti_vals.parse(TypeVarInt32.parse_stream(stream).to_bytes(fti_vals.sizeof(), byteorder='big')).flags
regsize = TypeVarInt15.parse_stream(stream)
#extract_spoiledreg - might wanna move it to a separate construct
regs = []
for _ in range(regsize):
#reg numbering is architecture dependent unfortunately
if (b:=stream_read(stream, 1, path)[0]) & 0b10000000:
if b == 0xFF: #probably for cases where the reg number is larger than 8 bit? again kinda wasteful tbh
reg = TypeVarInt15.parse_stream(stream)
else:
reg = b & 0b01111111
size = stream_read(stream, 1, path)[0] #seems like they assume the size is != 0? considering their principle of not letting null bytes in tinfo
else: #smallest version - both in a single byte
reg = (b & 0xF) - 1
size = (b >> 4) + 1
regs.append(con.Container(reg=reg, size=size)) #ref reg_info_t
spoiled = con.Container(bfa=bfa, ftiflags=ftiflags, regs=con.ListContainer(regs))
cm = cm_t.parse_stream(stream)
tah = self.check_tah_or_sdacl(stream, typedef.basetype, path)
rettype = TypeInfo.parse_stream(stream)
argloc = None
check_argloc = cm.convention in [CallingConvention.CM_CC_SPECIAL, CallingConvention.CM_CC_SPECIALP, CallingConvention.CM_CC_SPECIALE]
if check_argloc and rettype.typedef.basetype != BaseTypes.BT_VOID:
argloc = ArgLoc.parse_stream(stream)
#empty list = void arg, None = unknown arg; otherwise container, with type = BT_UNK for ellipsis or actual arg type
params = []
if cm.convention != CallingConvention.CM_CC_VOIDARG:
n = TypeVarInt15.parse_stream(stream)
if not n:
if cm.convention in [CallingConvention.CM_CC_ELLIPSIS, CallingConvention.CM_CC_SPECIALE]:
#build BT_UNK to signify ellipsis
params.append(con.Container(type=TypeInfo.parse('\0'), argloc=None, flags=None))
else:
params = None
else:
for _ in range(n):
argtype = TypeInfo.parse_stream(stream)
argargloc = ArgLoc.parse_stream(stream) if check_argloc else None
argflags = None
if (b:=stream.read(1)): #don't use stream_read since it's optional
if b[0] == FAH_BYTE:
argflags = FuncArgFlags.parse_stream(stream)
else:
stream.seek(stream.tell()-1) #reset since that is not actually a FAH byte; only when we read a byte in the first place
params.append(con.Container(type=argtype, argloc=argargloc, flags=argflags))
data = con.Container(spoiled=spoiled, cc=cm, rettype=rettype, argloc=argloc, params=con.ListContainer(params))
elif typedef.basetype == BaseTypes.BT_COMPLEX:
if typedef.flags == ComplexFlags.BTMT_TYPEDEF:
#only useful complex type wrt Lumina - see comment below
data = con.Container(name=self.parse_ordinal_or_name(stream, path))
else:
#IDA does not push typerefs verbatim - instead it represents it as a typedef with the ordinal string ('#' + set_de(ord)) as name
#Lumina (calc_func_metadata) always call replace_ordinal_typerefs before serialize_tinfo, so instead of the ordinal string it is replaced with the actual type name
#to get the actual serialized type definition from the ordinal, something like get_numbered_type will have to be called, but they are never called in Lumina afaict
#(which checks out since Lumina does not have any way to pass things like field names, but this also means that complex types never gets pushed unfortunately)
#the best thing one can do when applying the info obtained from Lumina is probably to check whether the type name exists in the disasm database, and utilize it if the size matches nbytes; otherwise probably make an empty struct of size nbytes and let ppl fill it in later
#and the actual struct definitions themselves will likely have to come from exporting local types as a header file and parsing it in the other disassemblers manually
n = TypeVarInt15.parse_stream(stream)
if n == 0: #treat the same as a typedef if there are no member fields
data = con.Container(name=self.parse_ordinal_or_name(stream, path))
tah = self.check_tah_or_sdacl(stream, typedef.basetype, path, True) #sdacl
else:
if n == 0x7FFE: #support high member count; this is technically defined separately for enum and struct & union, but should work the same here
n = TypeVarInt32.parse_stream(stream) #discard existing n from dt (idk why IDA chose such a wasteful method - i wouldve thought they would reuse the 0x7FFE value)
#sdacl for structs & unions; tah for enums
#(technically defined separately again at the end of each respective case, but IDA does it at the same time internally just like we are doing right now)
tah = self.check_tah_or_sdacl(stream, typedef.basetype if typedef.flags != ComplexFlags.BTMT_ENUM else ComplexFlags.BTMT_ENUM, path, typedef.flags != ComplexFlags.BTMT_ENUM)
if typedef.flags == ComplexFlags.BTMT_ENUM:
#bte_t; list of de delta(s) OR blocks(bitfield?) of size n
bte = bte_t.parse_stream(stream)
if bte.bitfield: #blocks (defined in BTE_BITFIELD)
#TODO somehow test - it seems like IDA never carries the bitfield definitions forward from IDB format to TIL format so this never gets set
vals = []
for _ in range(n):
mask = TypeVarInt32.parse_stream(stream)
cnt = TypeVarInt15.parse_stream(stream)
subarr = con.ListContainer([TypeVarInt32.parse_stream(stream) for _ in range(cnt)])
vals.append(con.Container(mask=mask, cnt=cnt, subarr=subarr)) #subarr probably also have deltas as values
data = con.Container(flags=bte, vals=con.ListContainer(vals))
else: #much easier: de delta(s) (delta is difference between each enum value)
#TODO signedness (make TypeVarInt32 extend BytesInteger?)
#delta can be negative (currently represented as unsigned 32bit value) if width is smaller than required to fit all values
#TAENUM_64BIT is supported by mashing 2 de types (32bit each) into one value; width can be bigger than 8 bytes (64 bit), but values get truncated to 8 bytes anyway
if tah and tah.flags.TAENUM_64BIT:
deltas = [TypeVarInt32.parse_stream(stream) | TypeVarInt32.parse_stream(stream) << 32 for _ in range(n)]
else:
deltas = [TypeVarInt32.parse_stream(stream) for _ in range(n)]
data = con.Container(flags=bte, deltas=con.ListContainer(deltas[::2] if tah and tah.flags.TAENUM_64BIT else deltas))
else: #struct & union
# list of type_t; [sdacl-typeattrs]; of size mcnt (technically union doesn't have sdacl but we should be good to call check anyways since sdacl bytes should be distinct from all valid base type_ts)
#align == -1 if use default align (processor specific; need additional logic in parsing.py (lumina-binja/lumina-ghidra))
align = -1 if (alpow:=(n & 0x7)) == 0 else (1 << (alpow - 1))
mcnt = n >> 3
tah = self.check_tah_or_sdacl(stream, typedef.basetype, path, True) #sdacl
fields = []
for _ in range(mcnt):
tif = TypeInfo.parse_stream(stream)
sdacl = self.check_tah_or_sdacl(stream, tif, path, True) #each field has its own sdacl-typeattr if it's structs, see comment above
fields.append(con.Container(type=tif, sdacl=sdacl))
data=con.Container(align=align, fields=con.ListContainer(fields))
elif typedef.basetype == BaseTypes.BT_BITFIELD:
val = TypeVarInt15.parse_stream(stream)
data = con.Container(bitsize=val >> 1, unsigned=bool(val & 0b1)) #TODO abstract simple logic like these into construct?
else: #should never happen once everything is implemented - unparsed data due to unknown type (or BT_RESERVED)
rest += con.GreedyBytes.parse_stream(stream)
else: #all basic types may be followed by [tah-typeattrs]
tah = self.check_tah_or_sdacl(stream, typedef.basetype, path)
return con.Container(typedef=typedef, typeattr=tah, data=data, unparsed=rest)
def _build(self, obj, stream, context, path):
return obj