1
+ from __future__ import annotations
2
+
1
3
import abc
2
4
import warnings
5
+ from dataclasses import dataclass
3
6
from fractions import Fraction
4
7
from types import MappingProxyType , SimpleNamespace
5
8
from typing import (
11
14
MutableMapping ,
12
15
NamedTuple ,
13
16
Optional ,
17
+ Sequence ,
14
18
Union ,
15
19
cast ,
16
20
)
@@ -30,6 +34,7 @@ class RLanguage(NamedTuple):
30
34
"""R language construct."""
31
35
32
36
elements : List [Any ]
37
+ attributes : Mapping [str , Any ]
33
38
34
39
35
40
class RExpression (NamedTuple ):
@@ -38,6 +43,56 @@ class RExpression(NamedTuple):
38
43
elements : List [RLanguage ]
39
44
40
45
46
@dataclass
class RBuiltin:
    """Reference to an R builtin or special function."""

    # Name of the function; the converter fills it with the ASCII-decoded
    # value of the parsed object.
    name: str
51
+
52
+
53
@dataclass
class RFunction:
    """R function (closure)."""

    # Converted tag of the closure object (its environment).
    environment: Mapping[str, Any]
    # Converted first element of the closure value.
    formals: Optional[Mapping[str, Any]]
    # Converted second element of the closure value.
    body: RLanguage
    # Converted attributes of the closure object.
    attributes: StrMap

    @property
    def source(self) -> str:
        """
        Source code of the function, one stored line per text line.

        The lines are read from the source file referenced by the
        ``"srcref"`` attribute.

        Raises:
            KeyError: If there is no ``"srcref"`` attribute.
            AttributeError: If the referenced source file does not keep
                a copy of its lines.

        """
        # Join with a bare newline: any extra character in the separator
        # would be prepended to every line but the first.
        return "\n".join(self.attributes["srcref"].srcfile.lines)
65
+
66
+
67
@dataclass
class RExternalPointer:
    """R external pointer."""

    # Converted first element of the parsed external pointer value.
    protected: Any
    # Converted second element of the parsed external pointer value.
    tag: Any
73
+
74
+
75
@dataclass
class RBytecode:
    """Compiled R bytecode together with its constant pool."""

    # Converted first element of the parsed bytecode value.
    code: xarray.DataArray
    # One converted object per entry of the second element.
    constants: Sequence[Any]
    # Attributes attached to the bytecode object.
    attributes: StrMap
82
+
83
+
84
class REnvironment(ChainMap[Union[str, bytes], Any]):
    """
    R environment.

    Works as a chain of mappings, looked up in the order given, and
    additionally carries an optional ``frame`` mapping.
    """

    def __init__(
        self,
        *maps: MutableMapping[str | bytes, Any],
        frame: StrMap | None = None,
    ) -> None:
        # ``frame`` is keyword-only so it is never taken as one more map.
        self.frame = frame
        super().__init__(*maps)
94
+
95
+
41
96
def convert_list (
42
97
r_list : parser .RObject ,
43
98
conversion_function : ConversionFunction ,
@@ -94,7 +149,7 @@ def convert_list(
94
149
def convert_env (
95
150
r_env : parser .RObject ,
96
151
conversion_function : ConversionFunction ,
97
- ) -> ChainMap [ Union [ str , bytes ], Any ] :
152
+ ) -> REnvironment :
98
153
"""Convert environment objects."""
99
154
if r_env .info .type is not parser .RObjectType .ENV :
100
155
raise TypeError ("Must receive a ENV object" )
@@ -104,11 +159,12 @@ def convert_env(
104
159
hash_table = conversion_function (r_env .value .hash_table )
105
160
106
161
dictionary = {}
107
- for d in hash_table :
108
- if d is not None :
109
- dictionary .update (d )
162
+ if hash_table is not None :
163
+ for d in hash_table :
164
+ if d is not None :
165
+ dictionary .update (d )
110
166
111
- return ChainMap (dictionary , enclosure )
167
+ return REnvironment (dictionary , enclosure , frame = frame )
112
168
113
169
114
170
def convert_attrs (
@@ -352,6 +408,9 @@ def convert_array(
352
408
# R matrix order is like FORTRAN
353
409
value = np .reshape (value , shape , order = 'F' )
354
410
411
+ dimension_names = None
412
+ coords = None
413
+
355
414
dimnames = attrs .get ('dimnames' )
356
415
if dimnames :
357
416
if isinstance (dimnames , Mapping ):
@@ -365,7 +424,11 @@ def convert_array(
365
424
if d is not None
366
425
}
367
426
368
- value = xarray .DataArray (value , dims = dimension_names , coords = coords )
427
+ value = xarray .DataArray (
428
+ value ,
429
+ dims = dimension_names ,
430
+ coords = coords ,
431
+ )
369
432
370
433
return value
371
434
@@ -438,6 +501,72 @@ def ts_constructor(
438
501
return pandas .Series (obj , index = index )
439
502
440
503
504
@dataclass
class SrcRef:
    """Reference to a range of text inside an R source file."""

    first_line: int
    first_byte: int
    last_line: int
    last_byte: int
    first_column: int
    last_column: int
    first_parsed: int
    last_parsed: int
    # Source file object taken from the "srcfile" attribute.
    srcfile: SrcFile


def srcref_constructor(
    obj: Any,
    attrs: StrMap,
) -> SrcRef:
    """
    Build a :class:`SrcRef` from an R ``srcref`` integer vector.

    R accepts vectors of 4, 6 or 8 integers. The shorter forms are
    completed in the same way as R's own ``srcref()`` function:
    missing columns default to the byte positions and missing parsed
    lines default to the actual lines.

    Args:
        obj: Iterable with the integers of the reference.
        attrs: Attributes of the object; ``"srcfile"`` is required.

    Returns:
        The source reference with its eight positions filled in.

    Raises:
        TypeError: If the number of integers is not 4, 6 or 8.
        KeyError: If ``attrs`` has no ``"srcfile"`` entry.

    """
    location = tuple(obj)

    if len(location) == 4:
        # No column information: use (first_byte, last_byte).
        location += (location[1], location[3])

    if len(location) == 6:
        # No parsed line numbers: use (first_line, last_line).
        location += (location[0], location[2])

    # Any other length fails here with the TypeError of the dataclass
    # constructor, as it always did.
    return SrcRef(*location, srcfile=attrs["srcfile"])
522
+
523
+
524
@dataclass
class SrcFile:
    """Description of an R source file."""

    filename: str
    # Value of the "encoding" entry of the file frame, if present.
    file_encoding: str | None
    # Value of the "Enc" entry of the file frame, if present.
    string_encoding: str | None


def srcfile_constructor(
    obj: Any,
    attrs: StrMap,
) -> SrcFile:
    """
    Build a :class:`SrcFile` from an R ``srcfile`` environment.

    Every value is read from ``obj.frame``. The entries are unwrapped
    with ``[0]``, the same way ``srcfilecopy_constructor`` does, so that
    the fields hold plain values as their annotations declare.

    Args:
        obj: Object whose ``frame`` mapping has a ``"filename"`` entry
            and, optionally, ``"encoding"`` and ``"Enc"`` entries.
        attrs: Attributes of the object (unused).

    Returns:
        The source file description; absent encodings are ``None``.

    """
    frame = obj.frame
    # One-element fallback so that a missing entry unwraps to None.
    absent = (None,)

    return SrcFile(
        filename=frame["filename"][0],
        file_encoding=frame.get("encoding", absent)[0],
        string_encoding=frame.get("Enc", absent)[0],
    )
545
+
546
+
547
@dataclass
class SrcFileCopy(SrcFile):
    """Source file description that also keeps the text of the file."""

    # Lines of the file, taken from the "lines" entry of the file frame.
    lines: Sequence[str]
550
+
551
+
552
def srcfilecopy_constructor(
    obj: Any,
    attrs: StrMap,
) -> SrcFile:
    """
    Build a :class:`SrcFileCopy` from an R ``srcfilecopy`` environment.

    The file name, both encodings and the lines are read from
    ``obj.frame``; an encoding that is absent becomes ``None``.
    """
    # One-element fallback so that a missing entry unwraps to None.
    absent = (None,)

    return SrcFileCopy(
        filename=obj.frame["filename"][0],
        file_encoding=obj.frame.get("encoding", absent)[0],
        string_encoding=obj.frame.get("Enc", absent)[0],
        lines=obj.frame["lines"],
    )
568
+
569
+
441
570
Constructor = Callable [[Any , Mapping ], Any ]
442
571
ConstructorDict = Mapping [
443
572
Union [str , bytes ],
@@ -449,6 +578,9 @@ def ts_constructor(
449
578
"factor" : factor_constructor ,
450
579
"ordered" : ordered_constructor ,
451
580
"ts" : ts_constructor ,
581
+ "srcref" : srcref_constructor ,
582
+ "srcfile" : srcfile_constructor ,
583
+ "srcfilecopy" : srcfilecopy_constructor ,
452
584
}
453
585
454
586
DEFAULT_CLASS_MAP = MappingProxyType (default_class_map_dict )
@@ -508,17 +640,17 @@ def __init__(
508
640
constructor_dict : ConstructorDict = DEFAULT_CLASS_MAP ,
509
641
default_encoding : Optional [str ] = None ,
510
642
force_default_encoding : bool = False ,
511
- global_environment : Optional [ StrMap ] = None ,
643
+ global_environment : MutableMapping [ str | bytes , Any ] | None = None ,
512
644
) -> None :
513
645
514
646
self .constructor_dict = constructor_dict
515
647
self .default_encoding = default_encoding
516
648
self .force_default_encoding = force_default_encoding
517
- self .global_environment = ChainMap (
649
+ self .global_environment = REnvironment (
518
650
{} if global_environment is None
519
651
else global_environment ,
520
652
)
521
- self .empty_environment : StrMap = ChainMap ({})
653
+ self .empty_environment : StrMap = REnvironment ({})
522
654
523
655
self ._reset ()
524
656
@@ -562,6 +694,20 @@ def _convert_next(self, data: Union[parser.RData, parser.RObject]) -> Any:
562
694
# Expand the list and process the elements
563
695
value = convert_list (obj , self ._convert_next )
564
696
697
+ elif obj .info .type == parser .RObjectType .CLO :
698
+ assert obj .tag is not None
699
+ environment = self ._convert_next (obj .tag )
700
+ formals = self ._convert_next (obj .value [0 ])
701
+ body = self ._convert_next (obj .value [1 ])
702
+ attributes = self ._convert_next (obj .attributes )
703
+
704
+ value = RFunction (
705
+ environment = environment ,
706
+ formals = formals ,
707
+ body = body ,
708
+ attributes = attributes ,
709
+ )
710
+
565
711
elif obj .info .type == parser .RObjectType .ENV :
566
712
567
713
# Return an REnvironment (a ChainMap subclass) of the environments
@@ -573,8 +719,15 @@ def _convert_next(self, data: Union[parser.RData, parser.RObject]) -> Any:
573
719
# special object
574
720
rlanguage_list = convert_list (obj , self ._convert_next )
575
721
assert isinstance (rlanguage_list , list )
722
+ attributes = self ._convert_next (
723
+ obj .attributes ,
724
+ ) if obj .attributes else {}
576
725
577
- value = RLanguage (rlanguage_list )
726
+ value = RLanguage (rlanguage_list , attributes )
727
+
728
+ elif obj .info .type in {parser .RObjectType .SPECIAL , parser .RObjectType .BUILTIN }:
729
+
730
+ value = RBuiltin (name = obj .value .decode ("ascii" ))
578
731
579
732
elif obj .info .type == parser .RObjectType .CHAR :
580
733
@@ -616,12 +769,30 @@ def _convert_next(self, data: Union[parser.RData, parser.RObject]) -> Any:
616
769
# Convert the internal objects returning a special object
617
770
value = RExpression (rexpression_list )
618
771
772
+ elif obj .info .type == parser .RObjectType .BCODE :
773
+
774
+ value = RBytecode (
775
+ code = self ._convert_next (obj .value [0 ]),
776
+ constants = [self ._convert_next (c ) for c in obj .value [1 ]],
777
+ attributes = attrs ,
778
+ )
779
+
780
+ elif obj .info .type == parser .RObjectType .EXTPTR :
781
+
782
+ value = RExternalPointer (
783
+ protected = self ._convert_next (obj .value [0 ]),
784
+ tag = self ._convert_next (obj .value [1 ]),
785
+ )
786
+
619
787
elif obj .info .type == parser .RObjectType .S4 :
620
788
value = SimpleNamespace (** attrs )
621
789
622
790
elif obj .info .type == parser .RObjectType .EMPTYENV :
623
791
value = self .empty_environment
624
792
793
+ elif obj .info .type == parser .RObjectType .MISSINGARG :
794
+ value = NotImplemented
795
+
625
796
elif obj .info .type == parser .RObjectType .GLOBALENV :
626
797
value = self .global_environment
627
798
@@ -641,8 +812,8 @@ def _convert_next(self, data: Union[parser.RData, parser.RObject]) -> Any:
641
812
else :
642
813
raise NotImplementedError (f"Type { obj .info .type } not implemented" )
643
814
644
- if obj .info .object :
645
- classname = attrs [ "class" ]
815
+ if obj .info .object and attrs is not None :
816
+ classname = attrs . get ( "class" , ())
646
817
for i , c in enumerate (classname ):
647
818
648
819
constructor = self .constructor_dict .get (c , None )
0 commit comments