Skip to content

Commit f942b18

Browse files
committed
Experimental support for R functions and external pointers.
Merge branch 'release/0.9'
2 parents 4faa331 + fae6ad4 commit f942b18

15 files changed

+685
-123
lines changed

rdata/VERSION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
0.8
1+
0.9

rdata/conversion/__init__.py

Lines changed: 18 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,20 @@
11
from ._conversion import (
2-
DEFAULT_CLASS_MAP,
3-
Converter,
4-
RExpression,
5-
RLanguage,
6-
SimpleConverter,
7-
convert,
8-
convert_array,
9-
convert_attrs,
10-
convert_char,
11-
convert_list,
12-
convert_symbol,
13-
convert_vector,
14-
dataframe_constructor,
15-
factor_constructor,
16-
ts_constructor,
2+
DEFAULT_CLASS_MAP as DEFAULT_CLASS_MAP,
3+
Converter as Converter,
4+
RBuiltin as RBuiltin,
5+
RBytecode as RBytecode,
6+
RExpression as RExpression,
7+
RFunction as RFunction,
8+
RLanguage as RLanguage,
9+
SimpleConverter as SimpleConverter,
10+
convert as convert,
11+
convert_array as convert_array,
12+
convert_attrs as convert_attrs,
13+
convert_char as convert_char,
14+
convert_list as convert_list,
15+
convert_symbol as convert_symbol,
16+
convert_vector as convert_vector,
17+
dataframe_constructor as dataframe_constructor,
18+
factor_constructor as factor_constructor,
19+
ts_constructor as ts_constructor,
1720
)

rdata/conversion/_conversion.py

Lines changed: 183 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
1+
from __future__ import annotations
2+
13
import abc
24
import warnings
5+
from dataclasses import dataclass
36
from fractions import Fraction
47
from types import MappingProxyType, SimpleNamespace
58
from typing import (
@@ -11,6 +14,7 @@
1114
MutableMapping,
1215
NamedTuple,
1316
Optional,
17+
Sequence,
1418
Union,
1519
cast,
1620
)
@@ -30,6 +34,7 @@ class RLanguage(NamedTuple):
3034
"""R language construct."""
3135

3236
elements: List[Any]
37+
attributes: Mapping[str, Any]
3338

3439

3540
class RExpression(NamedTuple):
@@ -38,6 +43,56 @@ class RExpression(NamedTuple):
3843
elements: List[RLanguage]
3944

4045

46+
@dataclass
47+
class RBuiltin:
48+
"""R builtin."""
49+
50+
name: str
51+
52+
53+
@dataclass
54+
class RFunction:
55+
"""R function."""
56+
57+
environment: Mapping[str, Any]
58+
formals: Optional[Mapping[str, Any]]
59+
body: RLanguage
60+
attributes: StrMap
61+
62+
@property
63+
def source(self) -> str:
64+
return "\n".join(self.attributes["srcref"].srcfile.lines)
65+
66+
67+
@dataclass
68+
class RExternalPointer:
69+
"""R external pointer."""
70+
71+
protected: Any
72+
tag: Any
73+
74+
75+
@dataclass
76+
class RBytecode:
77+
"""R bytecode."""
78+
79+
code: xarray.DataArray
80+
constants: Sequence[Any]
81+
attributes: StrMap
82+
83+
84+
class REnvironment(ChainMap[Union[str, bytes], Any]):
85+
"""R environment."""
86+
87+
def __init__(
88+
self,
89+
*maps: MutableMapping[str | bytes, Any],
90+
frame: StrMap | None = None,
91+
) -> None:
92+
super().__init__(*maps)
93+
self.frame = frame
94+
95+
4196
def convert_list(
4297
r_list: parser.RObject,
4398
conversion_function: ConversionFunction,
@@ -94,7 +149,7 @@ def convert_list(
94149
def convert_env(
95150
r_env: parser.RObject,
96151
conversion_function: ConversionFunction,
97-
) -> ChainMap[Union[str, bytes], Any]:
152+
) -> REnvironment:
98153
"""Convert environment objects."""
99154
if r_env.info.type is not parser.RObjectType.ENV:
100155
raise TypeError("Must receive a ENV object")
@@ -104,11 +159,12 @@ def convert_env(
104159
hash_table = conversion_function(r_env.value.hash_table)
105160

106161
dictionary = {}
107-
for d in hash_table:
108-
if d is not None:
109-
dictionary.update(d)
162+
if hash_table is not None:
163+
for d in hash_table:
164+
if d is not None:
165+
dictionary.update(d)
110166

111-
return ChainMap(dictionary, enclosure)
167+
return REnvironment(dictionary, enclosure, frame=frame)
112168

113169

114170
def convert_attrs(
@@ -352,6 +408,9 @@ def convert_array(
352408
# R matrix order is like FORTRAN
353409
value = np.reshape(value, shape, order='F')
354410

411+
dimension_names = None
412+
coords = None
413+
355414
dimnames = attrs.get('dimnames')
356415
if dimnames:
357416
if isinstance(dimnames, Mapping):
@@ -365,7 +424,11 @@ def convert_array(
365424
if d is not None
366425
}
367426

368-
value = xarray.DataArray(value, dims=dimension_names, coords=coords)
427+
value = xarray.DataArray(
428+
value,
429+
dims=dimension_names,
430+
coords=coords,
431+
)
369432

370433
return value
371434

@@ -438,6 +501,72 @@ def ts_constructor(
438501
return pandas.Series(obj, index=index)
439502

440503

504+
@dataclass
505+
class SrcRef:
506+
first_line: int
507+
first_byte: int
508+
last_line: int
509+
last_byte: int
510+
first_column: int
511+
last_column: int
512+
first_parsed: int
513+
last_parsed: int
514+
srcfile: SrcFile
515+
516+
517+
def srcref_constructor(
518+
obj: Any,
519+
attrs: StrMap,
520+
) -> SrcRef:
521+
return SrcRef(*obj, srcfile=attrs["srcfile"])
522+
523+
524+
@dataclass
525+
class SrcFile:
526+
filename: str
527+
file_encoding: str | None
528+
string_encoding: str | None
529+
530+
531+
def srcfile_constructor(
532+
obj: Any,
533+
attrs: StrMap,
534+
) -> SrcFile:
535+
536+
filename = obj.frame["filename"][0]
537+
file_encoding = obj.frame.get("encoding")
538+
string_encoding = obj.frame.get("Enc")
539+
540+
return SrcFile(
541+
filename=filename,
542+
file_encoding=file_encoding,
543+
string_encoding=string_encoding,
544+
)
545+
546+
547+
@dataclass
548+
class SrcFileCopy(SrcFile):
549+
lines: Sequence[str]
550+
551+
552+
def srcfilecopy_constructor(
553+
obj: Any,
554+
attrs: StrMap,
555+
) -> SrcFile:
556+
557+
filename = obj.frame["filename"][0]
558+
file_encoding = obj.frame.get("encoding", (None,))[0]
559+
string_encoding = obj.frame.get("Enc", (None,))[0]
560+
lines = obj.frame["lines"]
561+
562+
return SrcFileCopy(
563+
filename=filename,
564+
file_encoding=file_encoding,
565+
string_encoding=string_encoding,
566+
lines=lines,
567+
)
568+
569+
441570
Constructor = Callable[[Any, Mapping], Any]
442571
ConstructorDict = Mapping[
443572
Union[str, bytes],
@@ -449,6 +578,9 @@ def ts_constructor(
449578
"factor": factor_constructor,
450579
"ordered": ordered_constructor,
451580
"ts": ts_constructor,
581+
"srcref": srcref_constructor,
582+
"srcfile": srcfile_constructor,
583+
"srcfilecopy": srcfilecopy_constructor,
452584
}
453585

454586
DEFAULT_CLASS_MAP = MappingProxyType(default_class_map_dict)
@@ -508,17 +640,17 @@ def __init__(
508640
constructor_dict: ConstructorDict = DEFAULT_CLASS_MAP,
509641
default_encoding: Optional[str] = None,
510642
force_default_encoding: bool = False,
511-
global_environment: Optional[StrMap] = None,
643+
global_environment: MutableMapping[str | bytes, Any] | None = None,
512644
) -> None:
513645

514646
self.constructor_dict = constructor_dict
515647
self.default_encoding = default_encoding
516648
self.force_default_encoding = force_default_encoding
517-
self.global_environment = ChainMap(
649+
self.global_environment = REnvironment(
518650
{} if global_environment is None
519651
else global_environment,
520652
)
521-
self.empty_environment: StrMap = ChainMap({})
653+
self.empty_environment: StrMap = REnvironment({})
522654

523655
self._reset()
524656

@@ -562,6 +694,20 @@ def _convert_next(self, data: Union[parser.RData, parser.RObject]) -> Any:
562694
# Expand the list and process the elements
563695
value = convert_list(obj, self._convert_next)
564696

697+
elif obj.info.type == parser.RObjectType.CLO:
698+
assert obj.tag is not None
699+
environment = self._convert_next(obj.tag)
700+
formals = self._convert_next(obj.value[0])
701+
body = self._convert_next(obj.value[1])
702+
attributes = self._convert_next(obj.attributes)
703+
704+
value = RFunction(
705+
environment=environment,
706+
formals=formals,
707+
body=body,
708+
attributes=attributes,
709+
)
710+
565711
elif obj.info.type == parser.RObjectType.ENV:
566712

567713
# Return a ChainMap of the environments
@@ -573,8 +719,15 @@ def _convert_next(self, data: Union[parser.RData, parser.RObject]) -> Any:
573719
# special object
574720
rlanguage_list = convert_list(obj, self._convert_next)
575721
assert isinstance(rlanguage_list, list)
722+
attributes = self._convert_next(
723+
obj.attributes,
724+
) if obj.attributes else {}
576725

577-
value = RLanguage(rlanguage_list)
726+
value = RLanguage(rlanguage_list, attributes)
727+
728+
elif obj.info.type in {parser.RObjectType.SPECIAL, parser.RObjectType.BUILTIN}:
729+
730+
value = RBuiltin(name=obj.value.decode("ascii"))
578731

579732
elif obj.info.type == parser.RObjectType.CHAR:
580733

@@ -616,12 +769,30 @@ def _convert_next(self, data: Union[parser.RData, parser.RObject]) -> Any:
616769
# Convert the internal objects returning a special object
617770
value = RExpression(rexpression_list)
618771

772+
elif obj.info.type == parser.RObjectType.BCODE:
773+
774+
value = RBytecode(
775+
code=self._convert_next(obj.value[0]),
776+
constants=[self._convert_next(c) for c in obj.value[1]],
777+
attributes=attrs,
778+
)
779+
780+
elif obj.info.type == parser.RObjectType.EXTPTR:
781+
782+
value = RExternalPointer(
783+
protected=self._convert_next(obj.value[0]),
784+
tag=self._convert_next(obj.value[1]),
785+
)
786+
619787
elif obj.info.type == parser.RObjectType.S4:
620788
value = SimpleNamespace(**attrs)
621789

622790
elif obj.info.type == parser.RObjectType.EMPTYENV:
623791
value = self.empty_environment
624792

793+
elif obj.info.type == parser.RObjectType.MISSINGARG:
794+
value = NotImplemented
795+
625796
elif obj.info.type == parser.RObjectType.GLOBALENV:
626797
value = self.global_environment
627798

@@ -641,8 +812,8 @@ def _convert_next(self, data: Union[parser.RData, parser.RObject]) -> Any:
641812
else:
642813
raise NotImplementedError(f"Type {obj.info.type} not implemented")
643814

644-
if obj.info.object:
645-
classname = attrs["class"]
815+
if obj.info.object and attrs is not None:
816+
classname = attrs.get("class", ())
646817
for i, c in enumerate(classname):
647818

648819
constructor = self.constructor_dict.get(c, None)

rdata/parser/__init__.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
11
"""Utilities for parsing a rdata file."""
22

33
from ._parser import (
4-
DEFAULT_ALTREP_MAP,
5-
CharFlags,
6-
RData,
7-
RObject,
8-
RObjectInfo,
9-
RObjectType,
10-
parse_data,
11-
parse_file,
4+
DEFAULT_ALTREP_MAP as DEFAULT_ALTREP_MAP,
5+
CharFlags as CharFlags,
6+
RData as RData,
7+
RObject as RObject,
8+
RObjectInfo as RObjectInfo,
9+
RObjectType as RObjectType,
10+
parse_data as parse_data,
11+
parse_file as parse_file,
1212
)

0 commit comments

Comments
 (0)