You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
* fix type coercion in bmerge
* fix bracket
* add test cases
* fix lint
* fix old test case
* rename x/i class
* add minimal test
* indent loop
* add fix in one direction
* remove indent to cater for diff
* Revert "remove indent to cater for diff"
This reverts commit 562a9fd.
* remove indent
* add 2nd case
* remove trailing ws
* update all cases
* fix typo
* fix test cases
* update testcases
* update copying attributes from int to dbl
* start modularize
* fix cases
* ensure same types for test
* add test for codecov
* simplify
* fix test on windows
* simplify
* add coerce function
* modularize more
* Use gettext() on character strings directly
* rename getClass helper: mergeType
* rename: {i,x}c --> {i,x}col
I found myself wondering what `ic` stood for: "`i` character? `i` class?". It is simpler to encode more information in the name.
* comment ref. issue
* exchange subset with .shallow
* undo test
* Revert "undo test"
This reverts commit c9d3d74.
* update tests
* add comment
* add non right join testcase
* move helper outside bmerge
* update comment
* add NEWS
* update numbering
* tweak NEWS
---------
Co-authored-by: Michael Chirico <chiricom@google.com>
if (ans=="integer") { if (is.factor(x)) ans="factor" }
31
-
else if (ans=="double") { if (inherits(x, "integer64")) ans="integer64" }
32
-
# do not call isReallyReal(x) yet because i) if both types are double we don't need to coerce even if one or both sides
33
-
# are int-as-double, and ii) to save calling it until we really need it
34
-
ans
35
-
}
36
-
37
49
if (nrow(i)) for (a in seq_along(icols)) {
38
50
# - check that join columns have compatible types
39
51
# - do type coercions if necessary on just the shallow local copies for the purpose of join
40
52
# - handle factor columns appropriately
41
53
# Note that if i is keyed, if this coerces i's key gets dropped by set()
42
-
ic=icols[a]
43
-
xc=xcols[a]
44
-
xclass=getClass(x[[xc]])
45
-
iclass=getClass(i[[ic]])
46
-
xname= paste0("x.", names(x)[xc])
47
-
iname= paste0("i.", names(i)[ic])
48
-
if (!xclass %chin% supported) stopf("%s is type %s which is not supported by data.table join", xname, xclass)
49
-
if (!iclass %chin% supported) stopf("%s is type %s which is not supported by data.table join", iname, iclass)
50
-
if (xclass=="factor"||iclass=="factor") {
54
+
icol=icols[a]
55
+
xcol=xcols[a]
56
+
x_merge_type=mergeType(x[[xcol]])
57
+
i_merge_type=mergeType(i[[icol]])
58
+
xname= paste0("x.", names(x)[xcol])
59
+
iname= paste0("i.", names(i)[icol])
60
+
if (!x_merge_type %chin% supported) stopf("%s is type %s which is not supported by data.table join", xname, x_merge_type)
61
+
if (!i_merge_type %chin% supported) stopf("%s is type %s which is not supported by data.table join", iname, i_merge_type)
62
+
if (x_merge_type=="factor"||i_merge_type=="factor") {
51
63
if (roll!=0.0&&a==length(icols))
52
64
stopf("Attempting roll join on factor column when joining %s to %s. Only integer, double or character columns may be roll joined.", xname, iname)
53
-
if (xclass=="factor"&&iclass=="factor") {
65
+
if (x_merge_type=="factor"&&i_merge_type=="factor") {
54
66
if (verbose) catf("Matching %s factor levels to %s factor levels.\n", iname, xname)
55
-
set(i, j=ic, value=chmatch(levels(i[[ic]]), levels(x[[xc]]), nomatch=0L)[i[[ic]]]) # nomatch=0L otherwise a level that is missing would match to NA values
67
+
set(i, j=icol, value=chmatch(levels(i[[icol]]), levels(x[[xcol]]), nomatch=0L)[i[[icol]]]) # nomatch=0L otherwise a level that is missing would match to NA values
56
68
next
57
69
} else {
58
-
if (xclass=="character") {
70
+
if (x_merge_type=="character") {
59
71
if (verbose) catf("Coercing factor column %s to type character to match type of %s.\n", iname, xname)
60
-
set(i, j=ic, value=val<-as.character(i[[ic]]))
61
-
set(callersi, j=ic, value=val) # factor in i joining to character in x will return character and not keep x's factor; e.g. for antaresRead #3581
if (anyNA(i[[icol]])) newvalue[is.na(i[[icol]])] =NA_integer_# NA_character_ should match to NA in factor, #3809
79
+
set(i, j=icol, value=newvalue)
68
80
next
69
81
}
70
82
}
71
-
stopf("Incompatible join types: %s (%s) and %s (%s). Factor columns must join to factor or character columns.", xname, xclass, iname, iclass)
83
+
stopf("Incompatible join types: %s (%s) and %s (%s). Factor columns must join to factor or character columns.", xname, x_merge_type, iname, i_merge_type)
72
84
}
73
-
if (xclass==iclass) {
74
-
if (verbose) catf("%s has same type (%s) as %s. No coercion needed.\n", iname, xclass, xname)
85
+
# we check factors first to cater for the case when trying to do rolling joins on factors
86
+
if (x_merge_type==i_merge_type) {
87
+
if (verbose) catf("%s has same type (%s) as %s. No coercion needed.\n", iname, x_merge_type, xname)
75
88
next
76
89
}
77
-
if (xclass=="character"||iclass=="character"||
78
-
xclass=="logical"||iclass=="logical"||
79
-
xclass=="factor"||iclass=="factor") {
80
-
if (anyNA(i[[ic]]) && allNA(i[[ic]])) {
81
-
if (verbose) catf("Coercing all-NA %s (%s) to type %s to match type of %s.\n", iname, iclass, xclass, xname)
if (x_merge_type=="integer64"||i_merge_type=="integer64") {
93
104
nm= c(iname, xname)
94
-
if (xclass=="integer64") { w=i; wc=ic; wclass=iclass; } else { w=x; wc=xc; wclass=xclass; nm=rev(nm) } # w is which to coerce
105
+
if (x_merge_type=="integer64") { w=i; wc=icol; wclass=i_merge_type; } else { w=x; wc=xcol; wclass=x_merge_type; nm=rev(nm) } # w is which to coerce
95
106
if (wclass=="integer"|| (wclass=="double"&&!isReallyReal(w[[wc]]))) {
96
107
if (verbose) catf("Coercing %s column %s%s to type integer64 to match type of %s.\n", wclass, nm[1L], if (wclass=="double") " (which contains no fractions)" else "", nm[2L])
97
108
set(w, j=wc, value=bit64::as.integer64(w[[wc]]))
98
109
} else stopf("Incompatible join types: %s is type integer64 but %s is type double and contains fractions", nm[2L], nm[1L])
99
110
} else {
100
111
# just integer and double left
101
-
if (iclass=="double") {
102
-
if (!isReallyReal(i[[ic]])) {
112
+
ic_idx= which(icol==icols) # check if on is joined on multiple conditions, #6602
113
+
if (i_merge_type=="double") {
114
+
coerce_x=FALSE
115
+
if (!isReallyReal(i[[icol]])) {
116
+
coerce_x=TRUE
103
117
# common case of ad hoc user-typed integers missing L postfix joining to correct integer keys
104
118
# we've always coerced to int and returned int, for convenience.
105
-
if (verbose) catf("Coercing double column %s (which contains no fractions) to type integer to match type of %s.\n", iname, xname)
106
-
val= as.integer(i[[ic]])
107
-
if (!is.null(attributes(i[[ic]]))) attributes(val) = attributes(i[[ic]]) # to retain Date for example; 3679
108
-
set(i, j=ic, value=val)
109
-
set(callersi, j=ic, value=val) # change the shallow copy of i up in [.data.table to reflect in the result, too.
110
-
} else {
111
-
if (verbose) catf("Coercing integer column %s to type double to match type of %s which contains fractions.\n", xname, iname)
112
-
set(x, j=xc, value=as.double(x[[xc]]))
119
+
if (length(ic_idx)>1L) {
120
+
xc_idx=xcols[ic_idx]
121
+
for (xb in xc_idx[which(vapply_1c(.shallow(x, xc_idx), mergeType) == "double")]) {
122
+
if (isReallyReal(x[[xb]])) {
123
+
coerce_x=FALSE
124
+
break
125
+
}
126
+
}
127
+
}
128
+
if (coerce_x) {
129
+
msg=if (verbose) gettext("Coercing %s column %s (which contains no fractions) to type %s to match type of %s.\n") else NULL
test(2070.02, DT[, .N, keyby=d, verbose=TRUE], output="Column 1.*date.*8 byte double.*no fractions are present.*4 byte integer.*to save space and time")
0 commit comments