|
28 | 28 | import java.io.IOException; |
29 | 29 | import java.io.StringReader; |
30 | 30 | import java.util.Collection; |
| 31 | +import java.util.HashMap; |
| 32 | +import java.util.Map; |
31 | 33 |
|
32 | 34 | /** |
33 | 35 | * @author pavlidis |
@@ -185,24 +187,109 @@ public static boolean isLatinLetter( char c ) { |
185 | 187 | } |
186 | 188 |
|
187 | 189 | /** |
188 | | - * Mimics the make.names method in R (character.c) to make valid variables names; we use this for column headers in |
189 | | - * some output files. This doesn't give the exact sames results as R; we avoid repeated '.'. |
| 190 | + * Mimics the {@code make.names} method in R (character.c) to make valid variables names; we use this for column |
| 191 | + * headers in some output files. |
| 192 | + * <p> |
| 193 | + * This was modified in 1.1.26 to match the behavior of R more closely, if not exactly. |
190 | 194 | * |
191 | | - * @param s |
| 195 | + * @param s a string to be made valid for R |
192 | 196 | * @return modified string |
193 | 197 | * @author paul |
| 198 | + * @deprecated use {@link #makeNames(String[], boolean)} instead |
194 | 199 | */ |
195 | 200 | public static String makeValidForR( String s ) { |
| 201 | + return makeNames( s ); |
| 202 | + } |
196 | 203 |
|
197 | | - // If string starts with a digit or "." and then a digit, prepend an X. |
198 | | - if ( s.matches( "^\\.?[0-9].+" ) ) { |
199 | | - s = "X" + s; |
| 204 | + /** |
| 205 | + * Mimics the {@code make.names} method in R when using with a vector of strings and the unique argument set to TRUE. |
| 206 | + * @author poirigui |
| 207 | + * @deprecated use {@link #makeNames(String[], boolean)} instead |
| 208 | + */ |
| 209 | + @Deprecated |
| 210 | + public static String[] makeValidForR( String[] strings ) { |
| 211 | + return makeNames( strings, true ); |
| 212 | + } |
| 213 | + |
| 214 | + /** |
| 215 | + * Mimics the {@code make.names} method in R. |
| 216 | + * @param strings a list of strings to be made valid for R |
| 217 | + * @param unique if true, will ensure that the names are unique by appending a number to duplicates as per |
| 218 | + * {@link #makeUnique(String[])} |
| 219 | + * @author poirigui |
| 220 | + */ |
| 221 | + public static String[] makeNames( String[] strings, boolean unique ) { |
| 222 | + String[] result = new String[strings.length]; |
| 223 | + if ( unique ) { |
| 224 | + Map<String, Integer> counts = new HashMap<>(); |
| 225 | + for ( int i = 0; i < strings.length; i++ ) { |
| 226 | + String s = strings[i]; |
| 227 | + String rs = makeNames( s ); |
| 228 | + if ( counts.containsKey( rs ) ) { |
| 229 | + int count = counts.get( rs ); |
| 230 | + result[i] = rs + "." + count; |
| 231 | + counts.put( rs, count + 1 ); |
| 232 | + } else { |
| 233 | + result[i] = rs; |
| 234 | + counts.put( rs, 1 ); |
| 235 | + } |
| 236 | + } |
| 237 | + } else { |
| 238 | + for ( int i = 0; i < strings.length; i++ ) { |
| 239 | + result[i] = makeNames( strings[i] ); |
| 240 | + } |
200 | 241 | } |
| 242 | + return result; |
| 243 | + } |
| 244 | + |
| 245 | + private static final String[] R_RESERVED_WORDS = { |
| 246 | + "if", "else", "repeat", "while", "function", "for", "in", "next", "break", |
| 247 | + "TRUE", "FALSE", "NULL", "Inf", "NaN", "NA", "NA_integer_", "NA_real_", "NA_character_", "NA_complex_", |
| 248 | + }; |
201 | 249 |
|
202 | | - // TODO: check for reserved words. https://stat.ethz.ch/R-manual/R-devel/library/base/html/Reserved.html |
| 250 | + /** |
| 251 | + * Mimics the {@code make.names} method in R for a single string. |
| 252 | + * @author paul |
| 253 | + */ |
| 254 | + public static String makeNames( String s ) { |
| 255 | + if ( s == null ) { |
| 256 | + return "NA"; |
| 257 | + } |
| 258 | + if ( s.isEmpty() |
| 259 | + // starts with a non-letter or non-dot |
| 260 | + || ( !Character.isAlphabetic( s.charAt( 0 ) ) && s.charAt( 0 ) != '.' ) |
| 261 | + // dot followed by a digit |
| 262 | + || ( s.charAt( 0 ) == '.' && s.length() > 1 && Character.isDigit( s.charAt( 1 ) ) ) ) { |
| 263 | + return "X" + s.replaceAll( "[^A-Za-z0-9._]", "." ); |
| 264 | + } |
| 265 | + if ( StringUtils.equalsAny( s, R_RESERVED_WORDS ) ) { |
| 266 | + return s + "."; |
| 267 | + } |
| 268 | + return s.replaceAll( "[^A-Za-z0-9._]", "." ); |
| 269 | + } |
| 270 | + |
| 271 | + /** |
| 272 | + * Mimics the {@code make.unique} method in R. |
| 273 | + * <p> |
| 274 | + * Duplicated values in the input array will be suffixed with a dot and a number, starting from 1. |
| 275 | + * @author poirigui |
| 276 | + */ |
| 277 | + public static String[] makeUnique( String[] strings ) { |
| 278 | + Map<String, Integer> counts = new HashMap<>(); |
| 279 | + String[] result = new String[strings.length]; |
| 280 | + for ( int i = 0; i < strings.length; i++ ) { |
| 281 | + String cn = strings[i]; |
| 282 | + if ( counts.containsKey( cn ) ) { |
| 283 | + int count = counts.get( cn ); |
| 284 | + result[i] = cn + "." + count; |
| 285 | + counts.put( cn, count + 1 ); |
| 286 | + } else { |
| 287 | + result[i] = cn; |
| 288 | + counts.put( cn, 1 ); |
| 289 | + } |
| 290 | + } |
| 291 | + return result; |
203 | 292 |
|
204 | | - // no dashes or white space or other punctuation. '.' is okay and so is "_", now. |
205 | | - return s.replaceAll( "[\\W]+", "." ); |
206 | 293 | } |
207 | 294 |
|
208 | 295 | /** |
|
0 commit comments