Skip to content

Commit cd69193

Browse files
committed
Merge branch 'hotfix-1.1.26'
2 parents 9d69acb + e79be41 commit cd69193

File tree

3 files changed

+128
-33
lines changed

3 files changed

+128
-33
lines changed

pom.xml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
<name>baseCode</name>
66
<groupId>baseCode</groupId>
77
<artifactId>baseCode</artifactId>
8-
<version>1.1.25</version>
8+
<version>1.1.26</version>
99
<inceptionYear>2003</inceptionYear>
1010
<description>
1111
<![CDATA[Data structures, math and statistics tools, and utilities that are often needed across projects.]]>
@@ -294,7 +294,7 @@
294294
<dependency>
295295
<groupId>org.mockito</groupId>
296296
<artifactId>mockito-core</artifactId>
297-
<version>5.15.2</version>
297+
<version>5.17.0</version>
298298
<scope>test</scope>
299299
</dependency>
300300
<dependency>

src/ubic/basecode/util/StringUtil.java

Lines changed: 96 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,8 @@
2828
import java.io.IOException;
2929
import java.io.StringReader;
3030
import java.util.Collection;
31+
import java.util.HashMap;
32+
import java.util.Map;
3133

3234
/**
3335
* @author pavlidis
@@ -185,24 +187,109 @@ public static boolean isLatinLetter( char c ) {
185187
}
186188

187189
/**
188-
* Mimics the make.names method in R (character.c) to make valid variables names; we use this for column headers in
189-
* some output files. This doesn't give the exact sames results as R; we avoid repeated '.'.
190+
* Mimics the {@code make.names} method in R (character.c) to make valid variable names; we use this for column
191+
* headers in some output files.
192+
* <p>
193+
* This was modified in 1.1.26 to match the behavior of R more closely, if not exactly.
190194
*
191-
* @param s
195+
* @param s a string to be made valid for R
192196
* @return modified string
193197
* @author paul
198+
* @deprecated use {@link #makeNames(String)} instead
194199
*/
195200
public static String makeValidForR( String s ) {
201+
return makeNames( s );
202+
}
196203

197-
// If string starts with a digit or "." and then a digit, prepend an X.
198-
if ( s.matches( "^\\.?[0-9].+" ) ) {
199-
s = "X" + s;
204+
/**
205+
* Mimics the {@code make.names} method in R when using with a vector of strings and the unique argument set to TRUE.
206+
* @author poirigui
207+
* @deprecated use {@link #makeNames(String[], boolean)} instead
208+
*/
209+
@Deprecated
210+
public static String[] makeValidForR( String[] strings ) {
211+
return makeNames( strings, true );
212+
}
213+
214+
/**
215+
* Mimics the {@code make.names} method in R.
216+
* @param strings a list of strings to be made valid for R
217+
* @param unique if true, will ensure that the names are unique by appending a number to duplicates as per
218+
* {@link #makeUnique(String[])}
219+
* @author poirigui
220+
*/
221+
public static String[] makeNames( String[] strings, boolean unique ) {
222+
String[] result = new String[strings.length];
223+
if ( unique ) {
224+
Map<String, Integer> counts = new HashMap<>();
225+
for ( int i = 0; i < strings.length; i++ ) {
226+
String s = strings[i];
227+
String rs = makeNames( s );
228+
if ( counts.containsKey( rs ) ) {
229+
int count = counts.get( rs );
230+
result[i] = rs + "." + count;
231+
counts.put( rs, count + 1 );
232+
} else {
233+
result[i] = rs;
234+
counts.put( rs, 1 );
235+
}
236+
}
237+
} else {
238+
for ( int i = 0; i < strings.length; i++ ) {
239+
result[i] = makeNames( strings[i] );
240+
}
200241
}
242+
return result;
243+
}
244+
245+
private static final String[] R_RESERVED_WORDS = {
246+
"if", "else", "repeat", "while", "function", "for", "in", "next", "break",
247+
"TRUE", "FALSE", "NULL", "Inf", "NaN", "NA", "NA_integer_", "NA_real_", "NA_character_", "NA_complex_",
248+
};
201249

202-
// TODO: check for reserved words. https://stat.ethz.ch/R-manual/R-devel/library/base/html/Reserved.html
250+
/**
251+
* Mimics the {@code make.names} method in R for a single string.
252+
* @author paul
253+
*/
254+
public static String makeNames( String s ) {
255+
if ( s == null ) {
256+
return "NA";
257+
}
258+
if ( s.isEmpty()
259+
// starts with a non-letter or non-dot
260+
|| ( !Character.isAlphabetic( s.charAt( 0 ) ) && s.charAt( 0 ) != '.' )
261+
// dot followed by a digit
262+
|| ( s.charAt( 0 ) == '.' && s.length() > 1 && Character.isDigit( s.charAt( 1 ) ) ) ) {
263+
return "X" + s.replaceAll( "[^A-Za-z0-9._]", "." );
264+
}
265+
if ( StringUtils.equalsAny( s, R_RESERVED_WORDS ) ) {
266+
return s + ".";
267+
}
268+
return s.replaceAll( "[^A-Za-z0-9._]", "." );
269+
}
270+
271+
/**
272+
* Mimics the {@code make.unique} method in R.
273+
* <p>
274+
* Duplicated values in the input array will be suffixed with a dot and a number, starting from 1.
275+
* @author poirigui
276+
*/
277+
public static String[] makeUnique( String[] strings ) {
278+
Map<String, Integer> counts = new HashMap<>();
279+
String[] result = new String[strings.length];
280+
for ( int i = 0; i < strings.length; i++ ) {
281+
String cn = strings[i];
282+
if ( counts.containsKey( cn ) ) {
283+
int count = counts.get( cn );
284+
result[i] = cn + "." + count;
285+
counts.put( cn, count + 1 );
286+
} else {
287+
result[i] = cn;
288+
counts.put( cn, 1 );
289+
}
290+
}
291+
return result;
203292

204-
// no dashes or white space or other punctuation. '.' is okay and so is "_", now.
205-
return s.replaceAll( "[\\W]+", "." );
206293
}
207294

208295
/**

test/ubic/basecode/util/StringUtilTest.java

Lines changed: 30 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
/*
22
* The baseCode project
3-
*
3+
*
44
* Copyright (c) 2006 University of British Columbia
5-
*
5+
*
66
* Licensed under the Apache License, Version 2.0 (the "License");
77
* you may not use this file except in compliance with the License.
88
* You may obtain a copy of the License at
@@ -18,17 +18,16 @@
1818
*/
1919
package ubic.basecode.util;
2020

21-
import static org.junit.Assert.assertEquals;
22-
import static org.junit.Assert.assertNull;
21+
import org.junit.Test;
2322

2423
import java.util.Collection;
2524
import java.util.HashSet;
2625

27-
import org.junit.Test;
26+
import static org.junit.Assert.*;
2827

2928
/**
3029
* @author pavlidis
31-
*
30+
*
3231
*/
3332
public class StringUtilTest {
3433

@@ -73,25 +72,34 @@ public void testCommonSuffixNone() {
7372
test.add( "aaaab" );
7473
test.add( "aaaacb" );
7574
String suf = StringUtil.commonSuffix( test );
76-
assertEquals( null, suf );
75+
assertNull( suf );
7776
}
7877

7978
@Test
80-
public void testMakeRnames() {
81-
String actual = StringUtil.makeValidForR( "f33oo dd . [f] a" );
82-
assertEquals( "f33oo.dd.f.a", actual );
83-
84-
actual = StringUtil.makeValidForR( ".f33oo" );
85-
assertEquals( ".f33oo", actual );
86-
87-
actual = StringUtil.makeValidForR( "...f33oo" );
88-
assertEquals( ".f33oo", actual );
89-
90-
actual = StringUtil.makeValidForR( "1foo dd . [f] a" );
91-
assertEquals( "X1foo.dd.f.a", actual );
92-
93-
actual = StringUtil.makeValidForR( ".1foo dd . [f] a" );
94-
assertEquals( "X.1foo.dd.f.a", actual );
79+
// Exercises StringUtil.makeNames on null, empty, reserved-word, leading-digit, leading-dot and punctuation
// inputs, and finally the array overload with unique=true.
public void testMakeNames() {
80+
assertFalse( Character.isDigit( '.' ) );
81+
assertEquals( "NA", StringUtil.makeNames( null ) );
82+
assertEquals( "test", StringUtil.makeNames( "test" ) );
83+
assertEquals( "X", StringUtil.makeNames( "X" ) );
84+
assertEquals( "X123", StringUtil.makeNames( "123" ) );
85+
assertEquals( "X", StringUtil.makeNames( "" ) );
86+
// NOTE(review): a one-character blank input would give "X."; the expected "X.." implies a two-character
// input, so whitespace may have been collapsed in this rendering - confirm against the repository.
assertEquals( "X..", StringUtil.makeNames( " " ) );
87+
assertEquals( "if.", StringUtil.makeNames( "if" ) );
88+
assertEquals( "TRUE.", StringUtil.makeNames( "TRUE" ) );
89+
assertEquals( "...", StringUtil.makeNames( "..." ) );
90+
assertEquals( "..", StringUtil.makeNames( ". " ) );
91+
assertEquals( "X.2way", StringUtil.makeNames( ".2way" ) );
92+
assertEquals( "f33oo.dd....f..a", StringUtil.makeNames( "f33oo dd . [f] a" ) );
93+
assertEquals( ".f33oo", StringUtil.makeNames( ".f33oo" ) );
94+
assertEquals( "...f33oo", StringUtil.makeNames( "...f33oo" ) );
95+
assertEquals( "X1foo.dd....f..a", StringUtil.makeNames( "1foo dd . [f] a" ) );
96+
assertEquals( "X.1foo.dd....f..a", StringUtil.makeNames( ".1foo dd . [f] a" ) );
97+
assertArrayEquals( new String[] { "foo", "foo.1", "foo.2", "bar" }, StringUtil.makeNames( new String[] { "foo", "foo", "foo", "bar" }, true ) );
9598
}
9699

100+
// Checks that StringUtil.makeUnique keeps first occurrences and suffixes repeats with ".1", ".2", ... in order.
@Test
101+
public void testMakeUnique() {
102+
assertArrayEquals( new String[] { "foo", "foo.1" }, StringUtil.makeUnique( new String[] { "foo", "foo" } ) );
103+
assertArrayEquals( new String[] { "foo", "bar", "foo.1", "foo.2" }, StringUtil.makeUnique( new String[] { "foo", "bar", "foo", "foo" } ) );
104+
}
97105
}

0 commit comments

Comments
 (0)