Skip to content

Commit

Permalink
[BACKLOG-6461] Oracle repository: Impossible to create CSV datasource…
Browse files Browse the repository at this point in the history
… where column names in csv contain - and / which get converted to text, HYPHEN and DIVIDED_BY, causing column name in table to be too long [ORA-00972 - Identifier too long].
  • Loading branch information
ivanpogodin committed Apr 14, 2016
1 parent 9ebdb0f commit fa5042f
Show file tree
Hide file tree
Showing 3 changed files with 238 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -12,19 +12,28 @@
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
* See the GNU Lesser General Public License for more details.
*
* Copyright (c) 2002-2013 Pentaho Corporation.. All rights reserved.
* Copyright (c) 2002-2016 Pentaho Corporation.. All rights reserved.
*/

package org.pentaho.platform.dataaccess.datasource.wizard.service.agile;

import java.io.File;
import java.sql.DatabaseMetaData;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Set;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.pentaho.di.core.database.Database;
import org.pentaho.di.core.database.DatabaseMeta;
import org.pentaho.di.core.exception.KettleDatabaseException;
import org.pentaho.di.core.exception.KettleStepException;
import org.pentaho.di.core.row.RowMetaInterface;
import org.pentaho.di.core.row.ValueMeta;
import org.pentaho.di.core.row.ValueMetaInterface;
import org.pentaho.di.trans.TransMeta;
import org.pentaho.di.trans.step.StepErrorMeta;
import org.pentaho.di.trans.step.StepMeta;
Expand All @@ -33,8 +42,8 @@
import org.pentaho.di.trans.steps.textfileinput.TextFileInputField;
import org.pentaho.platform.dataaccess.datasource.wizard.models.ColumnInfo;
import org.pentaho.platform.dataaccess.datasource.wizard.models.CsvFileInfo;
import org.pentaho.platform.dataaccess.datasource.wizard.sources.csv.FileTransformStats;
import org.pentaho.platform.dataaccess.datasource.wizard.models.ModelInfo;
import org.pentaho.platform.dataaccess.datasource.wizard.sources.csv.FileTransformStats;
import org.pentaho.platform.engine.core.system.PentahoSystem;

public class CsvTransformGenerator extends StagingTransformGenerator {
Expand All @@ -45,6 +54,8 @@ public class CsvTransformGenerator extends StagingTransformGenerator {

// Identifier "select"; presumably the name of a select-values step (its usage is not visible here -- TODO confirm).
private static final String SELECT_VALUES = "select"; //$NON-NLS-1$

// Step name passed to createCutLongNamesStep(...) for the step that shortens over-long field names.
private static final String CUT_LONG_NAMES = "cutLongNames"; //$NON-NLS-1$

// Relative directory path "<sep>system<sep>metadata<sep>csvfiles<sep>", built with the platform file separator.
// NOTE(review): judging by the name, this is where uploaded CSV files are stored -- confirm against callers.
public static final String DEFAULT_RELATIVE_UPLOAD_FILE_PATH = File.separatorChar
+ "system" + File.separatorChar + "metadata" + File.separatorChar + "csvfiles" + File.separatorChar;
//$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$
Expand Down Expand Up @@ -106,6 +117,17 @@ protected StepMeta[] getSteps( TransMeta transMeta ) {
createHop(steps.get(steps.size()-2), step, transMeta);
}
*/

final int targetDatabaseMaxColumnNameLength = getMaxColumnNameLength();
if ( targetDatabaseMaxColumnNameLength > 0 ) {
StepMeta prevStep = steps.get( steps.size() - 1 );
StepMeta cutLongNamesStep = createCutLongNamesStep( transMeta, prevStep, targetDatabaseMaxColumnNameLength, CUT_LONG_NAMES );
if ( cutLongNamesStep != null ) {
steps.add( cutLongNamesStep );
createHop( prevStep, cutLongNamesStep, transMeta );
}
}

return steps.toArray( new StepMeta[ steps.size() ] );
}

Expand Down Expand Up @@ -229,4 +251,145 @@ public Log getLogger() {
return log;
}

/**
 * Asks the target database for the maximum column name length it reports through its JDBC metadata.
 * <br/>
 * Any failure (no database object, no metadata, a Kettle or SQL error) is logged at debug level
 * and reported as 0. The database is disconnected before the method returns whenever a database
 * object was obtained.
 *
 * @return the value of {@link DatabaseMetaData#getMaxColumnNameLength()} for the target database,
 *         or 0 when it could not be determined
 */
protected int getMaxColumnNameLength() {
  Database database = null;
  try {
    database = this.getDatabase( getTargetDatabaseMeta() );
    if ( database != null ) {
      database.connect( null );
      final DatabaseMetaData jdbcMetaData = database.getDatabaseMetaData();
      if ( jdbcMetaData != null ) {
        // The finally block below still disconnects before this value is handed back.
        return jdbcMetaData.getMaxColumnNameLength();
      }
      log.debug( "Cannot getMaxColumnNameLength (defaults to 0): database metadata are not available." ); //$NON-NLS-1$
    } else {
      log.debug( "Cannot getMaxColumnNameLength (defaults to 0): database is not available." ); //$NON-NLS-1$
    }
  } catch ( KettleDatabaseException kettleFailure ) {
    log.debug( "Cannot getMaxColumnNameLength (defaults to 0): " + kettleFailure.getMessage(), kettleFailure ); //$NON-NLS-1$
  } catch ( SQLException sqlFailure ) {
    log.debug( "Cannot getMaxColumnNameLength (defaults to 0): " + sqlFailure.getMessage(), sqlFailure ); //$NON-NLS-1$
  } finally {
    if ( database != null ) {
      database.disconnect();
    }
  }
  // Every path that did not obtain a value from the metadata ends up here.
  return 0;
}

/**
 * Creates, and registers in <code>transMeta</code>, a step that shortens the output field names of
 * <code>prevStepMeta</code> which are longer than <code>maxColumnNameLength</code>, renaming them
 * where necessary so that they stay unique.
 * <br/>
 * No step is created when every field name is already short enough. A failure to read the output
 * fields of the previous step is logged at debug level and also results in no step.
 *
 * @param transMeta the transformation the new step is added to
 * @param prevStepMeta the step whose output fields are inspected
 * @param maxColumnNameLength the longest field name allowed
 * @param stepName the name given to the created step
 * @return the created {@link StepMeta}, or null when no step was created
 */
protected StepMeta createCutLongNamesStep( TransMeta transMeta, StepMeta prevStepMeta, int maxColumnNameLength, String stepName ) {
  StepMeta cutStep = null;
  try {
    final RowMetaInterface prevStepFields = transMeta.getStepFields( prevStepMeta );
    cutStep = createCutLongNamesStep( prevStepFields, maxColumnNameLength, stepName );
    if ( cutStep != null ) {
      transMeta.addStep( cutStep );
    }
  } catch ( KettleStepException stepFailure ) {
    log.debug( "Unable to createCutLongNamesStep", stepFailure );
    cutStep = null;
  }
  return cutStep;
}

/**
 * Builds a select-values step that renames the given fields so that every resulting name is at most
 * <code>maxColumnNameLength</code> characters long and unique when compared case-insensitively.
 * <br/>
 * Names that are too long are truncated. A name that collides with an already accepted name gets a
 * numeric suffix (<code>_1</code>, <code>_2</code>, ...), taking room from the end of the original
 * name when the suffix would not fit otherwise. Length and precision of every field are carried over.
 * The created step is not added to any transformation by this method.
 *
 * @param fields the fields whose names are checked
 * @param maxColumnNameLength the longest field name allowed; a non-positive value is treated as
 *                            "no known limit" and produces no step
 * @param stepName the name (and step id) given to the created step
 * @return the created {@link StepMeta}, or null when no field has to be renamed
 * @throws RuntimeException when no unique name can be built within <code>maxColumnNameLength</code>
 */
protected StepMeta createCutLongNamesStep( RowMetaInterface fields, int maxColumnNameLength, String stepName ) {
  if ( maxColumnNameLength <= 0 ) {
    // 0 is what getMaxColumnNameLength() reports when the limit is unknown; there is nothing to cut against.
    return null;
  }

  final List<ValueMetaInterface> valueMetas = fields.getValueMetaList();
  final int fieldsCount = valueMetas.size();
  final String[] selectName = new String[ fieldsCount ];
  final String[] selectRename = new String[ fieldsCount ];
  final int[] selectLength = new int[ fieldsCount ];
  final int[] selectPrecision = new int[ fieldsCount ];

  // Lower-cased names accepted so far; used for the case-insensitive uniqueness check.
  final Set<String> usedNames = new HashSet<String>();
  boolean renameRequired = false;
  int index = 0;
  for ( ValueMetaInterface valueMeta : valueMetas ) {
    final String oldName = valueMeta.getName();
    final String newName = makeUniqueShortName( oldName, maxColumnNameLength, usedNames );
    if ( !newName.equals( oldName ) ) {
      renameRequired = true;
    }
    usedNames.add( newName.toLowerCase( Locale.ROOT ) );

    selectName[ index ] = oldName;
    selectRename[ index ] = newName;
    selectLength[ index ] = valueMeta.getLength();
    selectPrecision[ index ] = valueMeta.getPrecision();
    index++;
  }
  if ( !renameRequired ) {
    return null;
  }

  final SelectValuesMeta meta = new SelectValuesMeta();
  meta.setSelectName( selectName );
  meta.setSelectRename( selectRename );
  meta.setSelectLength( selectLength );
  meta.setSelectPrecision( selectPrecision );
  return new StepMeta( stepName, stepName, meta );
}

/**
 * Returns <code>oldName</code> cut to <code>maxColumnNameLength</code> and, if that result is already
 * present in <code>usedNames</code> (lower-cased, locale-independent), replaced by the first free
 * <code>name_N</code> variant that still fits into <code>maxColumnNameLength</code>.
 */
private static String makeUniqueShortName( String oldName, int maxColumnNameLength, Set<String> usedNames ) {
  final String cutName =
    oldName.length() > maxColumnNameLength ? oldName.substring( 0, maxColumnNameLength ) : oldName;
  // Locale.ROOT keeps the comparison stable regardless of the JVM default locale (e.g. Turkish dotless i).
  if ( !usedNames.contains( cutName.toLowerCase( Locale.ROOT ) ) ) {
    return cutName;
  }

  // Room left after the full original name; zero or negative when the name already fills the limit.
  final int maxAppendableSuffixLength = maxColumnNameLength - oldName.length();
  for ( int j = 1; j < Integer.MAX_VALUE; j++ ) {
    final String suffix = "_" + j;
    if ( suffix.length() > maxColumnNameLength ) {
      throw new RuntimeException( "Cannot cut field name. Maximum suffix length is exceeded" ); //$NON-NLS-1$
    }
    final String candidateName;
    if ( suffix.length() <= maxAppendableSuffixLength ) {
      candidateName = oldName + suffix;
    } else {
      // Not enough room: drop trailing characters of the original name to make space for the suffix.
      candidateName = oldName.substring( 0, maxColumnNameLength - suffix.length() ) + suffix;
    }
    if ( !usedNames.contains( candidateName.toLowerCase( Locale.ROOT ) ) ) {
      return candidateName;
    }
  }
  throw new RuntimeException( "Cannot cut field name. Maximum trials number is reached." ); //$NON-NLS-1$
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -690,4 +690,8 @@ private boolean checkTableExists( String tableName ) throws CsvTransformGenerato
db.disconnect();
}
}

/**
 * Package-private accessor for the target database description held by this generator.
 *
 * @return the current value of the <code>targetDatabaseMeta</code> field
 */
DatabaseMeta getTargetDatabaseMeta() {
  return this.targetDatabaseMeta;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
/*!
* This program is free software; you can redistribute it and/or modify it under the
* terms of the GNU Lesser General Public License, version 2.1 as published by the Free Software
* Foundation.
*
* You should have received a copy of the GNU Lesser General Public License along with this
* program; if not, you can obtain a copy at http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html
* or from the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*
* This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
* See the GNU Lesser General Public License for more details.
*
* Copyright (c) 2002-2016 Pentaho Corporation.. All rights reserved.
*/
package org.pentaho.platform.dataaccess.datasource.wizard.service.agile;

import org.junit.Assert;
import org.junit.Test;
import org.pentaho.di.core.row.RowMeta;
import org.pentaho.di.core.row.RowMetaInterface;
import org.pentaho.di.core.row.value.ValueMetaInteger;
import org.pentaho.di.trans.step.StepMeta;
import org.pentaho.di.trans.step.StepMetaInterface;
import org.pentaho.di.trans.steps.selectvalues.SelectValuesMeta;
import org.pentaho.platform.dataaccess.datasource.wizard.models.ModelInfo;

/**
 * Unit tests for {@link CsvTransformGenerator#createCutLongNamesStep(RowMetaInterface, int, String)}.
 */
public class CsvTransformGeneratorTest {

  private static final String STEP_NAME = "TEST_STEP_CutLongNames";

  /** Field names shared by both tests; the longest one has 18 characters. */
  private static final String[] FIELD_NAMES =
    new String[] { "a", "b", "A_1", "b_1", "LONGlonglong", "longlonglong_again", "a_2" };

  /** Builds a row description with one integer field per given name, in the given order. */
  private static RowMetaInterface createFields( String[] fieldNames ) {
    RowMetaInterface fields = new RowMeta();
    for ( String fieldName : fieldNames ) {
      fields.addValueMeta( new ValueMetaInteger( fieldName ) );
    }
    return fields;
  }

  /**
   * With a limit of 8 characters the two long names must be cut, and the second one must get a
   * numeric suffix because its cut form collides (case-insensitively) with the first.
   */
  @Test
  public void testCreateCutLongNamesStep_long() {
    CsvTransformGenerator ctg = new CsvTransformGenerator( new ModelInfo(), null );
    int maxColumnNameLength = 8;
    String[] fieldRenames = new String[] { "a", "b", "A_1", "b_1", "LONGlong", "longlo_1", "a_2" };

    StepMeta step = ctg.createCutLongNamesStep( createFields( FIELD_NAMES ), maxColumnNameLength, STEP_NAME );

    Assert.assertNotNull( "step", step );
    Assert.assertEquals( "step name", STEP_NAME, step.getName() );
    StepMetaInterface stepMetaInterface = step.getStepMetaInterface();
    Assert.assertNotNull( "stepMetaInterface", stepMetaInterface );
    Assert.assertTrue( "stepMetaInterface instanceof SelectValuesMeta",
      stepMetaInterface instanceof SelectValuesMeta );
    SelectValuesMeta svm = (SelectValuesMeta) stepMetaInterface;
    Assert.assertArrayEquals( "selectName", FIELD_NAMES, svm.getSelectName() );
    Assert.assertArrayEquals( "selectRename", fieldRenames, svm.getSelectRename() );
  }

  /** With a limit of 18 characters every name fits, so no step must be created. */
  @Test
  public void testCreateCutLongNamesStep_short() {
    CsvTransformGenerator ctg = new CsvTransformGenerator( new ModelInfo(), null );
    int maxColumnNameLength = 18;

    StepMeta step = ctg.createCutLongNamesStep( createFields( FIELD_NAMES ), maxColumnNameLength, STEP_NAME );

    Assert.assertNull( "step", step );
  }

}

0 comments on commit fa5042f

Please sign in to comment.