use of org.pentaho.di.core.row.RowMeta in project pentaho-kettle by pentaho.
the class TextFileCSVImportProgressDialog method doScan.
/**
 * Scans the text file behind {@code reader} and guesses type, format, length and precision for
 * every entry of {@code meta.inputFields}.
 * <p>
 * Each scanned line is converted to a row of strings and every column value is fed into a
 * per-column {@link StringEvaluator}. When {@code samples} is greater than zero the scan stops
 * once more than {@code samples} lines have been evaluated; when it is zero the scan runs until
 * no more lines can be read. The advised result of each evaluator is written back into the
 * matching input field and summarised in the returned report.
 *
 * @param monitor progress monitor that receives task / sub-task updates and is polled for
 *                cancellation between lines
 * @return the report text describing the findings per field
 * @throws KettleException declared for failures raised by the metadata and line-conversion helpers
 */
private String doScan(IProgressMonitor monitor) throws KettleException {
  if (samples > 0) {
    monitor.beginTask(BaseMessages.getString(PKG, "TextFileCSVImportProgressDialog.Task.ScanningFile"), samples + 1);
  } else {
    monitor.beginTask(BaseMessages.getString(PKG, "TextFileCSVImportProgressDialog.Task.ScanningFile"), 2);
  }

  long fileLineNumber = 0;
  DecimalFormatSymbols dfs = new DecimalFormatSymbols();
  int nrfields = meta.inputFields.length;
  final int nrDateFormats = Const.getDateFormats().length;
  final int nrNumberFormats = Const.getNumberFormats().length;

  RowMetaInterface outputRowMeta = new RowMeta();
  meta.getFields(outputRowMeta, null, null, null, transMeta, null, null);
  // Remove the storage meta-data (don't go for lazy conversion during scan)
  for (ValueMetaInterface valueMeta : outputRowMeta.getValueMetaList()) {
    valueMeta.setStorageMetadata(null);
    valueMeta.setStorageType(ValueMetaInterface.STORAGE_TYPE_NORMAL);
  }
  RowMetaInterface convertRowMeta = outputRowMeta.cloneToType(ValueMetaInterface.TYPE_STRING);

  // Per-field bookkeeping used by the report below.
  // How many times was the value null?
  int[] nrnull = new int[nrfields];
  // String info: min string, max string, first occurrence of a string?
  String[] minstr = new String[nrfields];
  String[] maxstr = new String[nrfields];
  boolean[] firststr = new boolean[nrfields];
  // Date info: is the field perhaps a Date, how many date formats work, which ones, min/max date
  boolean[] isDate = new boolean[nrfields];
  int[] dateFormatCount = new int[nrfields];
  boolean[][] dateFormat = new boolean[nrfields][nrDateFormats];
  Date[][] minDate = new Date[nrfields][nrDateFormats];
  Date[][] maxDate = new Date[nrfields][nrDateFormats];
  // Number info: is the field perhaps a Number, how many number formats work, which ones
  boolean[] isNumber = new boolean[nrfields];
  int[] numberFormatCount = new int[nrfields];
  boolean[][] numberFormat = new boolean[nrfields][nrNumberFormats];
  // min / max number value. These are indexed per NUMBER format in the init loop below, so they
  // must be sized by the number-format count (they used to be sized by the date-format count).
  double[][] minValue = new double[nrfields][nrNumberFormats];
  double[][] maxValue = new double[nrfields][nrNumberFormats];
  // remember the precision and the length
  int[][] numberPrecision = new int[nrfields][nrNumberFormats];
  int[][] numberLength = new int[nrfields][nrNumberFormats];

  for (int i = 0; i < nrfields; i++) {
    BaseFileField field = meta.inputFields[i];
    if (log.isDebug()) {
      debug = "init field #" + i;
    }
    if (replaceMeta) {
      // Clear previous info...
      // (name and type are re-set from the very same field object, i.e. they stay as they are)
      field.setName(meta.inputFields[i].getName());
      field.setType(meta.inputFields[i].getType());
      field.setFormat("");
      field.setLength(-1);
      field.setPrecision(-1);
      field.setCurrencySymbol(dfs.getCurrencySymbol());
      field.setDecimalSymbol("" + dfs.getDecimalSeparator());
      field.setGroupSymbol("" + dfs.getGroupingSeparator());
      field.setNullString("-");
      field.setTrimType(ValueMetaInterface.TRIM_TYPE_NONE);
    }
    nrnull[i] = 0;
    minstr[i] = "";
    maxstr[i] = "";
    firststr[i] = true;
    // Init date guess
    isDate[i] = true;
    for (int j = 0; j < nrDateFormats; j++) {
      dateFormat[i][j] = true;
      minDate[i][j] = Const.MAX_DATE;
      maxDate[i][j] = Const.MIN_DATE;
    }
    dateFormatCount[i] = nrDateFormats;
    // Init number guess
    isNumber[i] = true;
    for (int j = 0; j < nrNumberFormats; j++) {
      numberFormat[i][j] = true;
      minValue[i][j] = Double.MAX_VALUE;
      maxValue[i][j] = -Double.MAX_VALUE;
      numberPrecision[i][j] = -1;
      numberLength[i][j] = -1;
    }
    numberFormatCount[i] = nrNumberFormats;
  }

  // Work on a clone in which every field is a String, so that line conversion never fails on type.
  TextFileInputMeta strinfo = (TextFileInputMeta) meta.clone();
  for (int i = 0; i < nrfields; i++) {
    strinfo.inputFields[i].setType(ValueMetaInterface.TYPE_STRING);
  }

  // Sample <samples> rows...
  debug = "get first line";
  StringBuilder lineBuffer = new StringBuilder(256);
  int fileFormatType = meta.getFileFormatTypeNr();
  // If the file has a header we overwrite the first line
  // However, if it doesn't have a header, take a new line
  //
  String line = TextFileInputUtils.getLine(log, reader, encodingType, fileFormatType, lineBuffer);
  fileLineNumber++;
  int skipped = 1;
  if (meta.content.header) {
    while (line != null && skipped < meta.content.nrHeaderLines) {
      line = TextFileInputUtils.getLine(log, reader, encodingType, fileFormatType, lineBuffer);
      skipped++;
      fileLineNumber++;
    }
  }

  int linenr = 1;
  // One evaluator per column, created lazily the first time a row provides that column.
  List<StringEvaluator> evaluators = new ArrayList<StringEvaluator>();
  // Allocate number and date parsers
  DecimalFormat df2 = (DecimalFormat) NumberFormat.getInstance();
  DecimalFormatSymbols dfs2 = new DecimalFormatSymbols();
  SimpleDateFormat daf2 = new SimpleDateFormat();
  boolean errorFound = false;
  while (!errorFound && line != null && (linenr <= samples || samples == 0) && !monitor.isCanceled()) {
    monitor.subTask(BaseMessages.getString(PKG, "TextFileCSVImportProgressDialog.Task.ScanningLine", "" + linenr));
    if (samples > 0) {
      monitor.worked(1);
    }
    if (log.isDebug()) {
      debug = "convert line #" + linenr + " to row";
    }
    RowMetaInterface rowMeta = new RowMeta();
    meta.getFields(rowMeta, "stepname", null, null, transMeta, null, null);
    // Remove the storage meta-data (don't go for lazy conversion during scan)
    for (ValueMetaInterface valueMeta : rowMeta.getValueMetaList()) {
      valueMeta.setStorageMetadata(null);
      valueMeta.setStorageType(ValueMetaInterface.STORAGE_TYPE_NORMAL);
    }
    String delimiter = transMeta.environmentSubstitute(meta.content.separator);
    String enclosure = transMeta.environmentSubstitute(meta.content.enclosure);
    String escapeCharacter = transMeta.environmentSubstitute(meta.content.escapeCharacter);
    Object[] r = TextFileInputUtils.convertLineToRow(log, new TextFileLine(line, fileLineNumber, null), strinfo, null, 0, outputRowMeta, convertRowMeta, FileInputList.createFilePathList(transMeta, meta.inputFiles.fileName, meta.inputFiles.fileMask, meta.inputFiles.excludeFileMask, meta.inputFiles.fileRequired, meta.inputFiles.includeSubFolderBoolean())[0], rownumber, delimiter, enclosure, escapeCharacter, null, new BaseFileInputAdditionalField(), null, null, false, null, null, null, null, null);
    if (r == null) {
      // Conversion produced no row: flag it so the loop condition ends the scan.
      errorFound = true;
      continue;
    }
    rownumber++;
    for (int i = 0; i < nrfields && i < r.length; i++) {
      StringEvaluator evaluator;
      if (i >= evaluators.size()) {
        evaluator = new StringEvaluator(true);
        evaluators.add(evaluator);
      } else {
        evaluator = evaluators.get(i);
      }
      String string = rowMeta.getString(r, i);
      evaluator.evaluateString(string);
    }
    fileLineNumber++;
    // r is known to be non-null here, so the line always counts.
    linenr++;
    // Grab another line...
    //
    line = TextFileInputUtils.getLine(log, reader, encodingType, fileFormatType, lineBuffer);
  }

  monitor.worked(1);
  monitor.setTaskName(BaseMessages.getString(PKG, "TextFileCSVImportProgressDialog.Task.AnalyzingResults"));
  // Show information on items using a dialog box
  //
  StringBuilder message = new StringBuilder();
  message.append(BaseMessages.getString(PKG, "TextFileCSVImportProgressDialog.Info.ResultAfterScanning", "" + (linenr - 1)));
  message.append(BaseMessages.getString(PKG, "TextFileCSVImportProgressDialog.Info.HorizontalLine"));
  for (int i = 0; i < nrfields; i++) {
    BaseFileField field = meta.inputFields[i];
    // Evaluators are appended in column order, so a column has one only if some scanned row
    // contained it. An empty file or short rows leave columns without an evaluator; for those
    // the field definition is left untouched instead of failing with IndexOutOfBoundsException.
    StringEvaluator evaluator = i < evaluators.size() ? evaluators.get(i) : null;
    List<StringEvaluationResult> evaluationResults =
        evaluator == null ? new ArrayList<StringEvaluationResult>() : evaluator.getStringEvaluationResults();
    if (evaluator != null) {
      if (evaluationResults.isEmpty()) {
        field.setType(ValueMetaInterface.TYPE_STRING);
        field.setLength(evaluator.getMaxLength());
      } else {
        StringEvaluationResult result = evaluator.getAdvicedResult();
        if (result != null) {
          // Take the first option we find, list the others below...
          //
          ValueMetaInterface conversionMeta = result.getConversionMeta();
          field.setType(conversionMeta.getType());
          field.setTrimType(conversionMeta.getTrimType());
          field.setFormat(conversionMeta.getConversionMask());
          field.setDecimalSymbol(conversionMeta.getDecimalSymbol());
          field.setGroupSymbol(conversionMeta.getGroupingSymbol());
          field.setLength(conversionMeta.getLength());
          field.setPrecision(conversionMeta.getPrecision());
          nrnull[i] = result.getNrNull();
          minstr[i] = result.getMin() == null ? "" : result.getMin().toString();
          maxstr[i] = result.getMax() == null ? "" : result.getMax().toString();
        }
      }
    }
    message.append(BaseMessages.getString(PKG, "TextFileCSVImportProgressDialog.Info.FieldNumber", "" + (i + 1)));
    message.append(BaseMessages.getString(PKG, "TextFileCSVImportProgressDialog.Info.FieldName", field.getName()));
    message.append(BaseMessages.getString(PKG, "TextFileCSVImportProgressDialog.Info.FieldType", field.getTypeDesc()));
    switch (field.getType()) {
      case ValueMetaInterface.TYPE_NUMBER:
        message.append(BaseMessages.getString(PKG, "TextFileCSVImportProgressDialog.Info.EstimatedLength", (field.getLength() < 0 ? "-" : "" + field.getLength())));
        message.append(BaseMessages.getString(PKG, "TextFileCSVImportProgressDialog.Info.EstimatedPrecision", field.getPrecision() < 0 ? "-" : "" + field.getPrecision()));
        message.append(BaseMessages.getString(PKG, "TextFileCSVImportProgressDialog.Info.NumberFormat", field.getFormat()));
        if (!evaluationResults.isEmpty()) {
          if (evaluationResults.size() > 1) {
            message.append(BaseMessages.getString(PKG, "TextFileCSVImportProgressDialog.Info.WarnNumberFormat"));
          }
          for (StringEvaluationResult seResult : evaluationResults) {
            String mask = seResult.getConversionMeta().getConversionMask();
            message.append(BaseMessages.getString(PKG, "TextFileCSVImportProgressDialog.Info.NumberFormat2", mask));
            message.append(BaseMessages.getString(PKG, "TextFileCSVImportProgressDialog.Info.TrimType", seResult.getConversionMeta().getTrimType()));
            message.append(BaseMessages.getString(PKG, "TextFileCSVImportProgressDialog.Info.NumberMinValue", seResult.getMin()));
            message.append(BaseMessages.getString(PKG, "TextFileCSVImportProgressDialog.Info.NumberMaxValue", seResult.getMax()));
            try {
              df2.applyPattern(mask);
              df2.setDecimalFormatSymbols(dfs2);
              double mn = df2.parse(seResult.getMin().toString()).doubleValue();
              message.append(BaseMessages.getString(PKG, "TextFileCSVImportProgressDialog.Info.NumberExample", mask, seResult.getMin(), Double.toString(mn)));
            } catch (Exception e) {
              // Best effort: the example line is simply left out of the report.
              if (log.isDetailed()) {
                log.logDetailed("This is unexpected: parsing [" + seResult.getMin() + "] with format [" + mask + "] did not work.");
              }
            }
          }
        }
        message.append(BaseMessages.getString(PKG, "TextFileCSVImportProgressDialog.Info.NumberNrNullValues", "" + nrnull[i]));
        break;
      case ValueMetaInterface.TYPE_STRING:
        message.append(BaseMessages.getString(PKG, "TextFileCSVImportProgressDialog.Info.StringMaxLength", "" + field.getLength()));
        message.append(BaseMessages.getString(PKG, "TextFileCSVImportProgressDialog.Info.StringMinValue", minstr[i]));
        message.append(BaseMessages.getString(PKG, "TextFileCSVImportProgressDialog.Info.StringMaxValue", maxstr[i]));
        message.append(BaseMessages.getString(PKG, "TextFileCSVImportProgressDialog.Info.StringNrNullValues", "" + nrnull[i]));
        break;
      case ValueMetaInterface.TYPE_DATE:
        message.append(BaseMessages.getString(PKG, "TextFileCSVImportProgressDialog.Info.DateMaxLength", field.getLength() < 0 ? "-" : "" + field.getLength()));
        message.append(BaseMessages.getString(PKG, "TextFileCSVImportProgressDialog.Info.DateFormat", field.getFormat()));
        if (dateFormatCount[i] > 1) {
          message.append(BaseMessages.getString(PKG, "TextFileCSVImportProgressDialog.Info.WarnDateFormat"));
        }
        if (!Utils.isEmpty(minstr[i])) {
          for (int x = 0; x < Const.getDateFormats().length; x++) {
            if (dateFormat[i][x]) {
              message.append(BaseMessages.getString(PKG, "TextFileCSVImportProgressDialog.Info.DateFormat2", Const.getDateFormats()[x]));
              Date mindate = minDate[i][x];
              Date maxdate = maxDate[i][x];
              message.append(BaseMessages.getString(PKG, "TextFileCSVImportProgressDialog.Info.DateMinValue", mindate.toString()));
              message.append(BaseMessages.getString(PKG, "TextFileCSVImportProgressDialog.Info.DateMaxValue", maxdate.toString()));
              daf2.applyPattern(Const.getDateFormats()[x]);
              try {
                Date md = daf2.parse(minstr[i]);
                message.append(BaseMessages.getString(PKG, "TextFileCSVImportProgressDialog.Info.DateExample", Const.getDateFormats()[x], minstr[i], md.toString()));
              } catch (Exception e) {
                // Best effort: the example line is simply left out of the report.
                if (log.isDetailed()) {
                  log.logDetailed("This is unexpected: parsing [" + minstr[i] + "] with format [" + Const.getDateFormats()[x] + "] did not work.");
                }
              }
            }
          }
        }
        message.append(BaseMessages.getString(PKG, "TextFileCSVImportProgressDialog.Info.DateNrNullValues", "" + nrnull[i]));
        break;
      default:
        break;
    }
    // linenr starts at 1, so linenr - 1 is the number of rows that were evaluated.
    if (nrnull[i] == linenr - 1) {
      message.append(BaseMessages.getString(PKG, "TextFileCSVImportProgressDialog.Info.AllNullValues"));
    }
    message.append(Const.CR);
  }
  monitor.worked(1);
  monitor.done();
  return message.toString();
}
use of org.pentaho.di.core.row.RowMeta in project pentaho-kettle by pentaho.
the class KettleDatabaseRepositoryConnectionDelegate method getOneRow.
/**
 * Reads one row from a repository table, selected by the value of a key field.
 * <p>
 * This method should be called WITH AN ALREADY QUOTED schema and table: {@code schemaAndTable}
 * and {@code keyfield} are concatenated into the SQL text as they are; only the key value is
 * bound as a statement parameter.
 *
 * @param schemaAndTable the (already quoted) schema and table to select from
 * @param keyfield       the name of the column compared against {@code id}
 * @param id             the key to look for; a {@code null} id binds a {@code null} parameter
 * @return the row that was read together with the return row metadata; when the query yields no
 *         row, a freshly allocated empty row sized to the return row metadata
 * @throws KettleException declared for failures while preparing or running the query
 */
public RowMetaAndData getOneRow(String schemaAndTable, String keyfield, ObjectId id) throws KettleException {
  String sql = "SELECT * FROM " + schemaAndTable + " WHERE " + keyfield + " = ?";
  // Get the prepared statement
  //
  PreparedStatement ps = getPreparedStatement(sql);
  // Assemble the parameter (if any)
  //
  RowMetaInterface parameterMeta = new RowMeta();
  parameterMeta.addValueMeta(new ValueMetaInteger("id"));
  Object[] parameterData = new Object[] { id != null ? Long.parseLong(id.getId()) : null };
  return callRead(new Callable<RowMetaAndData>() {
    @Override
    public RowMetaAndData call() throws Exception {
      ResultSet resultSet = database.openQuery(ps, parameterMeta, parameterData);
      Object[] result;
      try {
        result = database.getRow(resultSet);
      } finally {
        // Close the query even when reading the row fails, so the result set is not leaked.
        if (resultSet != null) {
          database.closeQuery(resultSet);
        }
      }
      if (result == null) {
        return new RowMetaAndData(database.getReturnRowMeta(), RowDataUtil.allocateRowData(database.getReturnRowMeta().size()));
      }
      return new RowMetaAndData(database.getReturnRowMeta(), result);
    }
  });
}
use of org.pentaho.di.core.row.RowMeta in project pentaho-kettle by pentaho.
the class KettleDatabaseRepositoryConnectionDelegate method getParameterMetaData.
/**
 * Builds the parameter row for a query that takes the given ids as integer parameters.
 * The value metadata entries are named {@code id1}, {@code id2}, ... in the order of {@code ids},
 * and each data slot holds the id's string form parsed as a {@code Long}.
 *
 * @param ids the object ids to turn into parameters
 * @return the parameter metadata together with the parameter values
 * @throws KettleException declared by the signature
 */
public RowMetaAndData getParameterMetaData(ObjectId... ids) throws KettleException {
  RowMetaInterface meta = new RowMeta();
  Object[] values = new Object[ids.length];
  int position = 0;
  for (ObjectId current : ids) {
    String parameterName = "id" + (position + 1);
    meta.addValueMeta(new ValueMetaInteger(parameterName));
    values[position] = Long.valueOf(current.getId());
    position++;
  }
  return new RowMetaAndData(meta, values);
}
use of org.pentaho.di.core.row.RowMeta in project pentaho-kettle by pentaho.
the class KettleDatabaseRepositoryConnectionDelegate method getIDs.
/**
 * Runs the given query with the supplied ids as integer parameters and returns the first column
 * of every result row as an object id.
 *
 * @param sql      the query to prepare and run
 * @param objectId the ids bound as parameters {@code id1}, {@code id2}, ...; each one is cast to
 *                 {@code LongObjectId}
 * @return one id per result row, or an empty array when the query returns no rows
 * @throws KettleException declared for failures while preparing or running the query
 */
public ObjectId[] getIDs(String sql, ObjectId... objectId) throws KettleException {
  // Prepare the statement first, then describe and fill its parameters.
  PreparedStatement statement = getPreparedStatement(sql);
  RowMetaInterface parameterMeta = new RowMeta();
  Object[] parameterData = new Object[objectId.length];
  int position = 0;
  for (ObjectId parameter : objectId) {
    parameterMeta.addValueMeta(new ValueMetaInteger("id" + (position + 1)));
    parameterData[position] = ((LongObjectId) parameter).longValue();
    position++;
  }
  return callRead(new Callable<ObjectId[]>() {
    @Override
    public ObjectId[] call() throws Exception {
      List<Object[]> rows = database.getRows(database.openQuery(statement, parameterMeta, parameterData), 0, null);
      if (Utils.isEmpty(rows)) {
        return new ObjectId[0];
      }
      // Convert the first column of each row into an id.
      RowMetaInterface returnMeta = database.getReturnRowMeta();
      ObjectId[] found = new ObjectId[rows.size()];
      int index = 0;
      for (Object[] row : rows) {
        found[index] = new LongObjectId(returnMeta.getInteger(row, 0));
        index++;
      }
      return found;
    }
  });
}
use of org.pentaho.di.core.row.RowMeta in project pentaho-kettle by pentaho.
the class KettleDatabaseRepositoryConnectionDelegate method getStrings.
/**
 * Runs the given query with the supplied ids as integer parameters and returns the first column
 * of every result row as a string.
 *
 * @param sql      the query to prepare and run
 * @param objectId the ids bound as parameters {@code id1}, {@code id2}, ...; each one is cast to
 *                 {@code LongObjectId}
 * @return one string per result row, or an empty array when the query returns no rows
 * @throws KettleException declared for failures while preparing or running the query
 */
public String[] getStrings(String sql, ObjectId... objectId) throws KettleException {
  // Prepare the statement first, then describe and fill its parameters.
  PreparedStatement statement = getPreparedStatement(sql);
  RowMetaInterface parameterMeta = new RowMeta();
  Object[] parameterData = new Object[objectId.length];
  int position = 0;
  for (ObjectId parameter : objectId) {
    parameterMeta.addValueMeta(new ValueMetaInteger("id" + (position + 1)));
    parameterData[position] = ((LongObjectId) parameter).longValue();
    position++;
  }
  return callRead(new Callable<String[]>() {
    @Override
    public String[] call() throws Exception {
      List<Object[]> rows = database.getRows(database.openQuery(statement, parameterMeta, parameterData), 0, null);
      if (Utils.isEmpty(rows)) {
        return new String[0];
      }
      // Assemble the result from the first column of each row.
      RowMetaInterface returnMeta = database.getReturnRowMeta();
      String[] values = new String[rows.size()];
      int index = 0;
      for (Object[] row : rows) {
        values[index] = returnMeta.getString(row, 0);
        index++;
      }
      return values;
    }
  });
}
Aggregations