use of org.pentaho.di.core.row.ValueMetaInterface in project pentaho-kettle by pentaho.
the class TextFileCSVImportProgressDialog method doScan.
/**
 * Samples lines from the already opened reader, feeds every field value to a
 * {@link StringEvaluator} and builds a textual report with the advised type, format,
 * length and precision per field. The advised settings are written back into
 * {@code meta.inputFields}.
 *
 * <p>When {@code samples} is 0 the loop keeps reading until the reader is exhausted,
 * a line cannot be converted, or the monitor is cancelled.
 *
 * @param monitor progress monitor that receives task/sub-task updates and is polled for cancellation
 * @return the report text assembled from the localized message templates
 * @throws KettleException declared by the row-meta and line-conversion helpers this method calls
 */
private String doScan(IProgressMonitor monitor) throws KettleException {
  if (samples > 0) {
    monitor.beginTask(BaseMessages.getString(PKG, "TextFileCSVImportProgressDialog.Task.ScanningFile"), samples + 1);
  } else {
    monitor.beginTask(BaseMessages.getString(PKG, "TextFileCSVImportProgressDialog.Task.ScanningFile"), 2);
  }
  long fileLineNumber = 0;
  DecimalFormatSymbols dfs = new DecimalFormatSymbols();
  int nrfields = meta.inputFields.length;
  // Loop-invariant: fetch the candidate date formats once.
  String[] dateFormats = Const.getDateFormats();

  RowMetaInterface outputRowMeta = new RowMeta();
  meta.getFields(outputRowMeta, null, null, null, transMeta, null, null);
  // Remove the storage meta-data (don't go for lazy conversion during scan)
  for (ValueMetaInterface valueMeta : outputRowMeta.getValueMetaList()) {
    valueMeta.setStorageMetadata(null);
    valueMeta.setStorageType(ValueMetaInterface.STORAGE_TYPE_NORMAL);
  }
  RowMetaInterface convertRowMeta = outputRowMeta.cloneToType(ValueMetaInterface.TYPE_STRING);

  // Per-field statistics that end up in the report.
  // How many null values?
  int[] nrnull = new int[nrfields];
  // min / max string
  String[] minstr = new String[nrfields];
  String[] maxstr = new String[nrfields];
  // How many date formats work, and which ones?
  int[] dateFormatCount = new int[nrfields];
  boolean[][] dateFormat = new boolean[nrfields][dateFormats.length];
  // min / max date value per date format
  Date[][] minDate = new Date[nrfields][dateFormats.length];
  Date[][] maxDate = new Date[nrfields][dateFormats.length];
  // NOTE: the former number-guess arrays (and isDate/firststr) were only ever written, never
  // read, and two of them were sized with the date-format count while being indexed by the
  // number-format count. They have been dropped.

  for (int i = 0; i < nrfields; i++) {
    BaseFileField field = meta.inputFields[i];
    if (log.isDebug()) {
      debug = "init field #" + i;
    }
    if (replaceMeta) {
      // Clear previous info...
      field.setName(meta.inputFields[i].getName());
      field.setType(meta.inputFields[i].getType());
      field.setFormat("");
      field.setLength(-1);
      field.setPrecision(-1);
      field.setCurrencySymbol(dfs.getCurrencySymbol());
      field.setDecimalSymbol("" + dfs.getDecimalSeparator());
      field.setGroupSymbol("" + dfs.getGroupingSeparator());
      field.setNullString("-");
      field.setTrimType(ValueMetaInterface.TRIM_TYPE_NONE);
    }
    nrnull[i] = 0;
    minstr[i] = "";
    maxstr[i] = "";
    // Init date guess: every format is a candidate until proven otherwise
    for (int j = 0; j < dateFormats.length; j++) {
      dateFormat[i][j] = true;
      minDate[i][j] = Const.MAX_DATE;
      maxDate[i][j] = Const.MIN_DATE;
    }
    dateFormatCount[i] = dateFormats.length;
  }

  // Work on a clone in which every field is read as a plain String.
  TextFileInputMeta strinfo = (TextFileInputMeta) meta.clone();
  for (int i = 0; i < nrfields; i++) {
    strinfo.inputFields[i].setType(ValueMetaInterface.TYPE_STRING);
  }

  // Sample <samples> rows...
  debug = "get first line";
  StringBuilder lineBuffer = new StringBuilder(256);
  int fileFormatType = meta.getFileFormatTypeNr();
  // If the file has a header we overwrite the first line
  // However, if it doesn't have a header, take a new line
  //
  String line = TextFileInputUtils.getLine(log, reader, encodingType, fileFormatType, lineBuffer);
  fileLineNumber++;
  int skipped = 1;
  if (meta.content.header) {
    while (line != null && skipped < meta.content.nrHeaderLines) {
      line = TextFileInputUtils.getLine(log, reader, encodingType, fileFormatType, lineBuffer);
      skipped++;
      fileLineNumber++;
    }
  }

  int linenr = 1;
  List<StringEvaluator> evaluators = new ArrayList<StringEvaluator>();
  // Allocate number and date parsers
  DecimalFormat df2 = (DecimalFormat) NumberFormat.getInstance();
  DecimalFormatSymbols dfs2 = new DecimalFormatSymbols();
  SimpleDateFormat daf2 = new SimpleDateFormat();
  boolean errorFound = false;
  while (!errorFound && line != null && (linenr <= samples || samples == 0) && !monitor.isCanceled()) {
    monitor.subTask(BaseMessages.getString(PKG, "TextFileCSVImportProgressDialog.Task.ScanningLine", "" + linenr));
    if (samples > 0) {
      monitor.worked(1);
    }
    if (log.isDebug()) {
      debug = "convert line #" + linenr + " to row";
    }
    RowMetaInterface rowMeta = new RowMeta();
    meta.getFields(rowMeta, "stepname", null, null, transMeta, null, null);
    // Remove the storage meta-data (don't go for lazy conversion during scan)
    for (ValueMetaInterface valueMeta : rowMeta.getValueMetaList()) {
      valueMeta.setStorageMetadata(null);
      valueMeta.setStorageType(ValueMetaInterface.STORAGE_TYPE_NORMAL);
    }
    String delimiter = transMeta.environmentSubstitute(meta.content.separator);
    String enclosure = transMeta.environmentSubstitute(meta.content.enclosure);
    String escapeCharacter = transMeta.environmentSubstitute(meta.content.escapeCharacter);
    Object[] r = TextFileInputUtils.convertLineToRow(log, new TextFileLine(line, fileLineNumber, null), strinfo, null, 0, outputRowMeta, convertRowMeta, FileInputList.createFilePathList(transMeta, meta.inputFiles.fileName, meta.inputFiles.fileMask, meta.inputFiles.excludeFileMask, meta.inputFiles.fileRequired, meta.inputFiles.includeSubFolderBoolean())[0], rownumber, delimiter, enclosure, escapeCharacter, null, new BaseFileInputAdditionalField(), null, null, false, null, null, null, null, null);
    if (r == null) {
      // The line could not be converted: stop sampling (the loop condition sees errorFound).
      errorFound = true;
      continue;
    }
    rownumber++;
    for (int i = 0; i < nrfields && i < r.length; i++) {
      // One evaluator per field, created lazily the first time the field is seen.
      StringEvaluator evaluator;
      if (i >= evaluators.size()) {
        evaluator = new StringEvaluator(true);
        evaluators.add(evaluator);
      } else {
        evaluator = evaluators.get(i);
      }
      evaluator.evaluateString(rowMeta.getString(r, i));
    }
    fileLineNumber++;
    linenr++;
    // Grab another line...
    //
    line = TextFileInputUtils.getLine(log, reader, encodingType, fileFormatType, lineBuffer);
  }
  monitor.worked(1);
  monitor.setTaskName(BaseMessages.getString(PKG, "TextFileCSVImportProgressDialog.Task.AnalyzingResults"));

  // Show information on items using a dialog box
  //
  StringBuilder message = new StringBuilder();
  message.append(BaseMessages.getString(PKG, "TextFileCSVImportProgressDialog.Info.ResultAfterScanning", "" + (linenr - 1)));
  message.append(BaseMessages.getString(PKG, "TextFileCSVImportProgressDialog.Info.HorizontalLine"));
  for (int i = 0; i < nrfields; i++) {
    BaseFileField field = meta.inputFields[i];
    // There is no evaluator for this field when no row was sampled (e.g. nothing left to read
    // after the header). Indexing the list blindly would throw IndexOutOfBoundsException.
    StringEvaluator evaluator = i < evaluators.size() ? evaluators.get(i) : null;
    List<StringEvaluationResult> evaluationResults =
        evaluator == null ? new ArrayList<StringEvaluationResult>() : evaluator.getStringEvaluationResults();
    if (evaluator == null) {
      // Nothing was sampled for this field: leave its metadata as it is and only report on it.
    } else if (evaluationResults.isEmpty()) {
      // No conversion survived the samples: fall back to a String of the longest length seen.
      field.setType(ValueMetaInterface.TYPE_STRING);
      field.setLength(evaluator.getMaxLength());
    } else {
      StringEvaluationResult result = evaluator.getAdvicedResult();
      if (result != null) {
        // Take the first option we find, list the others below...
        //
        ValueMetaInterface conversionMeta = result.getConversionMeta();
        field.setType(conversionMeta.getType());
        field.setTrimType(conversionMeta.getTrimType());
        field.setFormat(conversionMeta.getConversionMask());
        field.setDecimalSymbol(conversionMeta.getDecimalSymbol());
        field.setGroupSymbol(conversionMeta.getGroupingSymbol());
        field.setLength(conversionMeta.getLength());
        field.setPrecision(conversionMeta.getPrecision());
        nrnull[i] = result.getNrNull();
        minstr[i] = result.getMin() == null ? "" : result.getMin().toString();
        maxstr[i] = result.getMax() == null ? "" : result.getMax().toString();
      }
    }
    message.append(BaseMessages.getString(PKG, "TextFileCSVImportProgressDialog.Info.FieldNumber", "" + (i + 1)));
    message.append(BaseMessages.getString(PKG, "TextFileCSVImportProgressDialog.Info.FieldName", field.getName()));
    message.append(BaseMessages.getString(PKG, "TextFileCSVImportProgressDialog.Info.FieldType", field.getTypeDesc()));
    switch (field.getType()) {
      case ValueMetaInterface.TYPE_NUMBER:
        message.append(BaseMessages.getString(PKG, "TextFileCSVImportProgressDialog.Info.EstimatedLength", (field.getLength() < 0 ? "-" : "" + field.getLength())));
        message.append(BaseMessages.getString(PKG, "TextFileCSVImportProgressDialog.Info.EstimatedPrecision", field.getPrecision() < 0 ? "-" : "" + field.getPrecision()));
        message.append(BaseMessages.getString(PKG, "TextFileCSVImportProgressDialog.Info.NumberFormat", field.getFormat()));
        if (!evaluationResults.isEmpty()) {
          if (evaluationResults.size() > 1) {
            message.append(BaseMessages.getString(PKG, "TextFileCSVImportProgressDialog.Info.WarnNumberFormat"));
          }
          for (StringEvaluationResult seResult : evaluationResults) {
            String mask = seResult.getConversionMeta().getConversionMask();
            message.append(BaseMessages.getString(PKG, "TextFileCSVImportProgressDialog.Info.NumberFormat2", mask));
            message.append(BaseMessages.getString(PKG, "TextFileCSVImportProgressDialog.Info.TrimType", seResult.getConversionMeta().getTrimType()));
            message.append(BaseMessages.getString(PKG, "TextFileCSVImportProgressDialog.Info.NumberMinValue", seResult.getMin()));
            message.append(BaseMessages.getString(PKG, "TextFileCSVImportProgressDialog.Info.NumberMaxValue", seResult.getMax()));
            try {
              df2.applyPattern(mask);
              df2.setDecimalFormatSymbols(dfs2);
              double mn = df2.parse(seResult.getMin().toString()).doubleValue();
              message.append(BaseMessages.getString(PKG, "TextFileCSVImportProgressDialog.Info.NumberExample", mask, seResult.getMin(), Double.toString(mn)));
            } catch (Exception e) {
              // The example line is optional; a failure here is only logged.
              if (log.isDetailed()) {
                log.logDetailed("This is unexpected: parsing [" + seResult.getMin() + "] with format [" + mask + "] did not work.");
              }
            }
          }
        }
        message.append(BaseMessages.getString(PKG, "TextFileCSVImportProgressDialog.Info.NumberNrNullValues", "" + nrnull[i]));
        break;
      case ValueMetaInterface.TYPE_STRING:
        message.append(BaseMessages.getString(PKG, "TextFileCSVImportProgressDialog.Info.StringMaxLength", "" + field.getLength()));
        message.append(BaseMessages.getString(PKG, "TextFileCSVImportProgressDialog.Info.StringMinValue", minstr[i]));
        message.append(BaseMessages.getString(PKG, "TextFileCSVImportProgressDialog.Info.StringMaxValue", maxstr[i]));
        message.append(BaseMessages.getString(PKG, "TextFileCSVImportProgressDialog.Info.StringNrNullValues", "" + nrnull[i]));
        break;
      case ValueMetaInterface.TYPE_DATE:
        message.append(BaseMessages.getString(PKG, "TextFileCSVImportProgressDialog.Info.DateMaxLength", field.getLength() < 0 ? "-" : "" + field.getLength()));
        message.append(BaseMessages.getString(PKG, "TextFileCSVImportProgressDialog.Info.DateFormat", field.getFormat()));
        if (dateFormatCount[i] > 1) {
          message.append(BaseMessages.getString(PKG, "TextFileCSVImportProgressDialog.Info.WarnDateFormat"));
        }
        if (!Utils.isEmpty(minstr[i])) {
          for (int x = 0; x < dateFormats.length; x++) {
            if (dateFormat[i][x]) {
              message.append(BaseMessages.getString(PKG, "TextFileCSVImportProgressDialog.Info.DateFormat2", dateFormats[x]));
              Date mindate = minDate[i][x];
              Date maxdate = maxDate[i][x];
              message.append(BaseMessages.getString(PKG, "TextFileCSVImportProgressDialog.Info.DateMinValue", mindate.toString()));
              message.append(BaseMessages.getString(PKG, "TextFileCSVImportProgressDialog.Info.DateMaxValue", maxdate.toString()));
              daf2.applyPattern(dateFormats[x]);
              try {
                Date md = daf2.parse(minstr[i]);
                message.append(BaseMessages.getString(PKG, "TextFileCSVImportProgressDialog.Info.DateExample", dateFormats[x], minstr[i], md.toString()));
              } catch (Exception e) {
                // The example line is optional; a failure here is only logged.
                if (log.isDetailed()) {
                  log.logDetailed("This is unexpected: parsing [" + minstr[i] + "] with format [" + dateFormats[x] + "] did not work.");
                }
              }
            }
          }
        }
        message.append(BaseMessages.getString(PKG, "TextFileCSVImportProgressDialog.Info.DateNrNullValues", "" + nrnull[i]));
        break;
      default:
        break;
    }
    if (nrnull[i] == linenr - 1) {
      message.append(BaseMessages.getString(PKG, "TextFileCSVImportProgressDialog.Info.AllNullValues"));
    }
    message.append(Const.CR);
  }
  monitor.worked(1);
  monitor.done();
  return message.toString();
}
use of org.pentaho.di.core.row.ValueMetaInterface in project pentaho-kettle by pentaho.
the class GetXMLData method processPutRow.
/**
 * Builds one output row for the given XML node: evaluates every configured field's XPath
 * against the node, trims and converts the value, then appends the optional file-related
 * columns (filename, row number, short filename, extension, path, size, hidden,
 * last modification date, URI, root URI) in that order.
 *
 * @param node the XML node the field XPaths are evaluated against
 * @return the filled output row, or {@code null} when an error occurred and the step's
 *         error handling took the row
 * @throws KettleException when an error occurs and the step is not doing error handling
 */
private Object[] processPutRow(AbstractNode node) throws KettleException {
  // Create new row...
  Object[] outputRowData = buildEmptyRow();
  // Create new row or clone
  if (meta.isInFields()) {
    System.arraycopy(data.readrow, 0, outputRowData, 0, data.nrReadRow);
  }
  try {
    data.nodenr++;
    // Read fields...
    for (int i = 0; i < data.nrInputFields; i++) {
      // Get field
      GetXMLDataField xmlDataField = meta.getInputFields()[i];
      // Get the Path to look for
      String XPathValue = xmlDataField.getXPath();
      XPathValue = environmentSubstitute(XPathValue);
      if (xmlDataField.getElementType() == GetXMLDataField.ELEMENT_TYPE_ATTRIBUT) {
        // We have an attribute
        // do we need to add leading @?
        // Only put @ to the last element in path, not in front at all
        int last = XPathValue.lastIndexOf(GetXMLDataMeta.N0DE_SEPARATOR);
        if (last > -1) {
          last++;
          String attribut = XPathValue.substring(last, XPathValue.length());
          if (!attribut.startsWith(GetXMLDataMeta.AT)) {
            XPathValue = XPathValue.substring(0, last) + GetXMLDataMeta.AT + attribut;
          }
        } else {
          if (!XPathValue.startsWith(GetXMLDataMeta.AT)) {
            XPathValue = GetXMLDataMeta.AT + XPathValue;
          }
        }
      }
      if (meta.isuseToken()) {
        // See if user use Token inside path field
        // The syntax is : @_Fieldname-
        // PDI will search for Fieldname value and replace it
        // Fieldname must be defined before the current node
        XPathValue = substituteToken(XPathValue, outputRowData);
        if (isDetailed()) {
          logDetailed(XPathValue);
        }
      }
      // Get node value
      String nodevalue;
      // Handle namespaces
      if (meta.isNamespaceAware()) {
        XPath xpathField = node.createXPath(addNSPrefix(XPathValue, data.PathValue));
        xpathField.setNamespaceURIs(data.NAMESPACE);
        if (xmlDataField.getResultType() == GetXMLDataField.RESULT_TYPE_VALUE_OF) {
          nodevalue = xpathField.valueOf(node);
        } else {
          // A missing node yields an empty string instead of failing on asXML()
          Node n = xpathField.selectSingleNode(node);
          if (n != null) {
            nodevalue = n.asXML();
          } else {
            nodevalue = "";
          }
        }
      } else {
        if (xmlDataField.getResultType() == GetXMLDataField.RESULT_TYPE_VALUE_OF) {
          nodevalue = node.valueOf(XPathValue);
        } else {
          // A missing node yields an empty string instead of failing on asXML()
          Node n = node.selectSingleNode(XPathValue);
          if (n != null) {
            nodevalue = n.asXML();
          } else {
            nodevalue = "";
          }
        }
      }
      // Do trimming
      switch (xmlDataField.getTrimType()) {
        case GetXMLDataField.TYPE_TRIM_LEFT:
          nodevalue = Const.ltrim(nodevalue);
          break;
        case GetXMLDataField.TYPE_TRIM_RIGHT:
          nodevalue = Const.rtrim(nodevalue);
          break;
        case GetXMLDataField.TYPE_TRIM_BOTH:
          nodevalue = Const.trim(nodevalue);
          break;
        default:
          break;
      }
      // Do conversions
      //
      ValueMetaInterface targetValueMeta = data.outputRowMeta.getValueMeta(data.totalpreviousfields + i);
      ValueMetaInterface sourceValueMeta = data.convertRowMeta.getValueMeta(data.totalpreviousfields + i);
      outputRowData[data.totalpreviousfields + i] = targetValueMeta.convertData(sourceValueMeta, nodevalue);
      // Do we need to repeat this field if it is null?
      if (xmlDataField.isRepeated()) {
        if (data.previousRow != null && Utils.isEmpty(nodevalue)) {
          outputRowData[data.totalpreviousfields + i] = data.previousRow[data.totalpreviousfields + i];
        }
      }
    }
    // End of loop over fields...
    int rowIndex = data.totalpreviousfields + data.nrInputFields;
    // See if we need to add the filename to the row...
    if (meta.includeFilename() && !Utils.isEmpty(meta.getFilenameField())) {
      outputRowData[rowIndex++] = data.filename;
    }
    // See if we need to add the row number to the row...
    if (meta.includeRowNumber() && !Utils.isEmpty(meta.getRowNumberField())) {
      outputRowData[rowIndex++] = data.rownr;
    }
    // Possibly add short filename...
    if (meta.getShortFileNameField() != null && meta.getShortFileNameField().length() > 0) {
      outputRowData[rowIndex++] = data.shortFilename;
    }
    // Add Extension
    if (meta.getExtensionField() != null && meta.getExtensionField().length() > 0) {
      outputRowData[rowIndex++] = data.extension;
    }
    // add path
    if (meta.getPathField() != null && meta.getPathField().length() > 0) {
      outputRowData[rowIndex++] = data.path;
    }
    // Add Size
    if (meta.getSizeField() != null && meta.getSizeField().length() > 0) {
      outputRowData[rowIndex++] = data.size;
    }
    // add Hidden
    if (meta.isHiddenField() != null && meta.isHiddenField().length() > 0) {
      // Previously this parsed data.path as a boolean, which is false for any real path.
      // NOTE(review): assumes the step data exposes a boolean 'hidden' flag next to
      // path/size/extension - confirm against the data class.
      outputRowData[rowIndex++] = Boolean.valueOf(data.hidden);
    }
    // Add modification date
    if (meta.getLastModificationDateField() != null && meta.getLastModificationDateField().length() > 0) {
      outputRowData[rowIndex++] = data.lastModificationDateTime;
    }
    // Add Uri
    if (meta.getUriField() != null && meta.getUriField().length() > 0) {
      outputRowData[rowIndex++] = data.uriName;
    }
    // Add RootUri
    if (meta.getRootUriField() != null && meta.getRootUriField().length() > 0) {
      outputRowData[rowIndex] = data.rootUriName;
    }
    RowMetaInterface irow = getInputRowMeta();
    if (irow == null) {
      data.previousRow = outputRowData;
    } else {
      // clone to previously allocated array to make sure next step doesn't
      // change it in between...
      System.arraycopy(outputRowData, 0, this.prevRow, 0, outputRowData.length);
      // Pick up everything else that needs a real deep clone
      data.previousRow = irow.cloneRow(outputRowData, this.prevRow);
    }
  } catch (Exception e) {
    if (getStepMeta().isDoingErrorHandling()) {
      // Simply add this row to the error row
      putError(data.outputRowMeta, outputRowData, 1, e.toString(), null, "GetXMLData001");
      data.errorInRowButContinue = true;
      return null;
    } else {
      logError(e.toString());
      // Keep the original exception as cause so its stack trace is not lost.
      throw new KettleException(e.toString(), e);
    }
  }
  return outputRowData;
}
use of org.pentaho.di.core.row.ValueMetaInterface in project pentaho-kettle by pentaho.
the class BaseStep method putRow.
/**
 * Validates the row metadata and then hands the row to this step's row handler.
 *
 * <p>Validation is skipped when {@code rowMeta} is {@code null} or when the system property
 * named by {@code Const.ALLOW_EMPTY_FIELD_NAMES_AND_TYPES} parses to {@code true}. Otherwise
 * every value meta must have a non-blank name and a type greater than zero (BACKLOG-18004).
 *
 * @param rowMeta metadata describing {@code row}; may be {@code null}
 * @param row     The row to put to the destination rowset(s).
 * @throws KettleStepException when a field has a blank name or a non-positive type
 */
@Override
public void putRow(RowMetaInterface rowMeta, Object[] row) throws KettleStepException {
  // The property is only consulted when there is metadata to check.
  boolean mustValidate = rowMeta != null
      && !Boolean.parseBoolean(
          System.getProperties().getProperty(Const.ALLOW_EMPTY_FIELD_NAMES_AND_TYPES, "false"));

  if (mustValidate) {
    for (ValueMetaInterface fieldMeta : rowMeta.getValueMetaList()) {
      boolean nameMissing = StringUtils.isBlank(fieldMeta.getName());
      if (nameMissing) {
        throw new KettleStepException("Please set a field name for all field(s) that have 'null'.");
      }
      boolean typeMissing = fieldMeta.getType() <= 0;
      if (typeMissing) {
        throw new KettleStepException("Please set a value for the missing field(s) type.");
      }
    }
  }

  getRowHandler().putRow(rowMeta, row);
}
use of org.pentaho.di.core.row.ValueMetaInterface in project pdi-dataservice-server-plugin by pentaho.
the class ValueMetaResolver method inListToTypedObjectArray.
/**
 * Splits a semi-colon delimited in-list, unescapes its entries and returns them as an
 * array typed according to the value meta resolved for {@code fieldName}.
 * Semi-colon delimited in-lists are a convention originating in
 * org.pentaho.di.core.Condition.
 *
 * @param fieldName name of the field whose value meta decides the target type
 * @param value     the semi-colon delimited list
 * @return the unescaped strings themselves when the field is a String, otherwise the
 *         converted values
 * @throws PushDownOptimizationException declared by the resolving/conversion helpers
 */
public Object[] inListToTypedObjectArray(String fieldName, String value) throws PushDownOptimizationException {
  String[] items = Const.splitString(value, ';', true);
  unescapeList(items);

  ValueMetaInterface fieldMeta = getValueMeta(fieldName);
  if (!fieldMeta.isString()) {
    return convertArrayToType(items, fieldMeta);
  }
  // String fields need no type conversion
  return items;
}
use of org.pentaho.di.core.row.ValueMetaInterface in project pdi-dataservice-server-plugin by pentaho.
the class DataServiceExecutor method convertListCondition.
/**
 * Rewrites the in-list on the right-hand side of a condition: the list is resolved to typed
 * values for the condition's left-hand field, each value is encoded back to its compatible
 * string form, and the re-joined list replaces the original value data.
 *
 * @param condition the condition whose right-hand in-list is rewritten in place
 * @param resolver  resolves the value meta and typed values for the condition's field
 */
private static void convertListCondition(Condition condition, ValueMetaResolver resolver) {
  String leftField = condition.getLeftValuename();
  try {
    // Determine meta and resolve values
    ValueMetaInterface fieldMeta = resolver.getValueMeta(leftField);
    Object[] typed = resolver.inListToTypedObjectArray(leftField, condition.getRightExactString());

    // Encode list values
    String[] encoded = new String[typed.length];
    int idx = 0;
    for (Object typedValue : typed) {
      encoded[idx++] = fieldMeta.getCompatibleString(typedValue);
    }

    // Set new condition in-list (leave meta as string)
    String joined = StringUtils.join(encoded, ';');
    condition.getRightExact().setValueData(joined);
  } catch (KettleException ignored) {
    // Best effort: when resolution or encoding fails the condition is left unconverted.
  }
}
Aggregations