Use of org.pentaho.di.core.exception.KettleFileException in project pentaho-kettle by pentaho.
The class FixedInput, method readOneRow:
/**
 * Read a single row of data from the file...
 *
 * @param doConversions
 *          set to true to do conversions; set to false for the header row.
 * @return a row of data...
 * @throws KettleException
 */
private Object[] readOneRow(boolean doConversions) throws KettleException {
  try {
    //
    if (meta.isRunningInParallel()) {
      if (getLinesInput() >= data.rowsToRead) {
        // We're done. The rest is for the other steps in the cluster.
        return null;
      }
    }
    Object[] outputRowData = RowDataUtil.allocateRowData(data.convertRowMeta.size());
    int outputIndex = 0;
    if (data.stopReading) {
      return null;
    }
    FixedFileInputField[] fieldDefinitions = meta.getFieldDefinition();
    for (int i = 0; i < fieldDefinitions.length; i++) {
      int fieldWidth = fieldDefinitions[i].getWidth();
      data.endBuffer = data.startBuffer + fieldWidth;
      if (data.endBuffer > data.bufferSize) {
        // Oops, we need to read more data...
        // Better resize this before we read other things in it...
        //
        data.resizeByteBuffer();
        // Also read another chunk of data, now that we have the space for it...
        // Ignore EOF, there might be other stuff in the buffer.
        //
        data.readBufferFromFile();
      }
      //
      if (data.endBuffer > data.bufferSize) {
        // If this is the first field and the buffer is empty, we don't return a row because we're done.
        if ((0 == i) && data.bufferSize <= 0) {
          return null;
        }
        // This is the last record of data in the file.
        data.stopReading = true;
        // Just take what's left for the current field.
        fieldWidth = data.bufferSize;
      }
      byte[] field = new byte[fieldWidth];
      System.arraycopy(data.byteBuffer, data.startBuffer, field, 0, fieldWidth);
      if (doConversions) {
        if (meta.isLazyConversionActive()) {
          outputRowData[outputIndex++] = field;
        } else {
          // We're not lazy, so we convert the data right here and now.
          // The source value metadata uses binary storage, so we just have to ask it for the native type.
          // That will do the actual conversion.
          //
          ValueMetaInterface sourceValueMeta = data.convertRowMeta.getValueMeta(outputIndex);
          outputRowData[outputIndex++] = sourceValueMeta.convertBinaryStringToNativeType(field);
        }
      } else {
        // Nothing for the header, no conversions here.
        outputRowData[outputIndex++] = null;
      }
      // OK, onto the next field...
      //
      data.startBuffer = data.endBuffer;
    }
    //
    if (meta.isLineFeedPresent()) {
      data.endBuffer += 2;
      if (data.endBuffer >= data.bufferSize) {
        // Oops, we need to read more data...
        // Better resize this before we read other things in it...
        //
        data.resizeByteBuffer();
        // Also read another chunk of data, now that we have the space for it...
        data.readBufferFromFile();
      }
      //
      if (data.byteBuffer[data.startBuffer] == '\n' || data.byteBuffer[data.startBuffer] == '\r') {
        data.startBuffer++;
        if (data.byteBuffer[data.startBuffer] == '\n' || data.byteBuffer[data.startBuffer] == '\r') {
          data.startBuffer++;
        }
      }
      data.endBuffer = data.startBuffer;
    }
    incrementLinesInput();
    return outputRowData;
  } catch (Exception e) {
    throw new KettleFileException("Exception reading line using NIO: " + e.toString(), e);
  }
}
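The heart of readOneRow() is the buffer arithmetic: each field occupies a fixed byte width, so startBuffer/endBuffer advance in lockstep and System.arraycopy slices out one field at a time. Below is a minimal standalone sketch of that slicing technique; the class name, field widths, and sample record are invented for illustration and are not part of Kettle:

import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;

public class FixedWidthSliceDemo {
  // Slice a single record into fields of the given fixed byte widths.
  static List<String> slice(byte[] record, int[] widths) {
    List<String> fields = new ArrayList<>();
    int start = 0;
    for (int width : widths) {
      // Clamp the last field if the record is shorter than declared,
      // just as readOneRow() takes "what's left" near EOF.
      int len = Math.max(0, Math.min(width, record.length - start));
      byte[] field = new byte[len];
      System.arraycopy(record, start, field, 0, len);
      fields.add(new String(field, StandardCharsets.UTF_8));
      start += len;
    }
    return fields;
  }

  public static void main(String[] args) {
    // Hypothetical layout: 4-byte id, 10-byte name, 8-byte date.
    byte[] record = "0001ACME      20240101".getBytes(StandardCharsets.UTF_8);
    System.out.println(slice(record, new int[] { 4, 10, 8 }));
    // prints: [0001, ACME      , 20240101]
  }
}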
Use of org.pentaho.di.core.exception.KettleFileException in project pentaho-kettle by pentaho.
The class ExcelInput, method processRow:
public boolean processRow(StepMetaInterface smi, StepDataInterface sdi) throws KettleException {
  meta = (ExcelInputMeta) smi;
  data = (ExcelInputData) sdi;
  if (first) {
    first = false;
    // start from scratch!
    data.outputRowMeta = new RowMeta();
    meta.getFields(data.outputRowMeta, getStepname(), null, null, this, repository, metaStore);
    if (meta.isAcceptingFilenames()) {
      // Read the files from the specified input stream...
      data.files.getFiles().clear();
      int idx = -1;
      RowSet rowSet = findInputRowSet(meta.getAcceptingStepName());
      Object[] fileRow = getRowFrom(rowSet);
      while (fileRow != null) {
        if (idx < 0) {
          idx = rowSet.getRowMeta().indexOfValue(meta.getAcceptingField());
          if (idx < 0) {
            logError(BaseMessages.getString(PKG, "ExcelInput.Error.FilenameFieldNotFound", "" + meta.getAcceptingField()));
            setErrors(1);
            stopAll();
            return false;
          }
        }
        String fileValue = rowSet.getRowMeta().getString(fileRow, idx);
        try {
          data.files.addFile(KettleVFS.getFileObject(fileValue, getTransMeta()));
        } catch (KettleFileException e) {
          throw new KettleException(BaseMessages.getString(PKG, "ExcelInput.Exception.CanNotCreateFileObject", fileValue), e);
        }
        // Grab another row
        fileRow = getRowFrom(rowSet);
      }
    }
    handleMissingFiles();
  }
  // We are done processing if the filenr >= number of files.
  if (data.filenr >= data.files.nrOfFiles()) {
    if (log.isDetailed()) {
      logDetailed(BaseMessages.getString(PKG, "ExcelInput.Log.NoMoreFiles", "" + data.filenr));
    }
    // signal end to receiver(s)
    setOutputDone();
    // end of data or error.
    return false;
  }
  // In this case we have to stop one row "earlier", since we start at row number 0!
  if ((meta.getRowLimit() > 0 && data.rownr > meta.getRowLimit())
      || (meta.readAllSheets() && meta.getRowLimit() > 0 && data.defaultStartRow == 0 && data.rownr > meta.getRowLimit() - 1)
      || (!meta.readAllSheets() && meta.getRowLimit() > 0 && data.startRow[data.sheetnr] == 0 && data.rownr > meta.getRowLimit() - 1)) {
    // The close of the openFile is in dispose()
    if (log.isDetailed()) {
      logDetailed(BaseMessages.getString(PKG, "ExcelInput.Log.RowLimitReached", "" + meta.getRowLimit()));
    }
    // signal end to receiver(s)
    setOutputDone();
    // end of data or error.
    return false;
  }
  Object[] r = getRowFromWorkbooks();
  if (r != null) {
    incrementLinesInput();
    // OK, see if we need to repeat values.
    if (data.previousRow != null) {
      for (int i = 0; i < meta.getField().length; i++) {
        ValueMetaInterface valueMeta = data.outputRowMeta.getValueMeta(i);
        Object valueData = r[i];
        if (valueMeta.isNull(valueData) && meta.getField()[i].isRepeated()) {
          // Take the value from the previous row.
          r[i] = data.previousRow[i];
        }
      }
    }
    // Remember this row for the next time around!
    data.previousRow = data.outputRowMeta.cloneRow(r);
    // Send out the good news: we found a row of data!
    putRow(data.outputRowMeta, r);
    return true;
  } else {
    // No row was found this time around, but we continue though.
    return true;
  }
}
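The repeat-fields logic in the middle of processRow() is a small, self-contained rule: for every field flagged as repeated, a null cell inherits the value from the previous row. A hedged sketch of just that rule with invented field values (an illustration of the pattern, not ExcelInput's API):

import java.util.Arrays;

public class RepeatFieldsDemo {
  // For each repeated field, replace a null value with the previous row's value.
  static Object[] applyRepeats(Object[] row, Object[] previous, boolean[] repeated) {
    if (previous != null) {
      for (int i = 0; i < row.length; i++) {
        if (row[i] == null && repeated[i]) {
          row[i] = previous[i];
        }
      }
    }
    return row;
  }

  public static void main(String[] args) {
    boolean[] repeated = { true, false };
    Object[] first = { "ACME", 100 };
    Object[] second = applyRepeats(new Object[] { null, 200 }, first, repeated);
    System.out.println(Arrays.toString(second)); // [ACME, 200]
  }
}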
Use of org.pentaho.di.core.exception.KettleFileException in project pentaho-kettle by pentaho.
The class IngresVectorwiseLoader, method createCommandLine:
/**
 * Create the command line for a SQL process, depending on the meta information supplied.
 *
 * @param meta
 *          The metadata to create the command line from
 *
 * @return The string to execute.
 *
 * @throws KettleException
 *           Upon any exception
 */
public String createCommandLine(IngresVectorwiseLoaderMeta meta) throws KettleException {
  StringBuilder sb = new StringBuilder(300);
  if (!Utils.isEmpty(meta.getSqlPath())) {
    try {
      FileObject fileObject = KettleVFS.getFileObject(environmentSubstitute(meta.getSqlPath()), getTransMeta());
      String sqlexec = Const.optionallyQuoteStringByOS(KettleVFS.getFilename(fileObject));
      sb.append(sqlexec);
      // sql @tc-dwh-test.timocom.net,tcp_ip,VW[ingres,pwd]::dwh
    } catch (KettleFileException ex) {
      throw new KettleException("Error retrieving command string", ex);
    }
  } else {
    if (meta.isUsingVwload()) {
      if (isDetailed()) {
        logDetailed("vwload defaults to system path");
      }
      sb.append("vwload");
    } else {
      if (isDetailed()) {
        logDetailed("sql defaults to system path");
      }
      sb.append("sql");
    }
  }
  DatabaseMeta dm = meta.getDatabaseMeta();
  if (dm != null) {
    String databaseName = environmentSubstitute(Const.NVL(dm.getDatabaseName(), ""));
    String password = Encr.decryptPasswordOptionallyEncrypted(environmentSubstitute(Const.NVL(dm.getDatabaseInterface().getPassword(), "")));
    String port = environmentSubstitute(Const.NVL(dm.getDatabasePortNumberString(), "")).replace("7", "");
    String username = environmentSubstitute(Const.NVL(dm.getDatabaseInterface().getUsername(), ""));
    String hostname = environmentSubstitute(Const.NVL(dm.getDatabaseInterface().getHostname(), ""));
    String schemaTable = dm.getQuotedSchemaTableCombination(null, environmentSubstitute(meta.getTableName()));
    String encoding = environmentSubstitute(Const.NVL(meta.getEncoding(), ""));
    String fifoFile = Const.optionallyQuoteStringByOS(environmentSubstitute(Const.NVL(meta.getFifoFileName(), "")));
    String errorFile = Const.optionallyQuoteStringByOS(environmentSubstitute(Const.NVL(meta.getErrorFileName(), "")));
    int maxNrErrors = Const.toInt(environmentSubstitute(Const.NVL(meta.getMaxNrErrors(), "0")), 0);
    if (meta.isUsingVwload()) {
      sb.append(" -u ").append(username);
      sb.append(" -P ").append(password);
      sb.append(" -f ").append(meta.getDelimiter()).append("");
      sb.append(" -t ").append(schemaTable);
      if (!Utils.isEmpty(encoding)) {
        sb.append(" -C ").append(encoding);
      }
      if (!Utils.isEmpty(errorFile)) {
        sb.append(" -l ").append(errorFile);
      }
      if (maxNrErrors > 0) {
        // Multiply by two because every bad row produces two errors;
        // that is not evident.
        sb.append(" -x ").append(maxNrErrors * 2);
      }
      sb.append(" ").append(databaseName);
      sb.append(" ").append(fifoFile);
    } else if (meta.isUseDynamicVNode()) {
      // The logical port name in JDBC uses a 7.
      sb.append(" @").append(hostname).append(",").append(port).append("[").append(username).append(",").append(password).append("]::").append(databaseName);
    } else {
      // Database name
      //
      sb.append(" ").append(databaseName);
      if (meta.isUseAuthentication()) {
        sb.append("-P").append(password);
      }
    }
  } else {
    throw new KettleException("No connection specified");
  }
  return sb.toString();
}
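For the vwload branch above, the assembled command is just flag/value pairs appended to a StringBuilder. The sketch below reproduces that assembly with invented connection values to show what the resulting string looks like; the flags mirror the method above, but the sample values and printed output are illustrative only:

public class VwloadCommandDemo {
  public static void main(String[] args) {
    // Invented example values; the flags mirror the vwload branch above.
    String username = "ingres", password = "secret", delimiter = "|";
    String table = "dwh.sales", database = "dwh", fifo = "/tmp/fifo.dat";
    int maxNrErrors = 50;

    StringBuilder sb = new StringBuilder("vwload");
    sb.append(" -u ").append(username);
    sb.append(" -P ").append(password);
    sb.append(" -f ").append(delimiter);
    sb.append(" -t ").append(table);
    // Doubled, because every bad row yields two errors (see the comment above).
    sb.append(" -x ").append(maxNrErrors * 2);
    sb.append(" ").append(database);
    sb.append(" ").append(fifo);

    System.out.println(sb);
    // vwload -u ingres -P secret -f | -t dwh.sales -x 100 dwh /tmp/fifo.dat
  }
}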
Use of org.pentaho.di.core.exception.KettleFileException in project pentaho-kettle by pentaho.
The class JoinRows, method getRowData:
/**
 * Get a row of data from the indicated rowset or buffer (memory/disk).
 *
 * @param filenr
 *          The rowset or buffer to read a row from
 * @return a row of data
 * @throws KettleException
 *           in case something goes wrong
 */
public Object[] getRowData(int filenr) throws KettleException {
  data.restart[filenr] = false;
  Object[] rowData = null;
  // Do we read from the first rowset or a file?
  if (filenr == 0) {
    // Rowset 0:
    RowSet rowSet = getFirstInputRowSet();
    rowData = getRowFrom(rowSet);
    if (rowData != null) {
      data.fileRowMeta[0] = rowSet.getRowMeta();
    }
    if (log.isRowLevel()) {
      logRowlevel(BaseMessages.getString(PKG, "JoinRows.Log.ReadRowFromStream") + (rowData == null ? "<null>" : data.fileRowMeta[0].getString(rowData)));
    }
  } else {
    if (data.cache[filenr] == null) {
      // See if we need to open the file?
      if (data.dataInputStream[filenr] == null) {
        try {
          data.fileInputStream[filenr] = new FileInputStream(data.file[filenr]);
          data.dataInputStream[filenr] = new DataInputStream(data.fileInputStream[filenr]);
        } catch (FileNotFoundException fnfe) {
          logError(BaseMessages.getString(PKG, "JoinRows.Log.UnableToFindOrOpenTemporaryFile") + data.file[filenr] + "] : " + fnfe.toString());
          setErrors(1);
          stopAll();
          return null;
        }
      }
      if (data.size[filenr] == 0) {
        if (log.isBasic()) {
          logBasic(BaseMessages.getString(PKG, "JoinRows.Log.NoRowsComingFromStep") + data.rs[filenr].getOriginStepName() + "]");
        }
        return null;
      }
      try {
        rowData = data.fileRowMeta[filenr].readData(data.dataInputStream[filenr]);
      } catch (KettleFileException e) {
        logError(BaseMessages.getString(PKG, "JoinRows.Log.UnableToReadDataFromTempFile") + filenr + " [" + data.file[filenr] + "]");
        setErrors(1);
        stopAll();
        return null;
      } catch (SocketTimeoutException e) {
        logError(BaseMessages.getString(PKG, "JoinRows.Log.UnableToReadDataFromTempFile") + filenr + " [" + data.file[filenr] + "]");
        setErrors(1);
        stopAll();
        return null;
      }
      if (log.isRowLevel()) {
        logRowlevel(BaseMessages.getString(PKG, "JoinRows.Log.ReadRowFromFile") + filenr + " : " + data.fileRowMeta[filenr].getString(rowData));
      }
      data.position[filenr]++;
      // Once all rows are read, close the file.
      // The file will then be re-opened if needed later on.
      if (data.position[filenr] >= data.size[filenr]) {
        try {
          data.dataInputStream[filenr].close();
          data.fileInputStream[filenr].close();
          data.dataInputStream[filenr] = null;
          data.fileInputStream[filenr] = null;
          data.position[filenr] = 0;
          // indicate that we restarted.
          data.restart[filenr] = true;
        } catch (IOException ioe) {
          logError(BaseMessages.getString(PKG, "JoinRows.Log.UnableToCloseInputStream") + data.file[filenr] + "] : " + ioe.toString());
          setErrors(1);
          stopAll();
          return null;
        }
      }
    } else {
      if (data.size[filenr] == 0) {
        if (log.isBasic()) {
          logBasic(BaseMessages.getString(PKG, "JoinRows.Log.NoRowsComingFromStep") + data.rs[filenr].getOriginStepName() + "]");
        }
        return null;
      }
      rowData = data.cache[filenr].get(data.position[data.filenr]);
      // Don't forget to clone the data to protect it against data alteration downstream.
      //
      rowData = data.fileRowMeta[filenr].cloneRow(rowData);
      data.position[filenr]++;
      // Once all rows are read, start over; the buffer will be re-read if needed later on.
      if (data.position[filenr] >= data.size[filenr]) {
        data.position[filenr] = 0;
        // indicate that we restarted.
        data.restart[filenr] = true;
      }
    }
  }
  return rowData;
}
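The disk-buffer branch relies on a simple pattern: serialized rows are read back with a DataInputStream, a position counter tracks progress, and reaching size closes and conceptually rewinds the file (the "restart"). In Kettle the serialization is done by the row metadata's readData(); the sketch below substitutes plain DataOutputStream/DataInputStream primitives to show the same open-read-restart cycle (file name and row content are invented):

import java.io.*;

public class TempFileRowBufferDemo {
  public static void main(String[] args) throws IOException {
    // Write a few "rows" to a temp file, then read them back and rewind,
    // mirroring the buffer-to-disk pattern getRowData() reads from.
    File file = File.createTempFile("joinrows", ".tmp");
    file.deleteOnExit();

    try (DataOutputStream out = new DataOutputStream(new FileOutputStream(file))) {
      for (int i = 0; i < 3; i++) {
        out.writeInt(i);          // first field
        out.writeUTF("row-" + i); // second field
      }
    }

    int size = 3, position = 0;
    DataInputStream in = new DataInputStream(new FileInputStream(file));
    for (int n = 0; n < 7; n++) { // read past the end twice to show the restart
      System.out.println(in.readInt() + " " + in.readUTF());
      position++;
      if (position >= size) {
        in.close(); // close and re-open: the "restart" case
        in = new DataInputStream(new FileInputStream(file));
        position = 0;
      }
    }
    in.close();
  }
}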
Use of org.pentaho.di.core.exception.KettleFileException in project pentaho-kettle by pentaho.
The class CsvInput, method readOneRow:
/**
 * Read a single row of data from the file...
 *
 * @param skipRow if the row should be skipped: the header row, or part of a row during a parallel read
 * @param ignoreEnclosures if enclosures should be ignored, i.e. in case we need to skip part of the row during a
 *          parallel read
 * @return a row of data...
 * @throws KettleException
 */
private Object[] readOneRow(boolean skipRow, boolean ignoreEnclosures) throws KettleException {
  try {
    Object[] outputRowData = RowDataUtil.allocateRowData(data.outputRowMeta.size());
    int outputIndex = 0;
    boolean newLineFound = false;
    boolean endOfBuffer = false;
    List<Exception> conversionExceptions = null;
    List<ValueMetaInterface> exceptionFields = null;
    //
    while (!newLineFound && outputIndex < data.fieldsMapping.size()) {
      if (data.resizeBufferIfNeeded()) {
        // We've run out of data: the last row may have no end-of-line delimiter.
        if (outputRowData != null) {
          // If we already read some fields, return the partial row; the remaining fields stay null.
          if (outputIndex > 0) {
            //
            if (meta.isIncludingFilename() && !Utils.isEmpty(meta.getFilenameField())) {
              if (meta.isLazyConversionActive()) {
                outputRowData[data.filenameFieldIndex] = data.binaryFilename;
              } else {
                outputRowData[data.filenameFieldIndex] = data.filenames[data.filenr - 1];
              }
            }
            if (data.isAddingRowNumber) {
              outputRowData[data.rownumFieldIndex] = data.rowNumber++;
            }
            incrementLinesInput();
            return outputRowData;
          }
        }
        // nothing more to read, call it a day.
        return null;
      }
      // OK, at this point we should have data in the byteBuffer and we should be able to scan for the next
      // delimiter (;)
      // So let's look for a delimiter.
      // Also skip over the enclosures ("); it does NOT take escaped enclosures into account.
      // Later we can add an option for having escaped or double enclosures in the file. <sigh>
      //
      boolean delimiterFound = false;
      boolean enclosureFound = false;
      boolean doubleLineEnd = false;
      int escapedEnclosureFound = 0;
      boolean ignoreEnclosuresInField = ignoreEnclosures;
      while (!delimiterFound && !newLineFound && !endOfBuffer) {
        //
        if (data.delimiterFound()) {
          delimiterFound = true;
        } else if ((!meta.isNewlinePossibleInFields() || outputIndex == data.fieldsMapping.size() - 1) && data.newLineFound()) {
          // Perhaps we found a (premature) new line?
          //
          // In case we are not using an enclosure and fields can contain new lines,
          // we need to check the newlines-possible flag.
          // If the flag is enabled we skip newline checking, except for the last field in the row.
          // In that one we can't support newlines without an enclosure (handled below).
          //
          newLineFound = true;
          // Skip new line character
          for (int i = 0; i < data.encodingType.getLength(); i++) {
            data.moveEndBufferPointer();
          }
          // Re-check for double new line (\r\n)...
          if (data.newLineFound()) {
            // Found another one, need to skip it later
            doubleLineEnd = true;
          }
        } else if (data.enclosureFound() && !ignoreEnclosuresInField) {
          int enclosurePosition = data.getEndBuffer();
          int fieldFirstBytePosition = data.getStartBuffer();
          if (fieldFirstBytePosition == enclosurePosition) {
            // Perhaps we need to skip over an enclosed part?
            // We always expect exactly one enclosure character.
            // If we find the enclosure doubled, we consider it escaped.
            // --> "" is converted to " later on.
            //
            enclosureFound = true;
            boolean keepGoing;
            do {
              if (data.moveEndBufferPointer()) {
                enclosureFound = false;
                break;
              }
              keepGoing = !data.enclosureFound();
              if (!keepGoing) {
                // Read another byte...
                if (!data.endOfBuffer() && data.moveEndBufferPointer()) {
                  break;
                }
                if (data.enclosure.length > 1) {
                  data.moveEndBufferPointer();
                }
                // If this character is also an enclosure, we can consider the enclosure "escaped".
                // As such, if this is an enclosure, we keep going...
                //
                keepGoing = data.enclosureFound();
                if (keepGoing) {
                  escapedEnclosureFound++;
                }
              }
            } while (keepGoing);
            //
            if (data.endOfBuffer()) {
              endOfBuffer = true;
              break;
            }
          } else {
            // Ignore the enclosure if it's not at the field start
            ignoreEnclosuresInField = true;
          }
        } else {
          if (data.moveEndBufferPointer()) {
            endOfBuffer = true;
            break;
          }
        }
      }
      // If we're still here, we found a delimiter...
      // Since the starting point never really changed, we can just grab the range:
      //
      // [startBuffer-endBuffer[
      //
      // This is the part we want.
      // data.byteBuffer[data.startBuffer]
      //
      byte[] field = data.getField(delimiterFound, enclosureFound, newLineFound, endOfBuffer);
      //
      if (escapedEnclosureFound > 0) {
        if (log.isRowLevel()) {
          logRowlevel("Escaped enclosures found in " + new String(field));
        }
        field = data.removeEscapedEnclosures(field, escapedEnclosureFound);
      }
      final int currentFieldIndex = outputIndex++;
      final int actualFieldIndex = data.fieldsMapping.fieldMetaIndex(currentFieldIndex);
      if (actualFieldIndex != FieldsMapping.FIELD_DOES_NOT_EXIST) {
        if (!skipRow) {
          if (meta.isLazyConversionActive()) {
            outputRowData[actualFieldIndex] = field;
          } else {
            // We're not lazy, so we convert the data right here and now.
            // The source value metadata uses binary storage, so we just have to ask it for the native type.
            // That will do the actual conversion.
            //
            ValueMetaInterface sourceValueMeta = data.convertRowMeta.getValueMeta(actualFieldIndex);
            try {
              outputRowData[actualFieldIndex] = sourceValueMeta.convertBinaryStringToNativeType(field);
            } catch (KettleValueException e) {
              // There was a conversion error.
              //
              outputRowData[actualFieldIndex] = null;
              if (conversionExceptions == null) {
                conversionExceptions = new ArrayList<Exception>();
                exceptionFields = new ArrayList<ValueMetaInterface>();
              }
              conversionExceptions.add(e);
              exceptionFields.add(sourceValueMeta);
            }
          }
        } else {
          // Nothing for the header, no conversions here.
          outputRowData[actualFieldIndex] = null;
        }
      }
      // Handle an empty column at the end of the row (see the Jira case for details).
      if ((!newLineFound && outputIndex < data.fieldsMapping.size()) || (newLineFound && doubleLineEnd)) {
        int i = 0;
        while ((!data.newLineFound() && (i < data.delimiter.length))) {
          data.moveEndBufferPointer();
          i++;
        }
        if (data.newLineFound() && outputIndex >= data.fieldsMapping.size()) {
          data.moveEndBufferPointer();
        }
        if (doubleLineEnd && data.encodingType.getLength() > 1) {
          data.moveEndBufferPointer();
        }
      }
      data.setStartBuffer(data.getEndBuffer());
    }
    //
    if (!newLineFound && !data.resizeBufferIfNeeded()) {
      do {
        data.moveEndBufferPointer();
        if (data.resizeBufferIfNeeded()) {
          // nothing more to read.
          break;
        }
        // TODO: if we're using quoting we might be dealing with a very dirty file with quoted newlines in trailing
        // fields. (imagine that)
        // In that particular case we want to use the same logic we use above (refactored a bit) to skip these fields.
      } while (!data.newLineFound());
      if (!data.resizeBufferIfNeeded()) {
        while (data.newLineFound()) {
          data.moveEndBufferPointer();
          if (data.resizeBufferIfNeeded()) {
            // nothing more to read.
            break;
          }
        }
      }
      // Make sure we start at the right position the next time around.
      data.setStartBuffer(data.getEndBuffer());
    }
    //
    if (meta.isIncludingFilename() && !Utils.isEmpty(meta.getFilenameField())) {
      if (meta.isLazyConversionActive()) {
        outputRowData[data.filenameFieldIndex] = data.binaryFilename;
      } else {
        outputRowData[data.filenameFieldIndex] = data.filenames[data.filenr - 1];
      }
    }
    if (data.isAddingRowNumber) {
      outputRowData[data.rownumFieldIndex] = data.rowNumber++;
    }
    if (!ignoreEnclosures) {
      incrementLinesInput();
    }
    if (conversionExceptions != null && conversionExceptions.size() > 0) {
      //
      throw new KettleConversionException("There were " + conversionExceptions.size() + " conversion errors on line " + getLinesInput(), conversionExceptions, exceptionFields, outputRowData);
    }
    return outputRowData;
  } catch (KettleConversionException e) {
    throw e;
  } catch (IOException e) {
    throw new KettleFileException("Exception reading line using NIO", e);
  }
}
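The escaped-enclosure handling above counts doubled enclosure characters while scanning and later collapses them via data.removeEscapedEnclosures(). A standalone sketch of that normalization, written against String rather than the byte[] the real helper works on (class and method names here are illustrative, not Kettle's):

public class EnclosureUnescapeDemo {
  // Collapse doubled enclosure characters ("" -> ") inside a field,
  // the same normalization the byte-level helper performs.
  static String removeEscapedEnclosures(String field, char enclosure) {
    StringBuilder sb = new StringBuilder(field.length());
    for (int i = 0; i < field.length(); i++) {
      sb.append(field.charAt(i));
      if (field.charAt(i) == enclosure && i + 1 < field.length()
          && field.charAt(i + 1) == enclosure) {
        i++; // skip the second enclosure of the escaped pair
      }
    }
    return sb.toString();
  }

  public static void main(String[] args) {
    System.out.println(removeEscapedEnclosures("say \"\"hi\"\" now", '"'));
    // prints: say "hi" now
  }
}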