Use of org.pentaho.di.core.exception.KettleFileException in project pentaho-kettle by pentaho.
The class TestUtils, method getFileObject.
public static FileObject getFileObject(String vfsPath, VariableSpace space) {
  if (space == null) {
    space = new Variables();
    space.initializeVariablesFrom(null);
  }
  try {
    return KettleVFS.getFileObject(vfsPath, space);
  } catch (KettleFileException e) {
    throw new RuntimeException(e);
  }
}
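For context, this helper exists so tests can resolve VFS paths without handling the checked KettleFileException. A minimal usage sketch (a hypothetical test; the "ram://" scheme is Apache Commons VFS's in-memory filesystem) might look like this:

@Test
public void resolvesVfsPathWithoutCheckedException() throws Exception {
  // Passing null for the VariableSpace makes the helper create its own Variables instance.
  FileObject tempFile = TestUtils.getFileObject("ram://some/test/file.txt", null);
  assertNotNull(tempFile);
}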
Use of org.pentaho.di.core.exception.KettleFileException in project pentaho-kettle by pentaho.
The class Value, method writeData.
/**
 * Write the data of this Value, without the metadata, to a DataOutputStream.
 *
 * @param dos
 *          the DataOutputStream to write the data to
 * @return true if the data was written successfully
 * @throws KettleFileException
 *           in case an I/O error occurs while writing
 */
public boolean writeData(DataOutputStream dos) throws KettleFileException {
  try {
    // Is the value NULL?
    dos.writeBoolean(isNull());

    // Handle the content -- only when not NULL
    if (!isNull()) {
      switch (getType()) {
        case VALUE_TYPE_STRING:
          if (getString() == null) {
            dos.writeInt(-1); // -1 == null string
          } else {
            String string = getString();
            byte[] chars = string.getBytes(Const.XML_ENCODING);
            dos.writeInt(chars.length);
            dos.write(chars);
          }
          break;
        case VALUE_TYPE_BIGNUMBER:
          if (getBigNumber() == null) {
            dos.writeInt(-1); // -1 == null big number
          } else {
            String string = getBigNumber().toString();
            dos.writeInt(string.length());
            dos.writeChars(string);
          }
          break;
        case VALUE_TYPE_DATE:
          dos.writeBoolean(getDate() != null);
          if (getDate() != null) {
            dos.writeLong(getDate().getTime());
          }
          break;
        case VALUE_TYPE_NUMBER:
          dos.writeDouble(getNumber());
          break;
        case VALUE_TYPE_BOOLEAN:
          dos.writeBoolean(getBoolean());
          break;
        case VALUE_TYPE_INTEGER:
          dos.writeLong(getInteger());
          break;
        default:
          // nothing to write for other types
          break;
      }
    }
  } catch (IOException e) {
    throw new KettleFileException("Unable to write value data to output stream", e);
  }
  return true;
}
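As a quick illustration of the format writeData produces, here is a hedged sketch of serializing a string Value into a byte array (it assumes a Value(String name, String value) constructor; only writeData itself is taken from the source above):

// Serialize a String-typed Value: a null flag, a byte-length prefix, then the encoded bytes.
ByteArrayOutputStream baos = new ByteArrayOutputStream();
DataOutputStream dos = new DataOutputStream(baos);
Value value = new Value("greeting", "hello"); // assumed constructor
value.writeData(dos); // writes false (not null), 5 (byte length), then the bytes of "hello"
dos.flush();
byte[] serialized = baos.toByteArray();

Note the asymmetry a reader must mirror: strings are length-prefixed by byte count in Const.XML_ENCODING, while big numbers are length-prefixed by character count and written with writeChars, i.e. two bytes per character.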
Use of org.pentaho.di.core.exception.KettleFileException in project pentaho-kettle by pentaho.
The class JobEntryCheckDbConnectionsIT, method cleanup.
@After
public void cleanup() {
  try {
    FileObject dbFile = KettleVFS.getFileObject(H2_DATABASE + ".h2.db");
    if (dbFile.exists()) {
      System.out.println("deleting file");
      dbFile.delete();
    }
  } catch (KettleFileException | FileSystemException ignored) {
    // Ignore, we tried cleaning up
  }
}
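A plausible setup counterpart (a sketch, not taken from the test class; it assumes the H2 driver is on the classpath and that H2 uses the classic PageStore format, which creates files ending in .h2.db) would produce the file this method deletes:

// Opening a JDBC connection makes H2 create <H2_DATABASE>.h2.db on disk,
// which is exactly the file the cleanup() method above removes.
Connection connection = DriverManager.getConnection("jdbc:h2:./" + H2_DATABASE, "sa", "");
connection.close();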
Use of org.pentaho.di.core.exception.KettleFileException in project pentaho-kettle by pentaho.
The class BlockingStep, method getBuffer.
private Object[] getBuffer() {
  Object[] retval;

  // Open all files at once and read one row from each file...
  if (data.files.size() > 0 && (data.dis.size() == 0 || data.fis.size() == 0)) {
    if (log.isBasic()) {
      logBasic(BaseMessages.getString(PKG, "BlockingStep.Log.Openfiles"));
    }
    try {
      FileObject fileObject = data.files.get(0);
      String filename = KettleVFS.getFilename(fileObject);
      if (log.isDetailed()) {
        logDetailed(BaseMessages.getString(PKG, "BlockingStep.Log.Openfilename1") + filename
          + BaseMessages.getString(PKG, "BlockingStep.Log.Openfilename2"));
      }
      InputStream fi = KettleVFS.getInputStream(fileObject);
      DataInputStream di;
      data.fis.add(fi);
      if (meta.getCompress()) {
        GZIPInputStream gzfi = new GZIPInputStream(new BufferedInputStream(fi));
        di = new DataInputStream(gzfi);
        data.gzis.add(gzfi);
      } else {
        di = new DataInputStream(fi);
      }
      data.dis.add(di);

      // How long is the buffer?
      int buffersize = di.readInt();
      if (log.isDetailed()) {
        logDetailed(BaseMessages.getString(PKG, "BlockingStep.Log.BufferSize1") + filename
          + BaseMessages.getString(PKG, "BlockingStep.Log.BufferSize2") + buffersize + " "
          + BaseMessages.getString(PKG, "BlockingStep.Log.BufferSize3"));
      }
      if (buffersize > 0) {
        // Read a row from the temp file...
        data.rowbuffer.add(data.outputRowMeta.readData(di));
      }
    } catch (Exception e) {
      logError(BaseMessages.getString(PKG, "BlockingStepMeta.ErrorReadingFile") + e.toString());
      logError(Const.getStackTracker(e));
    }
  }

  if (data.files.size() == 0) {
    if (data.buffer.size() > 0) {
      retval = data.buffer.get(0);
      data.buffer.remove(0);
    } else {
      retval = null;
    }
  } else {
    if (data.rowbuffer.size() == 0) {
      retval = null;
    } else {
      retval = data.rowbuffer.get(0);
      data.rowbuffer.remove(0);

      // Now get another row...
      FileObject file = data.files.get(0);
      DataInputStream di = data.dis.get(0);
      InputStream fi = data.fis.get(0);
      GZIPInputStream gzfi = (meta.getCompress()) ? data.gzis.get(0) : null;
      try {
        data.rowbuffer.add(0, data.outputRowMeta.readData(di));
      } catch (SocketTimeoutException e) {
        logError(BaseMessages.getString(PKG, "System.Log.UnexpectedError") + " : " + e.toString());
        logError(Const.getStackTracker(e));
        setErrors(1);
        stopAll();
      } catch (KettleFileException fe) {
        // empty file or EOF mostly: close and delete the exhausted temp file
        try {
          di.close();
          fi.close();
          if (gzfi != null) {
            gzfi.close();
          }
          file.delete();
        } catch (IOException e) {
          logError(BaseMessages.getString(PKG, "BlockingStepMeta.UnableDeleteFile") + file.toString());
          setErrors(1);
          stopAll();
          return null;
        }
        data.files.remove(0);
        data.dis.remove(0);
        data.fis.remove(0);
        if (gzfi != null) {
          data.gzis.remove(0);
        }
      }
    }
  }
  return retval;
}
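The reads above imply a matching write path: an int row count, then rows serialized with the step's row metadata, optionally wrapped in GZIP. A minimal sketch of that writer, inferred from the reader rather than quoted from BlockingStep:

// How the temp file consumed by getBuffer() is presumably laid out.
OutputStream outputStream = KettleVFS.getOutputStream(fileObject, false);
if (meta.getCompress()) {
  outputStream = new GZIPOutputStream(new BufferedOutputStream(outputStream));
}
DataOutputStream dos = new DataOutputStream(outputStream);
dos.writeInt(buffer.size()); // matched by di.readInt() in getBuffer()
for (Object[] row : buffer) {
  data.outputRowMeta.writeData(dos, row); // matched by outputRowMeta.readData(di)
}
dos.close();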
Use of org.pentaho.di.core.exception.KettleFileException in project pentaho-kettle by pentaho.
The class ParGzipCsvInput, method readOneRow.
/**
 * Read a single row of data from the file...
 *
 * @param doConversions
 *          whether to convert the field data to the output types; set to false for the header row.
 * @return a row of data...
 * @throws KettleException
 */
private Object[] readOneRow(boolean doConversions) throws KettleException {
  // Have we read past the end of the block assigned to this step copy?
  //
  if (data.totalBytesRead > data.blockSize) {
    // Nothing more to read in this block.
    //
    return null;
  }
  try {
    Object[] outputRowData = RowDataUtil.allocateRowData(data.outputRowMeta.size());
    int outputIndex = 0;
    boolean newLineFound = false;
    int newLines = 0;

    // Scan one field at a time until we find the end of the line...
    //
    while (!newLineFound && outputIndex < meta.getInputFields().length) {
      if (checkBufferSize()) {
        // The last row gets discarded if the last item is null and
        // there is no end of line delimiter.
        if (outputRowData != null) {
          // Make certain that at least one field was read before
          // filling the rest of them with null.
          if (outputIndex > 0) {
            return (outputRowData);
          }
        }
        // nothing more to read, call it a day.
        return null;
      }
      // OK, at this point we should have data in the byteBuffer and we should be able to scan for the next
      // delimiter (;)
      // So let's look for a delimiter.
      // Also skip over the enclosures ("); this does NOT take escaped enclosures into account.
      // Later we can add an option for having escaped or double enclosures in the file. <sigh>
      //
      boolean delimiterFound = false;
      boolean enclosureFound = false;
      int escapedEnclosureFound = 0;
      while (!delimiterFound) {
        if (data.byteBuffer[data.endBuffer] == data.delimiter[0]) {
          delimiterFound = true;
        } else if (data.byteBuffer[data.endBuffer] == '\n' || data.byteBuffer[data.endBuffer] == '\r') {
          // Perhaps we found a new line?
          // "\n\r".getBytes()
          //
          data.endBuffer++;
          data.totalBytesRead++;
          newLines = 1;
          if (!checkBufferSize()) {
            // re-check for double delimiters...
            if (data.byteBuffer[data.endBuffer] == '\n' || data.byteBuffer[data.endBuffer] == '\r') {
              data.endBuffer++;
              data.totalBytesRead++;
              newLines = 2;
              checkBufferSize();
            }
          }
          newLineFound = true;
          delimiterFound = true;
        } else if (data.enclosure != null && data.byteBuffer[data.endBuffer] == data.enclosure[0]) {
          // Perhaps we need to skip over an enclosed part?
          // We always expect exactly one enclosure character.
          // If we find the enclosure doubled, we consider it escaped.
          // --> "" is converted to " later on.
          //
          enclosureFound = true;
          boolean keepGoing;
          do {
            data.endBuffer++;
            if (checkBufferSize()) {
              enclosureFound = false;
              break;
            }
            keepGoing = data.byteBuffer[data.endBuffer] != data.enclosure[0];
            if (!keepGoing) {
              // We found an enclosure character.
              // Read another byte...
              //
              data.endBuffer++;
              if (checkBufferSize()) {
                enclosureFound = false;
                break;
              }
              // If this character is also an enclosure, we can consider the enclosure "escaped".
              // As such, if this is an enclosure, we keep going...
              //
              keepGoing = data.byteBuffer[data.endBuffer] == data.enclosure[0];
              if (keepGoing) {
                escapedEnclosureFound++;
              }
            }
          } while (keepGoing);

          // Did we run out of buffer while scanning the enclosed field?
          //
          if (data.endBuffer >= data.bufferSize) {
            // consider it a newline to break out of the upper while loop
            newLineFound = true;
            // to remove the enclosures in case of missing newline on last line.
            newLines += 2;
            break;
          }
        } else {
          data.endBuffer++;
          data.totalBytesRead++;
          if (checkBufferSize()) {
            if (data.endBuffer >= data.bufferSize) {
              newLineFound = true;
              break;
            }
          }
        }
      }

      // If we're still here, we found a delimiter...
      // Since the starting point never really changed, we can just grab the range:
      //
      // [startBuffer-endBuffer[
      //
      // This is the part we want.
      //
      int length = data.endBuffer - data.startBuffer;
      if (newLineFound) {
        length -= newLines;
        if (length <= 0) {
          length = 0;
        }
      }
      if (enclosureFound) {
        data.startBuffer++;
        length -= 2;
        if (length <= 0) {
          length = 0;
        }
      }
      if (length <= 0) {
        length = 0;
      }
      byte[] field = new byte[length];
      System.arraycopy(data.byteBuffer, data.startBuffer, field, 0, length);

      // Did we find any escaped enclosures that need to be collapsed?
      //
      if (escapedEnclosureFound > 0) {
        if (log.isRowLevel()) {
          logRowlevel("Escaped enclosures found in " + new String(field));
        }
        field = data.removeEscapedEnclosures(field, escapedEnclosureFound);
      }
      if (doConversions) {
        if (meta.isLazyConversionActive()) {
          outputRowData[outputIndex++] = field;
        } else {
          // We're not lazy, so we convert the data right here and now.
          // The convert object uses binary storage, so we just have to ask it for the native type.
          // That will do the actual conversion.
          //
          ValueMetaInterface sourceValueMeta = data.convertRowMeta.getValueMeta(outputIndex);
          outputRowData[outputIndex++] = sourceValueMeta.convertBinaryStringToNativeType(field);
        }
      } else {
        // nothing for the header, no conversions here.
        outputRowData[outputIndex++] = null;
      }

      // OK, move on to the next field...
      if (!newLineFound) {
        data.endBuffer++;
        data.totalBytesRead++;
      }
      data.startBuffer = data.endBuffer;
    }

    // All fields were read; if we haven't reached the end of the line yet, skip the rest of the row.
    //
    if (!newLineFound && !checkBufferSize()) {
      do {
        data.endBuffer++;
        data.totalBytesRead++;
        if (checkBufferSize()) {
          // nothing more to read.
          break;
        }
        // TODO: if we're using quoting we might be dealing with a very dirty file with quoted newlines in trailing
        // fields. (imagine that)
        // In that particular case we want to use the same logic we use above (refactored a bit) to skip these fields.
      } while (data.byteBuffer[data.endBuffer] != '\n' && data.byteBuffer[data.endBuffer] != '\r');
      if (!checkBufferSize()) {
        while (data.byteBuffer[data.endBuffer] == '\n' || data.byteBuffer[data.endBuffer] == '\r') {
          data.endBuffer++;
          data.totalBytesRead++;
          if (checkBufferSize()) {
            // nothing more to read.
            break;
          }
        }
      }
      // Make sure we start at the right position the next time around.
      data.startBuffer = data.endBuffer;
    }

    // Optionally add the filename to the row...
    //
    if (meta.isIncludingFilename() && !Utils.isEmpty(meta.getFilenameField())) {
      if (meta.isLazyConversionActive()) {
        outputRowData[data.filenameFieldIndex] = data.binaryFilename;
      } else {
        outputRowData[data.filenameFieldIndex] = data.filenames[data.filenr - 1];
      }
    }
    if (data.isAddingRowNumber) {
      outputRowData[data.rownumFieldIndex] = Long.valueOf(data.rowNumber++);
    }
    incrementLinesInput();
    return outputRowData;
  } catch (Exception e) {
    throw new KettleFileException("Exception reading line of data", e);
  }
}
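The escaped-enclosure handling delegates to data.removeEscapedEnclosures, which is not shown here. An illustrative sketch of what such a routine has to do (hypothetical, not Kettle's actual implementation): collapse each doubled enclosure into a single one.

// Collapse doubled enclosures ("" becomes ") in the raw field bytes.
// nrEscapes is the number of escaped enclosures counted while scanning.
static byte[] removeEscapedEnclosures(byte[] field, int nrEscapes, byte enclosure) {
  byte[] result = new byte[field.length - nrEscapes];
  int target = 0;
  for (int i = 0; i < field.length; i++) {
    result[target++] = field[i];
    if (field[i] == enclosure && i + 1 < field.length && field[i + 1] == enclosure) {
      i++; // skip the second (escape) copy of the enclosure
    }
  }
  return result;
}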