Use of org.apache.commons.vfs2.FileObject in the pentaho-kettle project (by Pentaho).
Class ParGzipCsvInput, method openNextFile:
/**
 * Closes the current file (if any) and opens the next file listed in
 * {@code data.filenames}, positioning the (gzip) stream for this step copy.
 *
 * @return {@code true} when a file was opened and positioned; {@code false}
 *         when there are no more files, or (in parallel mode) when EOF was hit
 *         while skipping to this step copy's first block.
 * @throws KettleException wrapping any error raised while opening/positioning.
 */
private boolean openNextFile() throws KettleException {
try {
// Close the previous file...
//
closeFile();
// No more files to process: signal end of input.
if (data.filenr >= data.filenames.length) {
return false;
}
// Open the next one...
//
logBasic("Opening file #" + data.filenr + " : " + data.filenames[data.filenr]);
FileObject fileObject = KettleVFS.getFileObject(data.filenames[data.filenr], getTransMeta());
data.fis = KettleVFS.getInputStream(fileObject);
// Lazy conversion keeps the filename as raw bytes so rows can carry it
// without an eager String->binary conversion later.
if (meta.isLazyConversionActive()) {
data.binaryFilename = data.filenames[data.filenr].getBytes();
}
// Wrap the raw stream in a GZIP decompressor using the configured buffer size.
data.gzis = new GZIPInputStream(data.fis, data.bufferSize);
clearBuffer();
// Reset per-file read bookkeeping.
data.fileReadPosition = 0L;
data.blockNr = 0;
data.eofReached = false;
// Skip to the next file...
//
// Advance the file index now, so a failure below still moves past this file.
data.filenr++;
//
if (data.parallel) {
// Calculate the first block of data to read from the file
// If the buffer size is 500, we read 0-499 for the first file,
// 500-999 for the second, 1000-1499 for the third, etc.
//
// After that we need to get 1500-1999 for the first step again,
// 2000-2499 for the second, 2500-2999 for the third, etc.
//
// This is equivalent :
//
// FROM : stepNumber * bufferSize + blockNr*bufferSize*nrOfSteps
// TO : FROM + bufferSize - 1
//
// Example : step 0, block 0, size 500:
// From: 0*500+0*500*3=0 To: 0+500-1=499
//
// Example : step 0, block 1, size 500:
// From: 0*500+1*500*3=1500 To: 1500+500-1=1999
//
// So our first act is to skip to the correct position in the compressed stream...
//
// for now.
// NOTE(review): block size is fixed at twice the buffer size here; the formula
// in the comment above uses bufferSize directly — confirm which is intended.
data.blockSize = 2 * data.bufferSize;
long bytesToSkip = data.stepNumber * data.blockSize;
if (bytesToSkip > 0) {
// Get into position for block 0
//
logBasic("Skipping " + bytesToSkip + " bytes to go to position " + bytesToSkip + " for step copy " + data.stepNumber);
// GZIPInputStream.skip() may skip fewer bytes than requested, so loop
// until the full count is consumed or EOF is reached.
long bytesSkipped = 0L;
while (bytesSkipped < bytesToSkip) {
long n = data.gzis.skip(bytesToSkip - bytesSkipped);
if (n <= 0) {
// EOF in this file, can't read a block in this step copy
// NOTE(review): this returns false even though data.filenr was already
// advanced and more files may remain — confirm ending input here is intended.
data.eofReached = true;
return false;
}
bytesSkipped += n;
}
// Keep track of the file pointer!
//
data.fileReadPosition += bytesSkipped;
// Reset the bytes read in the current block of data
//
data.totalBytesRead = 0L;
// Skip the first row until the next CR
//
// We likely landed mid-row after the byte skip; discard the partial row.
readOneRow(false);
} else {
// Reset the bytes read in the current block of data
//
data.totalBytesRead = 0L;
//
// Step copy 0 starts at byte 0, so only the header row (if any) is discarded.
if (meta.isHeaderPresent()) {
readOneRow(false);
}
}
} else {
// Just one block: read it all until we hit an EOF.
//
// 9,223,372,036 GB
data.blockSize = Long.MAX_VALUE;
//
if (meta.isHeaderPresent()) {
readOneRow(false);
}
}
// Add filename to result filenames ?
if (meta.isAddResultFile()) {
ResultFile resultFile = new ResultFile(ResultFile.FILE_TYPE_GENERAL, fileObject, getTransMeta().getName(), toString());
resultFile.setComment("File was read by a Csv input step");
addResultFile(resultFile);
}
// Reset the row number pointer...
//
data.rowNumber = 1L;
return true;
} catch (Exception e) {
// Wrap any failure (VFS, I/O, ...) in a KettleException for the caller.
throw new KettleException(e);
}
}
Use of org.apache.commons.vfs2.FileObject in the pentaho-kettle project (by Pentaho).
Class TextFileInputTest, method deleteVfsFile:
/**
 * Deletes the VFS file at the given path.
 * The handle is closed first so no open stream blocks the removal.
 *
 * @param path VFS path of the file to remove
 * @throws Exception if the file cannot be resolved, closed, or deleted
 */
private static void deleteVfsFile(String path) throws Exception {
    final FileObject target = TestUtils.getFileObject(path);
    target.close();
    target.delete();
}
Use of org.apache.commons.vfs2.FileObject in the pentaho-kettle project (by Pentaho).
Class BaseParsingTest, method getFile:
/**
 * Resolves a file from the test resource directory.
 *
 * @param filename resource name, resolved relative to {@code inPrefix}
 * @return the resolved {@link FileObject}; never {@code null}
 * @throws Exception if the resource cannot be resolved
 */
protected FileObject getFile(String filename) throws Exception {
    final URL resourceUrl = this.getClass().getResource(inPrefix + filename);
    assertNotNull("There is no file", resourceUrl);
    final FileObject resolved = fs.resolveFile(resourceUrl.toExternalForm());
    assertNotNull("There is no file", resolved);
    return resolved;
}
Use of org.apache.commons.vfs2.FileObject in the pentaho-kettle project (by Pentaho).
Class FileInputList, method getRequiredFilesDescription:
/**
 * Builds a human-readable description of the given (missing) files:
 * one decoded file URI per line, each line terminated by {@link Const#CR}.
 *
 * @param nonExistantFiles the files to describe
 * @return the newline-separated list of file URIs (empty string for an empty list)
 */
public static String getRequiredFilesDescription(List<FileObject> nonExistantFiles) {
    StringBuilder buffer = new StringBuilder();
    // Enhanced for-loop replaces the explicit Iterator: same traversal, clearer intent.
    for (FileObject file : nonExistantFiles) {
        buffer.append(Const.optionallyDecodeUriString(file.getName().getURI()));
        buffer.append(Const.CR);
    }
    return buffer.toString();
}
Use of org.apache.commons.vfs2.FileObject in the pentaho-kettle project (by Pentaho).
Class FileInputList, method createFolderList:
/**
 * Builds a {@link FileInputList} containing the sub-folders found under each
 * entry of {@code folderName} (after variable substitution).
 *
 * @param space          variable space used to substitute environment variables
 * @param folderName     folder paths to scan (may contain variables)
 * @param folderRequired per-folder "Y"/"N" flags; a required folder that is
 *                       missing or not listable is recorded on the result
 * @return the populated file list; never {@code null}
 */
public static FileInputList createFolderList(VariableSpace space, String[] folderName, String[] folderRequired) {
FileInputList fileInputList = new FileInputList();
// Replace possible environment variables...
final String[] realfolder = space.environmentSubstitute(folderName);
for (int i = 0; i < realfolder.length; i++) {
final String onefile = realfolder[i];
final boolean onerequired = YES.equalsIgnoreCase(folderRequired[i]);
// Always recurse into sub-directories; only folders are collected.
final boolean subdirs = true;
final FileTypeFilter filter = FileTypeFilter.ONLY_FOLDERS;
// Skip blank entries (e.g. unresolved variables).
if (Utils.isEmpty(onefile)) {
continue;
}
FileObject directoryFileObject = null;
try {
// Find all folder names in this directory
//
directoryFileObject = KettleVFS.getFileObject(onefile, space);
if (directoryFileObject != null && directoryFileObject.getType() == FileType.FOLDER) {
// it's a directory
FileObject[] fileObjects = directoryFileObject.findFiles(new AllFileSelector() {
// Decide whether to descend into a folder during traversal.
@Override
public boolean traverseDescendents(FileSelectInfo info) {
return (info.getDepth() == 0 || subdirs) && // Check if one has permission to list this folder
hasAccess(info.getFile());
}
// Readability check; works around a VFS quirk for local files on Windows.
private boolean hasAccess(FileObject fileObject) {
try {
if (fileObject instanceof LocalFile) {
// fileObject.isReadable wrongly returns true in windows file system even if not readable
return Files.isReadable(Paths.get((new File(fileObject.getName().getPath())).toURI()));
}
return fileObject.isReadable();
} catch (FileSystemException e) {
// Something went wrong... well, let's assume "no access"!
return false;
}
}
// Decide whether a visited entry is included in the result.
@Override
public boolean includeFile(FileSelectInfo info) {
// Never return the parent directory of a file list.
if (info.getDepth() == 0) {
return false;
}
FileObject fileObject = info.getFile();
try {
return (fileObject != null && // Is this an allowed type?
filter.isFileTypeAllowed(fileObject.getType()) && // Check if one has permission to access it
hasAccess(fileObject));
} catch (IOException ex) {
// Upon error don't process the file.
return false;
}
}
});
// Re-check existence: entries may disappear between find and use.
if (fileObjects != null) {
for (int j = 0; j < fileObjects.length; j++) {
if (fileObjects[j].exists()) {
fileInputList.addFile(fileObjects[j]);
}
}
}
// A required folder that yielded nothing is recorded as non-accessible.
if (Utils.isEmpty(fileObjects)) {
if (onerequired) {
fileInputList.addNonAccessibleFile(directoryFileObject);
}
}
// Sort the list: quicksort, only for regular files
fileInputList.sortFiles();
} else {
// Not a folder: a required path that does not exist is recorded as missing.
// NOTE(review): an existing non-folder path is silently ignored — confirm intended.
if (onerequired && !directoryFileObject.exists()) {
fileInputList.addNonExistantFile(directoryFileObject);
}
}
} catch (Exception e) {
// Log and continue with the next folder entry; one bad path must not abort the scan.
log.logError(Const.getStackTracker(e));
} finally {
// Always release the VFS handle for this folder.
try {
if (directoryFileObject != null) {
directoryFileObject.close();
}
directoryFileObject = null;
} catch (Exception e) {
// Ignore
}
}
}
return fileInputList;
}
Aggregations