
Example 71 with FileObject

use of org.apache.commons.vfs2.FileObject in project pentaho-kettle by pentaho.

the class ParGzipCsvInput method openNextFile.

private boolean openNextFile() throws KettleException {
    try {
        // Close the previous file...
        // 
        closeFile();
        if (data.filenr >= data.filenames.length) {
            return false;
        }
        // Open the next one...
        // 
        logBasic("Opening file #" + data.filenr + " : " + data.filenames[data.filenr]);
        FileObject fileObject = KettleVFS.getFileObject(data.filenames[data.filenr], getTransMeta());
        data.fis = KettleVFS.getInputStream(fileObject);
        if (meta.isLazyConversionActive()) {
            data.binaryFilename = data.filenames[data.filenr].getBytes();
        }
        data.gzis = new GZIPInputStream(data.fis, data.bufferSize);
        clearBuffer();
        data.fileReadPosition = 0L;
        data.blockNr = 0;
        data.eofReached = false;
        // Skip to the next file...
        // 
        data.filenr++;
        // 
        if (data.parallel) {
            // Calculate the first block of data to read from the file
            // If the buffer size is 500, we read 0-499 for the first file,
            // 500-999 for the second, 1000-1499 for the third, etc.
            // 
            // After that we need to get 1500-1999 for the first step again,
            // 2000-2499 for the second, 2500-2999 for the third, etc.
            // 
            // This is equivalent :
            // 
            // FROM : stepNumber * bufferSize + blockNr*bufferSize*nrOfSteps
            // TO : FROM + bufferSize - 1
            // 
            // Example : step 0, block 0, size 500:
            // From: 0*500+0*500*3=0 To: 0+500-1=499
            // 
            // Example : step 0, block 1, size 500:
            // From: 0*500+1*500*3=1500 To: 1500+500-1=1999
            // 
            // So our first act is to skip to the correct position in the compressed stream...
            // 
            // for now.
            data.blockSize = 2 * data.bufferSize;
            long bytesToSkip = data.stepNumber * data.blockSize;
            if (bytesToSkip > 0) {
                // Get into position for block 0
                // 
                logBasic("Skipping " + bytesToSkip + " bytes to go to position " + bytesToSkip + " for step copy " + data.stepNumber);
                long bytesSkipped = 0L;
                while (bytesSkipped < bytesToSkip) {
                    long n = data.gzis.skip(bytesToSkip - bytesSkipped);
                    if (n <= 0) {
                        // EOF in this file, can't read a block in this step copy
                        data.eofReached = true;
                        return false;
                    }
                    bytesSkipped += n;
                }
                // Keep track of the file pointer!
                // 
                data.fileReadPosition += bytesSkipped;
                // Reset the bytes read in the current block of data
                // 
                data.totalBytesRead = 0L;
                // Skip the first row until the next CR
                // 
                readOneRow(false);
            } else {
                // Reset the bytes read in the current block of data
                // 
                data.totalBytesRead = 0L;
                // 
                if (meta.isHeaderPresent()) {
                    readOneRow(false);
                }
            }
        } else {
            // Just one block: read it all until we hit an EOF.
            // 
            // 9,223,372,036 GB
            data.blockSize = Long.MAX_VALUE;
            // 
            if (meta.isHeaderPresent()) {
                readOneRow(false);
            }
        }
        // Add filename to result filenames ?
        if (meta.isAddResultFile()) {
            ResultFile resultFile = new ResultFile(ResultFile.FILE_TYPE_GENERAL, fileObject, getTransMeta().getName(), toString());
            resultFile.setComment("File was read by a Csv input step");
            addResultFile(resultFile);
        }
        // Reset the row number pointer...
        // 
        data.rowNumber = 1L;
        return true;
    } catch (Exception e) {
        throw new KettleException(e);
    }
}
Also used : GZIPInputStream(java.util.zip.GZIPInputStream) KettleException(org.pentaho.di.core.exception.KettleException) FileObject(org.apache.commons.vfs2.FileObject) ResultFile(org.pentaho.di.core.ResultFile) IOException(java.io.IOException) KettleFileException(org.pentaho.di.core.exception.KettleFileException)
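
The parallel branch above assigns each step copy its own byte range in the decompressed stream: FROM = stepNumber * bufferSize + blockNr * bufferSize * nrOfSteps and TO = FROM + bufferSize - 1. The following is a minimal, self-contained sketch of that arithmetic only; the class and method names are hypothetical and not part of the Kettle code base.

public class BlockRangeSketch {

    // Start offset of a block for a given step copy:
    // FROM = stepNumber * bufferSize + blockNr * bufferSize * nrOfSteps
    static long blockStart(int stepNumber, long blockNr, int bufferSize, int nrOfSteps) {
        return (long) stepNumber * bufferSize + blockNr * bufferSize * nrOfSteps;
    }

    // End offset (inclusive): TO = FROM + bufferSize - 1
    static long blockEnd(long from, int bufferSize) {
        return from + bufferSize - 1;
    }

    public static void main(String[] args) {
        int bufferSize = 500;
        int nrOfSteps = 3;
        for (int blockNr = 0; blockNr < 2; blockNr++) {
            long from = blockStart(0, blockNr, bufferSize, nrOfSteps);
            System.out.println("step 0, block " + blockNr + ": " + from + " .. " + blockEnd(from, bufferSize));
        }
    }
}

Running it prints 0 .. 499 for block 0 and 1500 .. 1999 for block 1, matching the worked examples in the comments.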

Example 72 with FileObject

use of org.apache.commons.vfs2.FileObject in project pentaho-kettle by pentaho.

the class TextFileInputTest method deleteVfsFile.

private static void deleteVfsFile(String path) throws Exception {
    FileObject fileObject = TestUtils.getFileObject(path);
    fileObject.close();
    fileObject.delete();
}
Also used : FileObject(org.apache.commons.vfs2.FileObject)
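
The helper above closes the FileObject before deleting it, presumably to release any cached handle first. For reference, roughly the same operation written against the plain Commons VFS API, without the Kettle TestUtils helper, could look like the sketch below; the class name and URI parameter are placeholders.

import org.apache.commons.vfs2.FileObject;
import org.apache.commons.vfs2.FileSystemManager;
import org.apache.commons.vfs2.VFS;

public class VfsDeleteSketch {

    public static void deleteIfExists(String uri) throws Exception {
        FileSystemManager manager = VFS.getManager();
        FileObject file = manager.resolveFile(uri);
        try {
            if (file.exists()) {
                // delete() removes the file itself and returns true if it was deleted
                file.delete();
            }
        } finally {
            file.close();
        }
    }
}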

Example 73 with FileObject

use of org.apache.commons.vfs2.FileObject in project pentaho-kettle by pentaho.

the class BaseParsingTest method getFile.

/**
 * Resolve file from test directory.
 */
protected FileObject getFile(String filename) throws Exception {
    URL res = this.getClass().getResource(inPrefix + filename);
    assertNotNull("There is no file", res);
    FileObject file = fs.resolveFile(res.toExternalForm());
    assertNotNull("There is no file", file);
    return file;
}
Also used : FileObject(org.apache.commons.vfs2.FileObject) URL(java.net.URL)
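
The test helper resolves a classpath resource URL straight into a FileObject. A standalone version of the same idea, assuming the default VFS manager rather than the test's project-specific fs field, might look like this (names are illustrative):

import java.net.URL;
import org.apache.commons.vfs2.FileObject;
import org.apache.commons.vfs2.FileSystemManager;
import org.apache.commons.vfs2.VFS;

public class ResolveResourceSketch {

    public static FileObject resolveResource(Class<?> anchor, String resourceName) throws Exception {
        URL res = anchor.getResource(resourceName);
        if (res == null) {
            throw new IllegalArgumentException("Resource not found: " + resourceName);
        }
        // toExternalForm() yields file:/... or jar:file:/...!/... URIs, both of which VFS can resolve
        FileSystemManager fs = VFS.getManager();
        return fs.resolveFile(res.toExternalForm());
    }
}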

Example 74 with FileObject

use of org.apache.commons.vfs2.FileObject in project pentaho-kettle by pentaho.

the class FileInputList method getRequiredFilesDescription.

public static String getRequiredFilesDescription(List<FileObject> nonExistantFiles) {
    StringBuilder buffer = new StringBuilder();
    for (Iterator<FileObject> iter = nonExistantFiles.iterator(); iter.hasNext(); ) {
        FileObject file = iter.next();
        buffer.append(Const.optionallyDecodeUriString(file.getName().getURI()));
        buffer.append(Const.CR);
    }
    return buffer.toString();
}
Also used : CompressedFileFileObject(org.apache.commons.vfs2.provider.compressed.CompressedFileFileObject) FileObject(org.apache.commons.vfs2.FileObject)
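
The loop above appends the decoded URI of each missing file followed by a carriage return. A stream-based equivalent is sketched below; note it is not the Kettle implementation: it joins entries with the separator instead of appending a trailing one, and it skips the Const.optionallyDecodeUriString decoding step.

import java.util.List;
import java.util.stream.Collectors;
import org.apache.commons.vfs2.FileObject;

public class RequiredFilesDescriptionSketch {

    public static String describe(List<FileObject> nonExistentFiles, String lineSeparator) {
        return nonExistentFiles.stream()
                // the Kettle version also URL-decodes each URI before appending it
                .map(file -> file.getName().getURI())
                .collect(Collectors.joining(lineSeparator));
    }
}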

Example 75 with FileObject

use of org.apache.commons.vfs2.FileObject in project pentaho-kettle by pentaho.

the class FileInputList method createFolderList.

public static FileInputList createFolderList(VariableSpace space, String[] folderName, String[] folderRequired) {
    FileInputList fileInputList = new FileInputList();
    // Replace possible environment variables...
    final String[] realfolder = space.environmentSubstitute(folderName);
    for (int i = 0; i < realfolder.length; i++) {
        final String onefile = realfolder[i];
        final boolean onerequired = YES.equalsIgnoreCase(folderRequired[i]);
        final boolean subdirs = true;
        final FileTypeFilter filter = FileTypeFilter.ONLY_FOLDERS;
        if (Utils.isEmpty(onefile)) {
            continue;
        }
        FileObject directoryFileObject = null;
        try {
            // Find all folder names in this directory
            // 
            directoryFileObject = KettleVFS.getFileObject(onefile, space);
            if (directoryFileObject != null && directoryFileObject.getType() == FileType.FOLDER) {
                // it's a directory
                FileObject[] fileObjects = directoryFileObject.findFiles(new AllFileSelector() {

                    @Override
                    public boolean traverseDescendents(FileSelectInfo info) {
                        return (info.getDepth() == 0 || subdirs) && // Check if one has permission to list this folder
                        hasAccess(info.getFile());
                    }

                    private boolean hasAccess(FileObject fileObject) {
                        try {
                            if (fileObject instanceof LocalFile) {
                                // fileObject.isReadable wrongly returns true in windows file system even if not readable
                                return Files.isReadable(Paths.get((new File(fileObject.getName().getPath())).toURI()));
                            }
                            return fileObject.isReadable();
                        } catch (FileSystemException e) {
                            // Something went wrong... well, let's assume "no access"!
                            return false;
                        }
                    }

                    @Override
                    public boolean includeFile(FileSelectInfo info) {
                        // Never return the parent directory of a file list.
                        if (info.getDepth() == 0) {
                            return false;
                        }
                        FileObject fileObject = info.getFile();
                        try {
                            return (fileObject != null && // Is this an allowed type?
                            filter.isFileTypeAllowed(fileObject.getType()) && // Check if one has permission to access it
                            hasAccess(fileObject));
                        } catch (IOException ex) {
                            // Upon error don't process the file.
                            return false;
                        }
                    }
                });
                if (fileObjects != null) {
                    for (int j = 0; j < fileObjects.length; j++) {
                        if (fileObjects[j].exists()) {
                            fileInputList.addFile(fileObjects[j]);
                        }
                    }
                }
                if (Utils.isEmpty(fileObjects)) {
                    if (onerequired) {
                        fileInputList.addNonAccessibleFile(directoryFileObject);
                    }
                }
                // Sort the list: quicksort, only for regular files
                fileInputList.sortFiles();
            } else {
                if (onerequired && !directoryFileObject.exists()) {
                    fileInputList.addNonExistantFile(directoryFileObject);
                }
            }
        } catch (Exception e) {
            log.logError(Const.getStackTracker(e));
        } finally {
            try {
                if (directoryFileObject != null) {
                    directoryFileObject.close();
                }
                directoryFileObject = null;
            } catch (Exception e) {
            // Ignore
            }
        }
    }
    return fileInputList;
}
Also used : IOException(java.io.IOException) FileSelectInfo(org.apache.commons.vfs2.FileSelectInfo) FileSystemException(org.apache.commons.vfs2.FileSystemException) LocalFile(org.apache.commons.vfs2.provider.local.LocalFile) AllFileSelector(org.apache.commons.vfs2.AllFileSelector) CompressedFileFileObject(org.apache.commons.vfs2.provider.compressed.CompressedFileFileObject) FileObject(org.apache.commons.vfs2.FileObject) File(java.io.File)
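
The selector above does the real work: traverseDescendents decides which directories are entered (honoring the subdirs flag and read permissions), and includeFile keeps only folders below the starting directory that are of an allowed type and accessible. A stripped-down sketch of that pattern, without the variable substitution, required-folder bookkeeping, and Windows readability workaround from the Kettle code, might look like this; the class name and URI parameter are placeholders.

import org.apache.commons.vfs2.AllFileSelector;
import org.apache.commons.vfs2.FileObject;
import org.apache.commons.vfs2.FileSelectInfo;
import org.apache.commons.vfs2.FileSystemException;
import org.apache.commons.vfs2.FileType;
import org.apache.commons.vfs2.VFS;

public class FolderListSketch {

    public static FileObject[] listSubFolders(String directoryUri) throws Exception {
        FileObject dir = VFS.getManager().resolveFile(directoryUri);
        try {
            if (dir.getType() != FileType.FOLDER) {
                return new FileObject[0];
            }
            // AllFileSelector traverses all descendants by default, like the subdirs = true case above
            return dir.findFiles(new AllFileSelector() {

                @Override
                public boolean includeFile(FileSelectInfo info) {
                    try {
                        // Skip the starting directory itself (depth 0) and keep folders only
                        return info.getDepth() > 0 && info.getFile().getType() == FileType.FOLDER;
                    } catch (FileSystemException e) {
                        // On error, leave the entry out
                        return false;
                    }
                }
            });
        } finally {
            dir.close();
        }
    }
}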

Aggregations

FileObject (org.apache.commons.vfs2.FileObject): 646
KettleException (org.pentaho.di.core.exception.KettleException): 206
IOException (java.io.IOException): 203
FileSystemException (org.apache.commons.vfs2.FileSystemException): 173
KettleXMLException (org.pentaho.di.core.exception.KettleXMLException): 104
KettleFileException (org.pentaho.di.core.exception.KettleFileException): 97
Test (org.junit.Test): 82
KettleDatabaseException (org.pentaho.di.core.exception.KettleDatabaseException): 68
File (java.io.File): 60
InputStream (java.io.InputStream): 48
ValueMetaString (org.pentaho.di.core.row.value.ValueMetaString): 37
KettleStepException (org.pentaho.di.core.exception.KettleStepException): 36
ArrayList (java.util.ArrayList): 35
ResultFile (org.pentaho.di.core.ResultFile): 33
ILanguageImpl (org.metaborg.core.language.ILanguageImpl): 32
Result (org.pentaho.di.core.Result): 32
OutputStream (java.io.OutputStream): 29
FileName (org.apache.commons.vfs2.FileName): 29
KettleValueException (org.pentaho.di.core.exception.KettleValueException): 29
IStrategoTerm (org.spoofax.interpreter.terms.IStrategoTerm): 28