Search in sources :

Example 36 with KettleFileException

Use of org.pentaho.di.core.exception.KettleFileException in the project pentaho-kettle by Pentaho.

From the class TextFileInput, the method getLine:

/**
 * Reads one line of text from the reader into {@code line}, honoring the
 * requested file format (DOS, UNIX or mixed).
 *
 * @param log the logging channel (not used by this routine)
 * @param reader source of characters
 * @param encodingType used to recognize CR / LF code points for the encoding
 * @param formatNr one of the TextFileInputMeta.FILE_FORMAT_* constants
 * @param line buffer that receives the characters of the line (cleared on entry)
 * @return the line read (possibly empty when a terminator is hit immediately),
 *         the partial line when a read error occurs mid-line, or {@code null}
 *         when the end of the stream is reached with nothing buffered
 * @throws KettleFileException when a DOS-format terminator is not paired with
 *         its CR/LF partner, or when a read fails before any character was
 *         buffered
 */
public static final String getLine(LogChannelInterface log, InputStreamReader reader, EncodingType encodingType, int formatNr, StringBuilder line) throws KettleFileException {
    line.setLength(0);
    int ch = 0;
    try {
        switch(formatNr) {
            case TextFileInputMeta.FILE_FORMAT_DOS:
                // Expect CR/LF pairs: any terminator must be followed by its partner.
                while (ch >= 0) {
                    ch = reader.read();
                    if (encodingType.isReturn(ch) || encodingType.isLinefeed(ch)) {
                        // consume the second terminator character
                        ch = reader.read();
                        boolean pairedTerminator = encodingType.isReturn(ch) || encodingType.isLinefeed(ch);
                        if (!pairedTerminator) {
                            // A character belonging to the next line was pulled instead.
                            throw new KettleFileException(BaseMessages.getString(PKG, "TextFileInput.Log.SingleLineFound"));
                        }
                        return line.toString();
                    }
                    if (ch >= 0) {
                        line.append((char) ch);
                    }
                }
                break;
            case TextFileInputMeta.FILE_FORMAT_UNIX:
                // A single LF or CR terminates the line.
                while (ch >= 0) {
                    ch = reader.read();
                    if (encodingType.isLinefeed(ch) || encodingType.isReturn(ch)) {
                        return line.toString();
                    }
                    if (ch >= 0) {
                        line.append((char) ch);
                    }
                }
                break;
            case TextFileInputMeta.FILE_FORMAT_MIXED:
                // LF ends the line, CR is silently dropped. Works for Mac OS X
                // (not Mac OS 9, which should use the UNIX format instead).
                while (ch >= 0) {
                    ch = reader.read();
                    if (encodingType.isLinefeed(ch)) {
                        return line.toString();
                    }
                    if (!encodingType.isReturn(ch) && ch >= 0) {
                        line.append((char) ch);
                    }
                }
                break;
            default:
                break;
        }
    } catch (KettleFileException e) {
        // Re-throw our own signal untouched (e.g. the unpaired-terminator case above).
        throw e;
    } catch (Exception e) {
        // A read failure with nothing buffered yet is fatal; otherwise hand
        // back whatever was read so far.
        if (line.length() == 0) {
            throw new KettleFileException(BaseMessages.getString(PKG, "TextFileInput.Log.Error.ExceptionReadingLine", e.toString()), e);
        }
        return line.toString();
    }
    // End of stream: return a final unterminated line, or null when empty.
    return line.length() > 0 ? line.toString() : null;
}
Also used : KettleFileException(org.pentaho.di.core.exception.KettleFileException) KettleException(org.pentaho.di.core.exception.KettleException) KettleFileException(org.pentaho.di.core.exception.KettleFileException)

Example 37 with KettleFileException

Use of org.pentaho.di.core.exception.KettleFileException in the project pentaho-kettle by Pentaho.

From the class TextFileInput, the method processRow:

/**
 * Main step loop: on the first call resolves the file list and output row
 * metadata; on every call takes one line out of {@code data.lineBuffer},
 * converts it to a row (honoring headers, footers, paging and line wrapping)
 * and emits it via putRow().
 *
 * @param smi the step meta (cast to TextFileInputMeta)
 * @param sdi the step data (cast to TextFileInputData)
 * @return true while more rows may follow; false once processing must stop
 *         (no files, row limit reached, fatal open failure, ...)
 * @throws KettleException on conversion or row-passing errors
 */
@Override
public boolean processRow(StepMetaInterface smi, StepDataInterface sdi) throws KettleException {
    data = (TextFileInputData) sdi;
    meta = (TextFileInputMeta) smi;
    Object[] r = null;
    boolean retval = true;
    // set to true once a converted row is ready to be emitted at the bottom
    boolean putrow = false;
    if (first) {
        // we just got started
        first = false;
        data.outputRowMeta = new RowMeta();
        RowMetaInterface[] infoStep = null;
        if (meta.isAcceptingFilenames()) {
            // Read the files from the specified input stream...
            // 
            data.getFiles().getFiles().clear();
            // column index of the filename field in the incoming rows; resolved
            // lazily from the first row's metadata
            int idx = -1;
            data.rowSet = findInputRowSet(meta.getAcceptingStepName());
            Object[] fileRow = getRowFrom(data.rowSet);
            while (fileRow != null) {
                RowMetaInterface prevInfoFields = data.rowSet.getRowMeta();
                if (idx < 0) {
                    if (meta.isPassingThruFields()) {
                        data.passThruFields = new HashMap<FileObject, Object[]>();
                        infoStep = new RowMetaInterface[] { prevInfoFields };
                        data.nrPassThruFields = prevInfoFields.size();
                    }
                    idx = prevInfoFields.indexOfValue(meta.getAcceptingField());
                    if (idx < 0) {
                        // Filename field missing from the incoming rows: hard stop.
                        logError(BaseMessages.getString(PKG, "TextFileInput.Log.Error.UnableToFindFilenameField", meta.getAcceptingField()));
                        setErrors(getErrors() + 1);
                        stopAll();
                        return false;
                    }
                }
                String fileValue = prevInfoFields.getString(fileRow, idx);
                try {
                    FileObject fileObject = KettleVFS.getFileObject(fileValue, getTransMeta());
                    data.getFiles().addFile(fileObject);
                    if (meta.isPassingThruFields()) {
                        data.passThruFields.put(fileObject, fileRow);
                    }
                } catch (KettleFileException e) {
                    // Bad filename value: log it and keep consuming rows.
                    logError(BaseMessages.getString(PKG, "TextFileInput.Log.Error.UnableToCreateFileObject", fileValue), e);
                }
                // Grab another row
                fileRow = getRowFrom(data.rowSet);
            }
            if (data.getFiles().nrOfFiles() == 0) {
                if (log.isDetailed()) {
                    logDetailed(BaseMessages.getString(PKG, "TextFileInput.Log.Error.NoFilesSpecified"));
                }
                setOutputDone();
                return false;
            }
        }
        // // get the metadata populated. Simple and easy.
        meta.getFields(data.outputRowMeta, getStepname(), infoStep, null, this, repository, metaStore);
        // Create convert meta-data objects that will contain Date & Number formatters
        // 
        data.convertRowMeta = data.outputRowMeta.cloneToType(ValueMetaInterface.TYPE_STRING);
        handleMissingFiles();
        // if it fails and not set to skip bad files...
        if (!openNextFile()) {
            if (failAfterBadFile(null)) {
                closeLastFile();
                setOutputDone();
                return false;
            }
        }
        // Count the number of repeat fields...
        for (int i = 0; i < meta.getInputFields().length; i++) {
            if (meta.getInputFields()[i].isRepeated()) {
                data.nr_repeats++;
            }
        }
    } else {
        if (!data.doneReading) {
            int repeats = 1;
            if (meta.isLineWrapped()) {
                repeats = meta.getNrWraps() > 0 ? meta.getNrWraps() : repeats;
            }
            if (!data.doneWithHeader && data.headerLinesRead == 0) {
                // We are just starting to read header lines, read them all
                // NOTE(review): the +1 presumably covers the first data line
                // after the header — TODO confirm against tryToReadLine().
                repeats += meta.getNrHeaderLines() + 1;
            }
            // Read a number of lines...
            for (int i = 0; i < repeats && !data.doneReading; i++) {
                if (!tryToReadLine(true)) {
                    // A failed read extends the loop by one attempt;
                    // data.doneReading bounds the retries.
                    repeats++;
                }
            }
        }
    }
    /*
     * If the buffer is empty: open the next file. (if nothing in there, open the next, etc.)
     */
    while (data.lineBuffer.size() == 0) {
        if (!openNextFile()) {
            // Open fails: done processing unless set to skip bad files
            if (failAfterBadFile(null)) {
                closeLastFile();
                // signal end to receiver(s)
                setOutputDone();
                return false;
            }
        // else will continue until can open
        }
    }
    /*
     * Take the first line available in the buffer & remove the line from the buffer
     */
    TextFileLine textLine = data.lineBuffer.get(0);
    incrementLinesInput();
    lineNumberInFile++;
    data.lineBuffer.remove(0);
    if (meta.isLayoutPaged()) {
        /*
       * Different rules apply: on each page: a header a number of data lines a footer
       */
        if (!data.doneWithHeader && data.pageLinesRead == 0) {
            // We are reading header lines
            if (log.isRowLevel()) {
                logRowlevel("P-HEADER (" + data.headerLinesRead + ") : " + textLine.line);
            }
            data.headerLinesRead++;
            if (data.headerLinesRead >= meta.getNrHeaderLines()) {
                data.doneWithHeader = true;
            }
        } else {
            if (data.pageLinesRead < meta.getNrLinesPerPage()) {
                // See if we are dealing with wrapped lines:
                if (meta.isLineWrapped()) {
                    // Glue the continuation lines onto the current line.
                    for (int i = 0; i < meta.getNrWraps(); i++) {
                        String extra = "";
                        if (data.lineBuffer.size() > 0) {
                            extra = data.lineBuffer.get(0).line;
                            data.lineBuffer.remove(0);
                        }
                        textLine.line += extra;
                    }
                }
                if (log.isRowLevel()) {
                    logRowlevel("P-DATA: " + textLine.line);
                }
                // Read a normal line on a page of data.
                data.pageLinesRead++;
                data.lineInFile++;
                long useNumber = meta.isRowNumberByFile() ? data.lineInFile : getLinesWritten() + 1;
                r = convertLineToRow(log, textLine, meta, data.currentPassThruFieldsRow, data.nrPassThruFields, data.outputRowMeta, data.convertRowMeta, data.filename, useNumber, data.separator, data.enclosure, data.escapeCharacter, data.dataErrorLineHandler, data.addShortFilename, data.addExtension, data.addPath, data.addSize, data.addIsHidden, data.addLastModificationDate, data.addUri, data.addRootUri, data.shortFilename, data.path, data.hidden, data.lastModificationDateTime, data.uriName, data.rootUriName, data.extension, data.size);
                if (r != null) {
                    putrow = true;
                }
                // done reading data.
                if (!meta.hasFooter() && (data.pageLinesRead == meta.getNrLinesPerPage())) {
                    /*
             * OK, we are done reading the footer lines, start again on 'next page' with the header
             */
                    data.doneWithHeader = false;
                    data.headerLinesRead = 0;
                    data.pageLinesRead = 0;
                    data.footerLinesRead = 0;
                    if (log.isRowLevel()) {
                        logRowlevel("RESTART PAGE");
                    }
                }
            } else {
                if (meta.hasFooter() && data.footerLinesRead < meta.getNrFooterLines()) {
                    if (log.isRowLevel()) {
                        logRowlevel("P-FOOTER: " + textLine.line);
                    }
                    data.footerLinesRead++;
                }
                if (!meta.hasFooter() || data.footerLinesRead >= meta.getNrFooterLines()) {
                    /*
             * OK, we are done reading the footer lines, start again on 'next page' with the header
             */
                    data.doneWithHeader = false;
                    data.headerLinesRead = 0;
                    data.pageLinesRead = 0;
                    data.footerLinesRead = 0;
                    if (log.isRowLevel()) {
                        logRowlevel("RESTART PAGE");
                    }
                }
            }
        }
    } else {
        if (!data.doneWithHeader) {
            // We are reading header lines
            data.headerLinesRead++;
            if (data.headerLinesRead >= meta.getNrHeaderLines()) {
                data.doneWithHeader = true;
            }
        } else {
            /*
         * IF we are done reading and we have a footer AND the number of lines in the buffer is smaller then the number
         * of footer lines THEN we can remove the remaining rows from the buffer: they are all footer rows.
         */
            if (data.doneReading && meta.hasFooter() && data.lineBuffer.size() < meta.getNrFooterLines()) {
                data.lineBuffer.clear();
            } else {
                // See if we are dealing with wrapped lines:
                if (meta.isLineWrapped()) {
                    for (int i = 0; i < meta.getNrWraps(); i++) {
                        String extra = "";
                        if (data.lineBuffer.size() > 0) {
                            extra = data.lineBuffer.get(0).line;
                            data.lineBuffer.remove(0);
                        } else {
                            // Buffer exhausted mid-wrap: pull one more line in.
                            tryToReadLine(true);
                            if (!data.lineBuffer.isEmpty()) {
                                extra = data.lineBuffer.remove(0).line;
                            }
                        }
                        textLine.line += extra;
                    }
                }
                if (data.filePlayList.isProcessingNeeded(textLine.file, textLine.lineNumber, AbstractFileErrorHandler.NO_PARTS)) {
                    data.lineInFile++;
                    long useNumber = meta.isRowNumberByFile() ? data.lineInFile : getLinesWritten() + 1;
                    r = convertLineToRow(log, textLine, meta, data.currentPassThruFieldsRow, data.nrPassThruFields, data.outputRowMeta, data.convertRowMeta, data.filename, useNumber, data.separator, data.enclosure, data.escapeCharacter, data.dataErrorLineHandler, data.addShortFilename, data.addExtension, data.addPath, data.addSize, data.addIsHidden, data.addLastModificationDate, data.addUri, data.addRootUri, data.shortFilename, data.path, data.hidden, data.lastModificationDateTime, data.uriName, data.rootUriName, data.extension, data.size);
                    if (r != null) {
                        if (log.isRowLevel()) {
                            logRowlevel("Found data row: " + data.outputRowMeta.getString(r));
                        }
                        putrow = true;
                    }
                } else {
                    // Line is filtered out by the file play list (error replay).
                    putrow = false;
                }
            }
        }
    }
    if (putrow && r != null) {
        // See if the previous values need to be repeated!
        if (data.nr_repeats > 0) {
            if (data.previous_row == null) {
                // First invocation...
                data.previous_row = data.outputRowMeta.cloneRow(r);
            } else {
                // int repnr = 0;
                for (int i = 0; i < meta.getInputFields().length; i++) {
                    if (meta.getInputFields()[i].isRepeated()) {
                        if (r[i] == null) {
                            // if it is empty: take the previous value!
                            r[i] = data.previous_row[i];
                        } else {
                            // not empty: change the previous_row entry!
                            data.previous_row[i] = r[i];
                        }
                    // repnr++;
                    }
                }
            }
        }
        if (log.isRowLevel()) {
            logRowlevel("Putting row: " + data.outputRowMeta.getString(r));
        }
        putRow(data.outputRowMeta, r);
        // Stop when the configured row limit has been produced.
        if (getLinesInput() >= meta.getRowLimit() && meta.getRowLimit() > 0) {
            closeLastFile();
            // signal end to receiver(s)
            setOutputDone();
            return false;
        }
    }
    if (checkFeedback(getLinesInput())) {
        if (log.isBasic()) {
            logBasic("linenr " + getLinesInput());
        }
    }
    // retval stays true here; all termination paths above return false directly.
    return retval;
}
Also used : KettleFileException(org.pentaho.di.core.exception.KettleFileException) RowMeta(org.pentaho.di.core.row.RowMeta) RowMetaInterface(org.pentaho.di.core.row.RowMetaInterface) ValueMetaString(org.pentaho.di.core.row.value.ValueMetaString) FileObject(org.apache.commons.vfs2.FileObject) FileObject(org.apache.commons.vfs2.FileObject)

Example 38 with KettleFileException

Use of org.pentaho.di.core.exception.KettleFileException in the project pentaho-metaverse by Pentaho.

From the class VfsLineageCollector, the method compressArtifacts:

/**
 * Writes the given VFS paths into {@code os} as a zip archive, one entry per
 * path (entry name = the file's VFS path).
 *
 * Errors on individual files are logged and the remaining files are still
 * processed; a VFS resolution failure aborts the whole archive with a logged
 * error. The supplied output stream is closed via the wrapping
 * ZipOutputStream when done.
 *
 * @param paths VFS paths of the artifacts to compress
 * @param os stream that receives the zip archive
 */
@Override
public void compressArtifacts(List<String> paths, OutputStream os) {
    ZipOutputStream zos = null;
    try {
        FileSystemOptions opts = new FileSystemOptions();
        zos = new ZipOutputStream(os);
        for (String path : paths) {
            FileObject file = KettleVFS.getFileObject(path, opts);
            try {
                // register the file as an entry in the zip file
                ZipEntry zipEntry = new ZipEntry(file.getName().getPath());
                zos.putNextEntry(zipEntry);
                // write the file's bytes to the zip stream, closing the source
                // stream when done (it was previously leaked)
                try (InputStream fis = file.getContent().getInputStream()) {
                    zos.write(IOUtils.toByteArray(fis));
                }
            } catch (IOException e) {
                log.error(Messages.getString("ERROR.FailedAddingFileToZip", file.getName().getPath()));
            } finally {
                // indicate we are done with this file
                try {
                    zos.closeEntry();
                } catch (IOException e) {
                    log.error(Messages.getString("ERROR.FailedToProperlyCloseZipEntry", file.getName().getPath()));
                }
            }
        }
    } catch (KettleFileException e) {
        log.error(Messages.getString("ERROR.UnexpectedVfsError", e.getMessage()));
    } finally {
        IOUtils.closeQuietly(zos);
    }
}
Also used : KettleFileException(org.pentaho.di.core.exception.KettleFileException) ZipOutputStream(java.util.zip.ZipOutputStream) InputStream(java.io.InputStream) ZipEntry(java.util.zip.ZipEntry) FileObject(org.apache.commons.vfs2.FileObject) IOException(java.io.IOException) FileSystemOptions(org.apache.commons.vfs2.FileSystemOptions)

Example 39 with KettleFileException

Use of org.pentaho.di.core.exception.KettleFileException in the project pentaho-metaverse by Pentaho.

From the class KettleAnalyzerUtil, the method getResourcesFromMeta:

/**
 * Builds external-resource descriptors for every file the given file-input
 * step reads, resolved against the step's parent transformation.
 *
 * Resolution is best-effort: paths that cannot be turned into a resource
 * (VFS failure, or the factory returning null) are skipped silently, matching
 * the original behavior which threw-and-swallowed a KettleFileException.
 *
 * @param meta the file-input step meta to inspect
 * @param context analysis context (currently unused by this method)
 * @return the resolved resources; an empty collection when the meta has no
 *         parent step/transformation or no file paths
 */
public static Collection<IExternalResourceInfo> getResourcesFromMeta(final BaseFileInputMeta meta, final IAnalysisContext context) {
    Collection<IExternalResourceInfo> resources = Collections.emptyList();
    final StepMeta parentStepMeta = meta.getParentStepMeta();
    if (parentStepMeta != null) {
        final TransMeta parentTransMeta = parentStepMeta.getParentTransMeta();
        if (parentTransMeta != null) {
            final FileInputList inputList = meta.getFileInputList(parentTransMeta);
            if (inputList != null) {
                final String[] paths = inputList.getFileStrings();
                if (paths != null) {
                    resources = new ArrayList<>(paths.length);
                    for (final String path : paths) {
                        if (!Const.isEmpty(path)) {
                            try {
                                final IExternalResourceInfo resource = ExternalResourceInfoFactory.createFileResource(KettleVFS.getFileObject(path), true);
                                if (resource != null) {
                                    resources.add(resource);
                                }
                                // else: factory could not describe the file; skip it
                                // (previously signalled by throwing a KettleFileException
                                // that was immediately swallowed below).
                            } catch (KettleFileException kfe) {
                                // Best-effort: unresolvable VFS paths are skipped so the
                                // remaining resources are still reported.
                            }
                        }
                    }
                }
            }
        }
    }
    return resources;
}
Also used : KettleFileException(org.pentaho.di.core.exception.KettleFileException) IExternalResourceInfo(org.pentaho.metaverse.api.model.IExternalResourceInfo) TransMeta(org.pentaho.di.trans.TransMeta) StepMeta(org.pentaho.di.trans.step.StepMeta) FileInputList(org.pentaho.di.core.fileinput.FileInputList)

Example 40 with KettleFileException

Use of org.pentaho.di.core.exception.KettleFileException in the project pentaho-kettle by Pentaho.

From the class OraBulkLoader, the method createCommandLine:

/**
 * Create the command line for an sqlldr process depending on the meta information supplied.
 *
 * @param meta
 *          The meta data to create the command line from
 * @param password
 *          Use the real password or not (when false the password is masked as "******")
 *
 * @return The string to execute.
 *
 * @throws KettleException
 *           Upon any exception, including missing sqlldr/control file/connection
 *           and VFS resolution failures
 */
public String createCommandLine(OraBulkLoaderMeta meta, boolean password) throws KettleException {
    StringBuilder sb = new StringBuilder(300);
    if (meta.getSqlldr() != null) {
        // sqlldr executable itself comes first, without an option keyword.
        sb.append(resolveVfsFilename(meta.getSqlldr(), "sqlldr"));
    } else {
        throw new KettleException("No sqlldr application specified");
    }
    if (meta.getControlFile() != null) {
        appendFileOption(sb, "control", meta.getControlFile(), "controlfile");
    } else {
        throw new KettleException("No control file specified");
    }
    if (OraBulkLoaderMeta.METHOD_AUTO_CONCURRENT.equals(meta.getLoadMethod())) {
        // Concurrent loading feeds data through stdin.
        sb.append(" data=\'-\'");
    }
    if (meta.getLogFile() != null) {
        appendFileOption(sb, "log", meta.getLogFile(), "logfile");
    }
    if (meta.getBadFile() != null) {
        appendFileOption(sb, "bad", meta.getBadFile(), "badfile");
    }
    if (meta.getDiscardFile() != null) {
        appendFileOption(sb, "discard", meta.getDiscardFile(), "discardfile");
    }
    DatabaseMeta dm = meta.getDatabaseMeta();
    if (dm != null) {
        String user = Const.NVL(dm.getUsername(), "");
        String pass = Const.NVL(Encr.decryptPasswordOptionallyEncrypted(environmentSubstitute(dm.getPassword())), "");
        if (!password) {
            // Mask the password, e.g. for logging the command line.
            pass = "******";
        }
        String dns = Const.NVL(dm.getDatabaseName(), "");
        sb.append(" userid=").append(environmentSubstitute(user)).append("/").append(environmentSubstitute(pass)).append("@");
        String overrideName = meta.getDbNameOverride();
        if (Utils.isEmpty(Const.rtrim(overrideName))) {
            sb.append(environmentSubstitute(dns));
        } else {
            // if the database name override is filled in, do that one.
            sb.append(environmentSubstitute(overrideName));
        }
    } else {
        throw new KettleException("No connection specified");
    }
    if (meta.isDirectPath()) {
        sb.append(" DIRECT=TRUE");
        if (getStepMeta().getCopies() > 1 || meta.isParallel()) {
            sb.append(" PARALLEL=TRUE");
        }
    }
    return sb.toString();
}

/**
 * Resolves a (possibly variable-laden) VFS path to a concrete filename.
 *
 * @param vfsPath the configured path; variables are substituted first
 * @param description short label used in the error message
 *          (e.g. "sqlldr", "controlfile")
 * @return the resolved filename
 * @throws KettleException wrapping any KettleFileException from VFS
 */
private String resolveVfsFilename(String vfsPath, String description) throws KettleException {
    try {
        FileObject fileObject = KettleVFS.getFileObject(environmentSubstitute(vfsPath), getTransMeta());
        return KettleVFS.getFilename(fileObject);
    } catch (KettleFileException ex) {
        throw new KettleException("Error retrieving " + description + " string", ex);
    }
}

/**
 * Appends a " option='filename'" fragment to the command line, resolving the
 * filename through VFS.
 *
 * @param sb command-line buffer being built
 * @param option sqlldr option keyword (control, log, bad, discard)
 * @param vfsPath configured path for the option
 * @param description label for error messages, see {@link #resolveVfsFilename}
 * @throws KettleException when the path cannot be resolved
 */
private void appendFileOption(StringBuilder sb, String option, String vfsPath, String description) throws KettleException {
    sb.append(" ").append(option).append("=\'");
    sb.append(resolveVfsFilename(vfsPath, description));
    sb.append("\'");
}
Also used : KettleException(org.pentaho.di.core.exception.KettleException) KettleFileException(org.pentaho.di.core.exception.KettleFileException) FileObject(org.apache.commons.vfs2.FileObject) DatabaseMeta(org.pentaho.di.core.database.DatabaseMeta)

Aggregations

KettleFileException (org.pentaho.di.core.exception.KettleFileException)61 IOException (java.io.IOException)32 FileObject (org.apache.commons.vfs2.FileObject)30 KettleException (org.pentaho.di.core.exception.KettleException)25 FileSystemException (org.apache.commons.vfs2.FileSystemException)10 DataInputStream (java.io.DataInputStream)8 File (java.io.File)7 ResultFile (org.pentaho.di.core.ResultFile)7 SocketTimeoutException (java.net.SocketTimeoutException)6 FileInputStream (java.io.FileInputStream)5 InputStream (java.io.InputStream)5 KettleXMLException (org.pentaho.di.core.exception.KettleXMLException)5 BufferedInputStream (java.io.BufferedInputStream)4 InputStreamReader (java.io.InputStreamReader)4 GZIPInputStream (java.util.zip.GZIPInputStream)4 KettleDatabaseException (org.pentaho.di.core.exception.KettleDatabaseException)4 KettleEOFException (org.pentaho.di.core.exception.KettleEOFException)4 ValueMetaInterface (org.pentaho.di.core.row.ValueMetaInterface)4 DataOutputStream (java.io.DataOutputStream)3 FileNotFoundException (java.io.FileNotFoundException)3