Search in sources:

Example 1 with CompressionProvider

use of org.pentaho.di.core.compress.CompressionProvider in project pentaho-kettle by pentaho.

the class TextFileInputDialog method getCSV.

/**
 * Gets the data layout of the first input file and fills the fields table from it.
 *
 * <p>The method reads the first line of the first file in the input list, splits it with the
 * configured delimiter/enclosure/escape character and adds one table row per detected field
 * (typed as "String" by default). The user is then asked how many lines to sample; a
 * non-negative answer starts the CSV import progress dialog, whose resulting message is shown
 * in a read-only text dialog.
 *
 * <p>Errors are reported to the user through message boxes / error dialogs; nothing is thrown.
 */
private void getCSV() {
    TextFileInputMeta meta = new TextFileInputMeta();
    getInfo(meta, true);
    // CSV without separator defined
    if (meta.content.fileType.equalsIgnoreCase("CSV") && (meta.content.separator == null || meta.content.separator.isEmpty())) {
        MessageBox mb = new MessageBox(shell, SWT.OK | SWT.ICON_ERROR);
        mb.setMessage(BaseMessages.getString(PKG, "TextFileInput.Exception.NoSeparator"));
        mb.setText(BaseMessages.getString(PKG, "TextFileInputDialog.DialogTitle"));
        mb.open();
        return;
    }
    // Snapshot of the settings taken before the fields table is modified below
    TextFileInputMeta previousMeta = (TextFileInputMeta) meta.clone();
    FileInputList textFileList = meta.getFileInputList(transMeta);
    // Initialised to null so the finally block can tell whether the raw stream was ever opened
    InputStream fileInputStream = null;
    CompressionInputStream inputStream = null;
    StringBuilder lineStringBuilder = new StringBuilder(256);
    int fileFormatType = meta.getFileFormatTypeNr();
    String delimiter = transMeta.environmentSubstitute(meta.content.separator);
    String enclosure = transMeta.environmentSubstitute(meta.content.enclosure);
    String escapeCharacter = transMeta.environmentSubstitute(meta.content.escapeCharacter);
    if (textFileList.nrOfFiles() > 0) {
        // Default answer used when there are no existing fields and therefore no question is asked
        int clearFields = meta.content.header ? SWT.YES : SWT.NO;
        int nrInputFields = meta.inputFields.length;
        if (nrInputFields > 0) {
            MessageBox mb = new MessageBox(shell, SWT.YES | SWT.NO | SWT.CANCEL | SWT.ICON_QUESTION);
            mb.setMessage(BaseMessages.getString(PKG, "TextFileInputDialog.ClearFieldList.DialogMessage"));
            mb.setText(BaseMessages.getString(PKG, "TextFileInputDialog.ClearFieldList.DialogTitle"));
            clearFields = mb.open();
            if (clearFields == SWT.CANCEL) {
                return;
            }
        }
        try {
            wFields.table.removeAll();
            // Only the first file of the list is inspected
            FileObject fileObject = textFileList.getFile(0);
            fileInputStream = KettleVFS.getInputStream(fileObject);
            Table table = wFields.table;
            CompressionProvider provider = CompressionProviderFactory.getInstance().createCompressionProviderInstance(meta.content.fileCompression);
            inputStream = provider.createInputStream(fileInputStream);
            InputStreamReader reader;
            if (meta.getEncoding() != null && meta.getEncoding().length() > 0) {
                reader = new InputStreamReader(inputStream, meta.getEncoding());
            } else {
                // No encoding configured: fall back to the reader's default charset
                reader = new InputStreamReader(inputStream);
            }
            EncodingType encodingType = EncodingType.guessEncodingType(reader.getEncoding());
            // Scan the header-line, determine fields...
            String line = TextFileInputUtils.getLine(log, reader, encodingType, fileFormatType, lineStringBuilder);
            if (line != null) {
                // Estimate the number of input fields...
                // Chop up the line using the delimiter
                String[] fields = TextFileInputUtils.guessStringsFromLine(transMeta, log, line, meta, delimiter, enclosure, escapeCharacter);
                for (int i = 0; i < fields.length; i++) {
                    String field = fields[i];
                    if (field == null || field.length() == 0 || !meta.content.header) {
                        // No usable header text: generate a 1-based placeholder name
                        field = "Field" + (i + 1);
                    } else {
                        // Trim the field
                        field = Const.trim(field);
                        // Replace all spaces & - with underscore _
                        field = Const.replace(field, " ", "_");
                        field = Const.replace(field, "-", "_");
                    }
                    TableItem item = new TableItem(table, SWT.NONE);
                    item.setText(1, field);
                    // The default type is String...
                    item.setText(2, "String");
                }
                wFields.setRowNums();
                wFields.optWidth(true);
                // Copy it...
                getInfo(meta, true);
                // Sample a few lines to determine the correct type of the fields...
                String shellText = BaseMessages.getString(PKG, "TextFileInputDialog.LinesToSample.DialogTitle");
                String lineText = BaseMessages.getString(PKG, "TextFileInputDialog.LinesToSample.DialogMessage");
                EnterNumberDialog end = new EnterNumberDialog(shell, 100, shellText, lineText);
                int samples = end.open();
                if (samples >= 0) {
                    getInfo(meta, true);
                    // The progress dialog continues reading from the same reader, i.e. after the first line
                    TextFileCSVImportProgressDialog pd = new TextFileCSVImportProgressDialog(shell, meta, transMeta, reader, samples, clearFields == SWT.YES);
                    String message = pd.open();
                    if (message != null) {
                        wFields.removeAll();
                        // OK, what's the result of our search?
                        getData(meta);
                        // The user chose to keep the existing list: load the field data of the earlier
                        // snapshot and select the rows from the previous field count to the end.
                        // NOTE(review): exact merge semantics depend on getFieldsData - confirm.
                        if (clearFields == SWT.NO) {
                            getFieldsData(previousMeta, true);
                            wFields.table.setSelection(previousMeta.inputFields.length, wFields.table.getItemCount() - 1);
                        }
                        wFields.removeEmptyRows();
                        wFields.setRowNums();
                        wFields.optWidth(true);
                        EnterTextDialog etd = new EnterTextDialog(shell, BaseMessages.getString(PKG, "TextFileInputDialog.ScanResults.DialogTitle"), BaseMessages.getString(PKG, "TextFileInputDialog.ScanResults.DialogMessage"), message, true);
                        etd.setReadOnly();
                        etd.open();
                    }
                }
            } else {
                MessageBox mb = new MessageBox(shell, SWT.OK | SWT.ICON_ERROR);
                mb.setMessage(BaseMessages.getString(PKG, "TextFileInputDialog.UnableToReadHeaderLine.DialogMessage"));
                mb.setText(BaseMessages.getString(PKG, "System.Dialog.Error.Title"));
                mb.open();
            }
        } catch (IOException e) {
            new ErrorDialog(shell, BaseMessages.getString(PKG, "TextFileInputDialog.IOError.DialogTitle"), BaseMessages.getString(PKG, "TextFileInputDialog.IOError.DialogMessage"), e);
        } catch (KettleException e) {
            new ErrorDialog(shell, BaseMessages.getString(PKG, "System.Dialog.Error.Title"), BaseMessages.getString(PKG, "TextFileInputDialog.ErrorGettingFileDesc.DialogMessage"), e);
        } finally {
            try {
                if (inputStream != null) {
                    inputStream.close();
                } else if (fileInputStream != null) {
                    // The compression wrapper was never created (provider lookup or wrapping failed),
                    // so the raw file stream has to be closed directly to avoid leaking it.
                    fileInputStream.close();
                }
            } catch (Exception ignored) {
                // Ignore errors: closing is best-effort and the dialog has already reported any real failure
            }
        }
    } else {
        MessageBox mb = new MessageBox(shell, SWT.OK | SWT.ICON_ERROR);
        mb.setMessage(BaseMessages.getString(PKG, "TextFileInputDialog.NoValidFileFound.DialogMessage"));
        mb.setText(BaseMessages.getString(PKG, "System.Dialog.Error.Title"));
        mb.open();
    }
}
Also used : KettleException(org.pentaho.di.core.exception.KettleException) Table(org.eclipse.swt.widgets.Table) InputStreamReader(java.io.InputStreamReader) CompressionInputStream(org.pentaho.di.core.compress.CompressionInputStream) CompressionInputStream(org.pentaho.di.core.compress.CompressionInputStream) InputStream(java.io.InputStream) TableItem(org.eclipse.swt.widgets.TableItem) EncodingType(org.pentaho.di.trans.steps.fileinput.text.EncodingType) ErrorDialog(org.pentaho.di.ui.core.dialog.ErrorDialog) ValueMetaString(org.pentaho.di.core.row.value.ValueMetaString) IOException(java.io.IOException) KettleException(org.pentaho.di.core.exception.KettleException) IOException(java.io.IOException) MessageBox(org.eclipse.swt.widgets.MessageBox) CompressionProvider(org.pentaho.di.core.compress.CompressionProvider) TextFileInputMeta(org.pentaho.di.trans.steps.fileinput.text.TextFileInputMeta) EnterTextDialog(org.pentaho.di.ui.core.dialog.EnterTextDialog) FileObject(org.apache.commons.vfs2.FileObject) EnterNumberDialog(org.pentaho.di.ui.core.dialog.EnterNumberDialog) FileInputList(org.pentaho.di.core.fileinput.FileInputList)

Example 2 with CompressionProvider

use of org.pentaho.di.core.compress.CompressionProvider in project pentaho-kettle by pentaho.

the class TextFileInput method openNextFile.

/**
 * Opens the next file of the input list and pre-reads the first lines into the buffer.
 *
 * <p>The method closes the previously opened file, advances {@code data.filenr}, fills the
 * optional per-file fields (short filename, path, hidden flag, extension, last modification
 * date, URI, root URI, size), wraps the file in the configured compression stream and reader,
 * and then reads up to the computed number of buffered lines.
 *
 * @return {@code true} when the file was opened and the initial lines were read;
 *         {@code false} when there are no files, when closing the previous file failed and
 *         {@code failAfterBadFile(null)} reports true, or when any exception occurred (the
 *         error is logged and the error counter is incremented)
 */
private boolean openNextFile() {
    try {
        lineNumberInFile = 0;
        if (!closeLastFile() && failAfterBadFile(null)) {
            return false;
        }
        if (data.getFiles().nrOfFiles() == 0) {
            return false;
        }
        // Is this the last file?
        data.isLastFile = (data.filenr == data.getFiles().nrOfFiles() - 1);
        data.file = data.getFiles().getFile(data.filenr);
        data.filename = KettleVFS.getFilename(data.file);
        // Move file pointer ahead!
        data.filenr++;
        // Add additional fields?
        if (data.addShortFilename) {
            data.shortFilename = data.file.getName().getBaseName();
        }
        if (data.addPath) {
            data.path = KettleVFS.getFilename(data.file.getParent());
        }
        if (data.addIsHidden) {
            data.hidden = data.file.isHidden();
        }
        if (data.addExtension) {
            data.extension = data.file.getName().getExtension();
        }
        if (data.addLastModificationDate) {
            data.lastModificationDateTime = new Date(data.file.getContent().getLastModifiedTime());
        }
        if (data.addUri) {
            data.uriName = Const.optionallyDecodeUriString(data.file.getName().getURI());
        }
        if (data.addRootUri) {
            data.rootUriName = data.file.getName().getRootURI();
        }
        if (data.addSize) {
            // Long.valueOf replaces the deprecated Long(long) constructor
            data.size = Long.valueOf(data.file.getContent().getSize());
        }
        data.lineInFile = 0;
        if (meta.isPassingThruFields()) {
            data.currentPassThruFieldsRow = data.passThruFields.get(data.file);
        }
        // Register the file in the result file list when requested
        if (meta.isAddResultFile()) {
            ResultFile resultFile = new ResultFile(ResultFile.FILE_TYPE_GENERAL, data.file, getTransMeta().getName(), toString());
            resultFile.setComment("File was read by an Text File input step");
            addResultFile(resultFile);
        }
        if (log.isBasic()) {
            logBasic("Opening file: " + data.file.getName().getFriendlyURI());
        }
        CompressionProvider provider = CompressionProviderFactory.getInstance().getCompressionProviderByName(meta.getFileCompression());
        if (provider == null) {
            // Fail with a descriptive message instead of a bare NullPointerException on the next line;
            // the exception is handled by the catch block below like any other open failure.
            throw new IllegalStateException("No compression provider found with name = " + meta.getFileCompression());
        }
        data.in = provider.createInputStream(KettleVFS.getInputStream(data.file));
        data.dataErrorLineHandler.handleFile(data.file);
        data.in.nextEntry();
        if (log.isDetailed()) {
            logDetailed("This is a compressed file being handled by the " + provider.getName() + " provider");
        }
        if (meta.getEncoding() != null && meta.getEncoding().length() > 0) {
            data.isr = new InputStreamReader(new BufferedInputStream(data.in, BUFFER_SIZE_INPUT_STREAM), meta.getEncoding());
        } else {
            // No encoding configured: fall back to the reader's default charset
            data.isr = new InputStreamReader(new BufferedInputStream(data.in, BUFFER_SIZE_INPUT_STREAM));
        }
        String encoding = data.isr.getEncoding();
        data.encodingType = EncodingType.guessEncodingType(encoding);
        // /////////////////////////////////////////////////////////////////////////////
        // Read the first lines...
        /*
         * Keep track of the status of the file: are there any lines left to read?
         */
        data.doneReading = false;
        /*
         * OK, read a number of lines in the buffer: The header rows The nr rows in the page : optional The footer rows
         */
        int bufferSize = 1;
        bufferSize += meta.hasHeader() ? meta.getNrHeaderLines() : 0;
        bufferSize += meta.isLayoutPaged() ? meta.getNrLinesPerPage() * (Math.max(0, meta.getNrWraps()) + 1)
            // it helps when we have wrapped input w/o header
            : Math.max(0, meta.getNrWraps());
        bufferSize += meta.hasFooter() ? meta.getNrFooterLines() : 0;
        // See if we need to skip the document header lines...
        if (meta.isLayoutPaged()) {
            for (int i = 0; i < meta.getNrLinesDocHeader(); i++) {
                // Just skip these... header and footer: not wrapped
                getLine(log, data.isr, data.encodingType, data.fileFormatType, data.lineStringBuilder);
                lineNumberInFile++;
            }
        }
        for (int i = 0; i < bufferSize && !data.doneReading; i++) {
            boolean wasNotFiltered = tryToReadLine(!meta.hasHeader() || i >= meta.getNrHeaderLines());
            if (!wasNotFiltered) {
                // grab another line, this one got filtered
                bufferSize++;
            }
        }
        // Reset counters etc.
        data.headerLinesRead = 0;
        data.footerLinesRead = 0;
        data.pageLinesRead = 0;
        // Without a header there is nothing left to consume before the data rows
        data.doneWithHeader = !meta.hasHeader();
    } catch (Exception e) {
        // data.file may be null if the failure happened before a file was assigned; guard against
        // a secondary NullPointerException that would hide the original error.
        String fileUri = data.file != null ? data.file.getName().getFriendlyURI() : "<unknown>";
        String errorMsg = "Couldn't open file #" + data.filenr + " : " + fileUri + " --> " + e.toString();
        logError(errorMsg);
        if (failAfterBadFile(errorMsg)) {
            stopAll();
        }
        setErrors(getErrors() + 1);
        return false;
    }
    return true;
}
Also used : CompressionProvider(org.pentaho.di.core.compress.CompressionProvider) InputStreamReader(java.io.InputStreamReader) BufferedInputStream(java.io.BufferedInputStream) ValueMetaString(org.pentaho.di.core.row.value.ValueMetaString) ResultFile(org.pentaho.di.core.ResultFile) Date(java.util.Date) KettleException(org.pentaho.di.core.exception.KettleException) KettleFileException(org.pentaho.di.core.exception.KettleFileException)

Example 3 with CompressionProvider

use of org.pentaho.di.core.compress.CompressionProvider in project pentaho-kettle by pentaho.

the class TextFileOutput method getCompressionProvider.

/**
 * Resolves the compression provider configured for this step's output.
 *
 * <p>When no compression is configured, {@code FILE_COMPRESSION_TYPE_NONE} is used as the
 * provider name.
 *
 * @return the provider registered under the configured name; never {@code null}
 * @throws KettleException if no provider is registered under that name, or if the provider
 *         does not support output streams
 */
private CompressionProvider getCompressionProvider() throws KettleException {
    final String providerName = Const.NVL(meta.getFileCompression(), FILE_COMPRESSION_TYPE_NONE);
    final CompressionProvider resolved = CompressionProviderFactory.getInstance().getCompressionProviderByName(providerName);

    // Happy path first: a known provider that can write
    if (resolved != null && resolved.supportsOutput()) {
        return resolved;
    }

    if (resolved == null) {
        throw new KettleException("No compression provider found with name = " + providerName);
    }
    throw new KettleException("Compression provider " + providerName + " does not support output streams!");
}
Also used : CompressionProvider(org.pentaho.di.core.compress.CompressionProvider) ZIPCompressionProvider(org.pentaho.di.core.compress.zip.ZIPCompressionProvider) KettleException(org.pentaho.di.core.exception.KettleException)

Example 4 with CompressionProvider

use of org.pentaho.di.core.compress.CompressionProvider in project pentaho-kettle by pentaho.

the class ZIPCompressionOutputStreamTest method setUp.

/**
 * Prepares a fresh ZIP output stream, backed by an in-memory byte buffer, before each test.
 *
 * @throws Exception if the fixture cannot be created
 */
@Before
public void setUp() throws Exception {
    factory = CompressionProviderFactory.getInstance();
    // Look the provider up by name through the shared factory
    final CompressionProvider zipProvider = factory.getCompressionProviderByName(PROVIDER_NAME);

    // In-memory sink that receives whatever the stream under test writes
    internalStream = new ByteArrayOutputStream();
    outStream = new ZIPCompressionOutputStream(internalStream, zipProvider);
}
Also used : CompressionProvider(org.pentaho.di.core.compress.CompressionProvider) ByteArrayOutputStream(java.io.ByteArrayOutputStream) Before(org.junit.Before)

Example 5 with CompressionProvider

use of org.pentaho.di.core.compress.CompressionProvider in project pentaho-kettle by pentaho.

the class ZIPCompressionInputStreamTest method testRead.

/**
 * Smoke test: reading from a ZIP input stream wrapped around a small in-memory payload must
 * complete without throwing. The number of bytes read is not asserted.
 *
 * @throws IOException if the read fails
 */
@Test
public void testRead() throws IOException {
    final CompressionProvider zipProvider = inStream.getCompressionProvider();
    final ByteArrayInputStream rawBytes = new ByteArrayInputStream("Test".getBytes());

    // Instantiated through an empty anonymous subclass
    // NOTE(review): presumably because the constructor is not directly accessible here - confirm
    inStream = new ZIPCompressionInputStream(rawBytes, zipProvider) {
    };

    final byte[] buffer = new byte[100];
    final int readable = inStream.available();
    inStream.read(buffer, 0, readable);
}
Also used : CompressionProvider(org.pentaho.di.core.compress.CompressionProvider) ByteArrayInputStream(java.io.ByteArrayInputStream) Test(org.junit.Test)

Aggregations

CompressionProvider (org.pentaho.di.core.compress.CompressionProvider): 27, Test (org.junit.Test): 14, ByteArrayOutputStream (java.io.ByteArrayOutputStream): 11, KettleException (org.pentaho.di.core.exception.KettleException): 8, ValueMetaString (org.pentaho.di.core.row.value.ValueMetaString): 6, IOException (java.io.IOException): 5, InputStream (java.io.InputStream): 5, InputStreamReader (java.io.InputStreamReader): 5, FileObject (org.apache.commons.vfs2.FileObject): 5, CompressionInputStream (org.pentaho.di.core.compress.CompressionInputStream): 5, FileInputList (org.pentaho.di.core.fileinput.FileInputList): 5, Before (org.junit.Before): 4, ArrayList (java.util.ArrayList): 3, ByteArrayInputStream (java.io.ByteArrayInputStream): 2, MessageBox (org.eclipse.swt.widgets.MessageBox): 2, Table (org.eclipse.swt.widgets.Table): 2, TableItem (org.eclipse.swt.widgets.TableItem): 2, EncodingType (org.pentaho.di.trans.steps.fileinput.text.EncodingType): 2, TextFileInputMeta (org.pentaho.di.trans.steps.fileinput.text.TextFileInputMeta): 2, BufferedInputStream (java.io.BufferedInputStream): 1