Search in sources :

Example 1 with TextFileInputMeta

use of org.pentaho.di.trans.steps.fileinput.text.TextFileInputMeta in project pentaho-kettle by pentaho.

the class TextFileCSVImportProgressDialog method doScan.

/**
 * Samples lines from the already opened reader, guesses the data type and format of every
 * configured input field and returns a textual report of the findings.
 *
 * <p>Each sampled line is converted to a row of strings (using a clone of the step metadata in
 * which every field is typed as String) and each column value is fed to a per-column
 * {@link StringEvaluator}. After sampling, the advised conversion of every evaluator is written
 * back into {@code meta.inputFields} (type, trim type, format, symbols, length, precision).
 *
 * <p>Columns for which no value was sampled (empty file, or rows shorter than the configured
 * field list) are reported as String fields instead of failing.
 *
 * @param monitor progress monitor that is updated per sampled line and polled for cancellation
 * @return the scan report that is shown to the user
 * @throws KettleException declared by the line reading / row conversion helpers used here
 */
private String doScan(IProgressMonitor monitor) throws KettleException {
    if (samples > 0) {
        monitor.beginTask(BaseMessages.getString(PKG, "TextFileCSVImportProgressDialog.Task.ScanningFile"), samples + 1);
    } else {
        monitor.beginTask(BaseMessages.getString(PKG, "TextFileCSVImportProgressDialog.Task.ScanningFile"), 2);
    }
    String line = "";
    long fileLineNumber = 0;
    DecimalFormatSymbols dfs = new DecimalFormatSymbols();
    int nrfields = meta.inputFields.length;
    RowMetaInterface outputRowMeta = new RowMeta();
    meta.getFields(outputRowMeta, null, null, null, transMeta, null, null);
    // Remove the storage meta-data (don't go for lazy conversion during scan)
    for (ValueMetaInterface valueMeta : outputRowMeta.getValueMetaList()) {
        valueMeta.setStorageMetadata(null);
        valueMeta.setStorageType(ValueMetaInterface.STORAGE_TYPE_NORMAL);
    }
    RowMetaInterface convertRowMeta = outputRowMeta.cloneToType(ValueMetaInterface.TYPE_STRING);
    // How many times did we see a null value, per field?
    int[] nrnull = new int[nrfields];
    // String info
    // min string
    String[] minstr = new String[nrfields];
    // max string
    String[] maxstr = new String[nrfields];
    // first occ. of string?
    boolean[] firststr = new boolean[nrfields];
    // Date info
    // is the field perhaps a Date?
    boolean[] isDate = new boolean[nrfields];
    // How many date formats work?
    int[] dateFormatCount = new int[nrfields];
    // What are the date formats that work?
    boolean[][] dateFormat = new boolean[nrfields][Const.getDateFormats().length];
    // min date value
    Date[][] minDate = new Date[nrfields][Const.getDateFormats().length];
    // max date value
    Date[][] maxDate = new Date[nrfields][Const.getDateFormats().length];
    // Number info
    // is the field perhaps a Number?
    boolean[] isNumber = new boolean[nrfields];
    // How many number formats work?
    int[] numberFormatCount = new int[nrfields];
    // What are the number formats that work?
    boolean[][] numberFormat = new boolean[nrfields][Const.getNumberFormats().length];
    // min number value
    // (sized by the number formats: these arrays are indexed in the number-format loop below)
    double[][] minValue = new double[nrfields][Const.getNumberFormats().length];
    // max number value
    double[][] maxValue = new double[nrfields][Const.getNumberFormats().length];
    // remember the precision?
    int[][] numberPrecision = new int[nrfields][Const.getNumberFormats().length];
    // remember the length?
    int[][] numberLength = new int[nrfields][Const.getNumberFormats().length];
    for (int i = 0; i < nrfields; i++) {
        BaseFileField field = meta.inputFields[i];
        if (log.isDebug()) {
            debug = "init field #" + i;
        }
        if (replaceMeta) {
            // Clear previous info...
            field.setName(meta.inputFields[i].getName());
            field.setType(meta.inputFields[i].getType());
            field.setFormat("");
            field.setLength(-1);
            field.setPrecision(-1);
            field.setCurrencySymbol(dfs.getCurrencySymbol());
            field.setDecimalSymbol("" + dfs.getDecimalSeparator());
            field.setGroupSymbol("" + dfs.getGroupingSeparator());
            field.setNullString("-");
            field.setTrimType(ValueMetaInterface.TRIM_TYPE_NONE);
        }
        nrnull[i] = 0;
        minstr[i] = "";
        maxstr[i] = "";
        firststr[i] = true;
        // Init data guess
        isDate[i] = true;
        for (int j = 0; j < Const.getDateFormats().length; j++) {
            dateFormat[i][j] = true;
            minDate[i][j] = Const.MAX_DATE;
            maxDate[i][j] = Const.MIN_DATE;
        }
        dateFormatCount[i] = Const.getDateFormats().length;
        // Init number guess
        isNumber[i] = true;
        for (int j = 0; j < Const.getNumberFormats().length; j++) {
            numberFormat[i][j] = true;
            minValue[i][j] = Double.MAX_VALUE;
            maxValue[i][j] = -Double.MAX_VALUE;
            numberPrecision[i][j] = -1;
            numberLength[i][j] = -1;
        }
        numberFormatCount[i] = Const.getNumberFormats().length;
    }
    // Work on a copy in which every field is a String, so the raw text reaches the evaluators
    TextFileInputMeta strinfo = (TextFileInputMeta) meta.clone();
    for (int i = 0; i < nrfields; i++) {
        strinfo.inputFields[i].setType(ValueMetaInterface.TYPE_STRING);
    }
    // Sample <samples> rows...
    debug = "get first line";
    StringBuilder lineBuffer = new StringBuilder(256);
    int fileFormatType = meta.getFileFormatTypeNr();
    // If the file has a header we overwrite the first line
    // However, if it doesn't have a header, take a new line
    //
    line = TextFileInputUtils.getLine(log, reader, encodingType, fileFormatType, lineBuffer);
    fileLineNumber++;
    int skipped = 1;
    if (meta.content.header) {
        while (line != null && skipped < meta.content.nrHeaderLines) {
            line = TextFileInputUtils.getLine(log, reader, encodingType, fileFormatType, lineBuffer);
            skipped++;
            fileLineNumber++;
        }
    }
    int linenr = 1;
    List<StringEvaluator> evaluators = new ArrayList<StringEvaluator>();
    // Allocate number and date parsers
    DecimalFormat df2 = (DecimalFormat) NumberFormat.getInstance();
    DecimalFormatSymbols dfs2 = new DecimalFormatSymbols();
    SimpleDateFormat daf2 = new SimpleDateFormat();
    boolean errorFound = false;
    // samples == 0 means: scan until the end of the input
    while (!errorFound && line != null && (linenr <= samples || samples == 0) && !monitor.isCanceled()) {
        monitor.subTask(BaseMessages.getString(PKG, "TextFileCSVImportProgressDialog.Task.ScanningLine", "" + linenr));
        if (samples > 0) {
            monitor.worked(1);
        }
        if (log.isDebug()) {
            debug = "convert line #" + linenr + " to row";
        }
        RowMetaInterface rowMeta = new RowMeta();
        meta.getFields(rowMeta, "stepname", null, null, transMeta, null, null);
        // Remove the storage meta-data (don't go for lazy conversion during scan)
        for (ValueMetaInterface valueMeta : rowMeta.getValueMetaList()) {
            valueMeta.setStorageMetadata(null);
            valueMeta.setStorageType(ValueMetaInterface.STORAGE_TYPE_NORMAL);
        }
        String delimiter = transMeta.environmentSubstitute(meta.content.separator);
        String enclosure = transMeta.environmentSubstitute(meta.content.enclosure);
        String escapeCharacter = transMeta.environmentSubstitute(meta.content.escapeCharacter);
        Object[] r = TextFileInputUtils.convertLineToRow(log, new TextFileLine(line, fileLineNumber, null), strinfo, null, 0, outputRowMeta, convertRowMeta, FileInputList.createFilePathList(transMeta, meta.inputFiles.fileName, meta.inputFiles.fileMask, meta.inputFiles.excludeFileMask, meta.inputFiles.fileRequired, meta.inputFiles.includeSubFolderBoolean())[0], rownumber, delimiter, enclosure, escapeCharacter, null, new BaseFileInputAdditionalField(), null, null, false, null, null, null, null, null);
        if (r == null) {
            // The loop condition ends the scan on the next check
            errorFound = true;
            continue;
        }
        rownumber++;
        for (int i = 0; i < nrfields && i < r.length; i++) {
            // Evaluators are created lazily, one per column, the first time the column is seen
            StringEvaluator evaluator;
            if (i >= evaluators.size()) {
                evaluator = new StringEvaluator(true);
                evaluators.add(evaluator);
            } else {
                evaluator = evaluators.get(i);
            }
            String string = rowMeta.getString(r, i);
            evaluator.evaluateString(string);
        }
        fileLineNumber++;
        linenr++;
        // Grab another line...
        //
        line = TextFileInputUtils.getLine(log, reader, encodingType, fileFormatType, lineBuffer);
    }
    monitor.worked(1);
    monitor.setTaskName(BaseMessages.getString(PKG, "TextFileCSVImportProgressDialog.Task.AnalyzingResults"));
    // Show information on items using a dialog box
    //
    StringBuilder message = new StringBuilder();
    message.append(BaseMessages.getString(PKG, "TextFileCSVImportProgressDialog.Info.ResultAfterScanning", "" + (linenr - 1)));
    message.append(BaseMessages.getString(PKG, "TextFileCSVImportProgressDialog.Info.HorizontalLine"));
    for (int i = 0; i < nrfields; i++) {
        BaseFileField field = meta.inputFields[i];
        // There is no evaluator for a column that never received a value during sampling
        StringEvaluator evaluator = i < evaluators.size() ? evaluators.get(i) : null;
        List<StringEvaluationResult> evaluationResults;
        if (evaluator == null) {
            evaluationResults = new ArrayList<StringEvaluationResult>();
            // Nothing was sampled for this column: report it as a String, keep the current length
            field.setType(ValueMetaInterface.TYPE_STRING);
        } else {
            evaluationResults = evaluator.getStringEvaluationResults();
            if (evaluationResults.isEmpty()) {
                field.setType(ValueMetaInterface.TYPE_STRING);
                field.setLength(evaluator.getMaxLength());
            } else {
                StringEvaluationResult result = evaluator.getAdvicedResult();
                if (result != null) {
                    // Take the first option we find, list the others below...
                    //
                    ValueMetaInterface conversionMeta = result.getConversionMeta();
                    field.setType(conversionMeta.getType());
                    field.setTrimType(conversionMeta.getTrimType());
                    field.setFormat(conversionMeta.getConversionMask());
                    field.setDecimalSymbol(conversionMeta.getDecimalSymbol());
                    field.setGroupSymbol(conversionMeta.getGroupingSymbol());
                    field.setLength(conversionMeta.getLength());
                    field.setPrecision(conversionMeta.getPrecision());
                    nrnull[i] = result.getNrNull();
                    minstr[i] = result.getMin() == null ? "" : result.getMin().toString();
                    maxstr[i] = result.getMax() == null ? "" : result.getMax().toString();
                }
            }
        }
        message.append(BaseMessages.getString(PKG, "TextFileCSVImportProgressDialog.Info.FieldNumber", "" + (i + 1)));
        message.append(BaseMessages.getString(PKG, "TextFileCSVImportProgressDialog.Info.FieldName", field.getName()));
        message.append(BaseMessages.getString(PKG, "TextFileCSVImportProgressDialog.Info.FieldType", field.getTypeDesc()));
        switch(field.getType()) {
            case ValueMetaInterface.TYPE_NUMBER:
                message.append(BaseMessages.getString(PKG, "TextFileCSVImportProgressDialog.Info.EstimatedLength", (field.getLength() < 0 ? "-" : "" + field.getLength())));
                message.append(BaseMessages.getString(PKG, "TextFileCSVImportProgressDialog.Info.EstimatedPrecision", field.getPrecision() < 0 ? "-" : "" + field.getPrecision()));
                message.append(BaseMessages.getString(PKG, "TextFileCSVImportProgressDialog.Info.NumberFormat", field.getFormat()));
                if (!evaluationResults.isEmpty()) {
                    if (evaluationResults.size() > 1) {
                        message.append(BaseMessages.getString(PKG, "TextFileCSVImportProgressDialog.Info.WarnNumberFormat"));
                    }
                    for (StringEvaluationResult seResult : evaluationResults) {
                        String mask = seResult.getConversionMeta().getConversionMask();
                        message.append(BaseMessages.getString(PKG, "TextFileCSVImportProgressDialog.Info.NumberFormat2", mask));
                        message.append(BaseMessages.getString(PKG, "TextFileCSVImportProgressDialog.Info.TrimType", seResult.getConversionMeta().getTrimType()));
                        message.append(BaseMessages.getString(PKG, "TextFileCSVImportProgressDialog.Info.NumberMinValue", seResult.getMin()));
                        message.append(BaseMessages.getString(PKG, "TextFileCSVImportProgressDialog.Info.NumberMaxValue", seResult.getMax()));
                        try {
                            df2.applyPattern(mask);
                            df2.setDecimalFormatSymbols(dfs2);
                            double mn = df2.parse(seResult.getMin().toString()).doubleValue();
                            message.append(BaseMessages.getString(PKG, "TextFileCSVImportProgressDialog.Info.NumberExample", mask, seResult.getMin(), Double.toString(mn)));
                        } catch (Exception e) {
                            // The example line is optional: skip it when the minimum cannot be re-parsed
                            if (log.isDetailed()) {
                                log.logDetailed("This is unexpected: parsing [" + seResult.getMin() + "] with format [" + mask + "] did not work.");
                            }
                        }
                    }
                }
                message.append(BaseMessages.getString(PKG, "TextFileCSVImportProgressDialog.Info.NumberNrNullValues", "" + nrnull[i]));
                break;
            case ValueMetaInterface.TYPE_STRING:
                message.append(BaseMessages.getString(PKG, "TextFileCSVImportProgressDialog.Info.StringMaxLength", "" + field.getLength()));
                message.append(BaseMessages.getString(PKG, "TextFileCSVImportProgressDialog.Info.StringMinValue", minstr[i]));
                message.append(BaseMessages.getString(PKG, "TextFileCSVImportProgressDialog.Info.StringMaxValue", maxstr[i]));
                message.append(BaseMessages.getString(PKG, "TextFileCSVImportProgressDialog.Info.StringNrNullValues", "" + nrnull[i]));
                break;
            case ValueMetaInterface.TYPE_DATE:
                message.append(BaseMessages.getString(PKG, "TextFileCSVImportProgressDialog.Info.DateMaxLength", field.getLength() < 0 ? "-" : "" + field.getLength()));
                message.append(BaseMessages.getString(PKG, "TextFileCSVImportProgressDialog.Info.DateFormat", field.getFormat()));
                if (dateFormatCount[i] > 1) {
                    message.append(BaseMessages.getString(PKG, "TextFileCSVImportProgressDialog.Info.WarnDateFormat"));
                }
                if (!Utils.isEmpty(minstr[i])) {
                    for (int x = 0; x < Const.getDateFormats().length; x++) {
                        if (dateFormat[i][x]) {
                            message.append(BaseMessages.getString(PKG, "TextFileCSVImportProgressDialog.Info.DateFormat2", Const.getDateFormats()[x]));
                            Date mindate = minDate[i][x];
                            Date maxdate = maxDate[i][x];
                            message.append(BaseMessages.getString(PKG, "TextFileCSVImportProgressDialog.Info.DateMinValue", mindate.toString()));
                            message.append(BaseMessages.getString(PKG, "TextFileCSVImportProgressDialog.Info.DateMaxValue", maxdate.toString()));
                            daf2.applyPattern(Const.getDateFormats()[x]);
                            try {
                                Date md = daf2.parse(minstr[i]);
                                message.append(BaseMessages.getString(PKG, "TextFileCSVImportProgressDialog.Info.DateExample", Const.getDateFormats()[x], minstr[i], md.toString()));
                            } catch (Exception e) {
                                // The example line is optional: skip it when the minimum cannot be re-parsed
                                if (log.isDetailed()) {
                                    log.logDetailed("This is unexpected: parsing [" + minstr[i] + "] with format [" + Const.getDateFormats()[x] + "] did not work.");
                                }
                            }
                        }
                    }
                }
                message.append(BaseMessages.getString(PKG, "TextFileCSVImportProgressDialog.Info.DateNrNullValues", "" + nrnull[i]));
                break;
            default:
                break;
        }
        // linenr starts at 1, so linenr - 1 is the number of lines that were actually scanned
        if (nrnull[i] == linenr - 1) {
            message.append(BaseMessages.getString(PKG, "TextFileCSVImportProgressDialog.Info.AllNullValues"));
        }
        message.append(Const.CR);
    }
    monitor.worked(1);
    monitor.done();
    return message.toString();
}
Also used : RowMeta(org.pentaho.di.core.row.RowMeta) DecimalFormat(java.text.DecimalFormat) BaseFileField(org.pentaho.di.trans.steps.file.BaseFileField) ArrayList(java.util.ArrayList) RowMetaInterface(org.pentaho.di.core.row.RowMetaInterface) TextFileLine(org.pentaho.di.trans.steps.fileinput.text.TextFileLine) StringEvaluationResult(org.pentaho.di.core.util.StringEvaluationResult) DecimalFormatSymbols(java.text.DecimalFormatSymbols) Date(java.util.Date) KettleException(org.pentaho.di.core.exception.KettleException) InvocationTargetException(java.lang.reflect.InvocationTargetException) ValueMetaInterface(org.pentaho.di.core.row.ValueMetaInterface) StringEvaluator(org.pentaho.di.core.util.StringEvaluator) TextFileInputMeta(org.pentaho.di.trans.steps.fileinput.text.TextFileInputMeta) BaseFileInputAdditionalField(org.pentaho.di.trans.steps.file.BaseFileInputAdditionalField) SimpleDateFormat(java.text.SimpleDateFormat)

Example 2 with TextFileInputMeta

use of org.pentaho.di.trans.steps.fileinput.text.TextFileInputMeta in project pentaho-kettle by pentaho.

the class TextFileInputDialog method getFirst.

/**
 * Reads the first lines of the first file selected by the current dialog settings.
 *
 * <p>Up to {@code nrlines} lines, plus the configured number of header lines when the header
 * option is enabled, are returned. A value of 0 for {@code nrlines} reads until the end of the
 * file.
 *
 * @param nrlines number of lines requested, or 0 for all lines
 * @param skipHeaders when true, the document header lines (paged layout) and the header lines
 *        are read and discarded before lines are collected
 * @return the lines read; empty when the dialog settings select no file
 * @throws KettleException wrapping any error raised while opening or reading the file
 */
private List<String> getFirst(int nrlines, boolean skipHeaders) throws KettleException {
    TextFileInputMeta meta = new TextFileInputMeta();
    getInfo(meta, true);
    FileInputList textFileList = meta.getFileInputList(transMeta);
    // Raw stream; kept so it can be closed when wrapping it in the compression stream fails
    InputStream fi = null;
    CompressionInputStream f = null;
    StringBuilder lineStringBuilder = new StringBuilder(256);
    int fileFormatType = meta.getFileFormatTypeNr();
    List<String> retval = new ArrayList<>();
    if (textFileList.nrOfFiles() > 0) {
        FileObject file = textFileList.getFile(0);
        try {
            fi = KettleVFS.getInputStream(file);
            CompressionProvider provider = CompressionProviderFactory.getInstance().createCompressionProviderInstance(meta.content.fileCompression);
            f = provider.createInputStream(fi);
            InputStreamReader reader;
            if (meta.getEncoding() != null && meta.getEncoding().length() > 0) {
                reader = new InputStreamReader(f, meta.getEncoding());
            } else {
                reader = new InputStreamReader(f);
            }
            EncodingType encodingType = EncodingType.guessEncodingType(reader.getEncoding());
            int linenr = 0;
            int maxnr = nrlines + (meta.content.header ? meta.content.nrHeaderLines : 0);
            if (skipHeaders) {
                // Skip the document header lines first if more than one, it helps us position
                if (meta.content.layoutPaged && meta.content.nrLinesDocHeader > 0) {
                    int skipped = 0;
                    String line = TextFileInputUtils.getLine(log, reader, encodingType, fileFormatType, lineStringBuilder);
                    while (line != null && skipped < meta.content.nrLinesDocHeader - 1) {
                        skipped++;
                        line = TextFileInputUtils.getLine(log, reader, encodingType, fileFormatType, lineStringBuilder);
                    }
                }
                // Skip the header lines first if more than one, it helps us position
                if (meta.content.header && meta.content.nrHeaderLines > 0) {
                    int skipped = 0;
                    String line = TextFileInputUtils.getLine(log, reader, encodingType, fileFormatType, lineStringBuilder);
                    while (line != null && skipped < meta.content.nrHeaderLines - 1) {
                        skipped++;
                        line = TextFileInputUtils.getLine(log, reader, encodingType, fileFormatType, lineStringBuilder);
                    }
                }
            }
            String line = TextFileInputUtils.getLine(log, reader, encodingType, fileFormatType, lineStringBuilder);
            while (line != null && (linenr < maxnr || nrlines == 0)) {
                retval.add(line);
                linenr++;
                line = TextFileInputUtils.getLine(log, reader, encodingType, fileFormatType, lineStringBuilder);
            }
        } catch (Exception e) {
            throw new KettleException(BaseMessages.getString(PKG, "TextFileInputDialog.Exception.ErrorGettingFirstLines", "" + nrlines, file.getName().getURI()), e);
        } finally {
            try {
                if (f != null) {
                    f.close();
                } else if (fi != null) {
                    // The compression wrapper was never created: release the raw stream ourselves
                    fi.close();
                }
            } catch (Exception ignored) {
                // Best effort: a failure to close must not hide the result or the original error
            }
        }
    }
    return retval;
}
Also used : KettleException(org.pentaho.di.core.exception.KettleException) InputStreamReader(java.io.InputStreamReader) CompressionInputStream(org.pentaho.di.core.compress.CompressionInputStream) CompressionInputStream(org.pentaho.di.core.compress.CompressionInputStream) InputStream(java.io.InputStream) ArrayList(java.util.ArrayList) EncodingType(org.pentaho.di.trans.steps.fileinput.text.EncodingType) ValueMetaString(org.pentaho.di.core.row.value.ValueMetaString) KettleException(org.pentaho.di.core.exception.KettleException) IOException(java.io.IOException) CompressionProvider(org.pentaho.di.core.compress.CompressionProvider) TextFileInputMeta(org.pentaho.di.trans.steps.fileinput.text.TextFileInputMeta) FileObject(org.apache.commons.vfs2.FileObject) FileInputList(org.pentaho.di.core.fileinput.FileInputList)

Example 3 with TextFileInputMeta

use of org.pentaho.di.trans.steps.fileinput.text.TextFileInputMeta in project pentaho-kettle by pentaho.

the class TextFileInputDialog method preview.

/**
 * Previews the rows produced by the step as currently configured in this dialog.
 *
 * <p>Asks for the number of rows, runs a preview transformation with a progress dialog, shows
 * the log text when the run finished with errors, and then opens the preview rows dialog.
 * Nothing is previewed when the step takes its file names from a previous step; the user is
 * asked to specify a sample file instead.
 */
private void preview() {
    // Capture the current dialog settings in a temporary text file input definition
    TextFileInputMeta dialogMeta = new TextFileInputMeta();
    getInfo(dialogMeta, true);
    if (dialogMeta.inputFiles.acceptingFilenames) {
        MessageBox sampleFileBox = new MessageBox(shell, SWT.OK | SWT.ICON_INFORMATION);
        sampleFileBox.setMessage(BaseMessages.getString(PKG, "TextFileInputDialog.Dialog.SpecifyASampleFile.Message"));
        sampleFileBox.setText(BaseMessages.getString(PKG, "TextFileInputDialog.Dialog.SpecifyASampleFile.Title"));
        sampleFileBox.open();
        return;
    }

    TransMeta previewTransMeta = TransPreviewFactory.generatePreviewTransformation(transMeta, dialogMeta, wStepname.getText());

    int defaultSize = props.getDefaultPreviewSize();
    String sizeTitle = BaseMessages.getString(PKG, "TextFileInputDialog.PreviewSize.DialogTitle");
    String sizeMessage = BaseMessages.getString(PKG, "TextFileInputDialog.PreviewSize.DialogMessage");
    EnterNumberDialog sizeDialog = new EnterNumberDialog(shell, defaultSize, sizeTitle, sizeMessage);
    int rowLimit = sizeDialog.open();
    if (rowLimit <= 0) {
        // No positive row count was entered: nothing to preview
        return;
    }

    TransPreviewProgressDialog runDialog = new TransPreviewProgressDialog(shell, previewTransMeta, new String[] { wStepname.getText() }, new int[] { rowLimit });
    runDialog.open();
    Trans previewTrans = runDialog.getTrans();
    String logText = runDialog.getLoggingText();

    // Show the log only for a run that was not cancelled and reported errors
    if (!runDialog.isCancelled() && previewTrans.getResult() != null && previewTrans.getResult().getNrErrors() > 0) {
        EnterTextDialog errorLogDialog = new EnterTextDialog(shell, BaseMessages.getString(PKG, "System.Dialog.PreviewError.Title"), BaseMessages.getString(PKG, "System.Dialog.PreviewError.Message"), logText, true);
        errorLogDialog.setReadOnly();
        errorLogDialog.open();
    }

    PreviewRowsDialog rowsDialog = new PreviewRowsDialog(shell, transMeta, SWT.NONE, wStepname.getText(), runDialog.getPreviewRowsMeta(wStepname.getText()), runDialog.getPreviewRows(wStepname.getText()), logText);
    rowsDialog.open();
}
Also used : TextFileInputMeta(org.pentaho.di.trans.steps.fileinput.text.TextFileInputMeta) TransPreviewProgressDialog(org.pentaho.di.ui.trans.dialog.TransPreviewProgressDialog) TransMeta(org.pentaho.di.trans.TransMeta) EnterTextDialog(org.pentaho.di.ui.core.dialog.EnterTextDialog) ValueMetaString(org.pentaho.di.core.row.value.ValueMetaString) PreviewRowsDialog(org.pentaho.di.ui.core.dialog.PreviewRowsDialog) EnterNumberDialog(org.pentaho.di.ui.core.dialog.EnterNumberDialog) Trans(org.pentaho.di.trans.Trans) MessageBox(org.eclipse.swt.widgets.MessageBox)

Example 4 with TextFileInputMeta

use of org.pentaho.di.trans.steps.fileinput.text.TextFileInputMeta in project pentaho-kettle by pentaho.

the class TextFileInputDialog method getCSV.

/**
 * Determines the field layout of the first selected file and fills the fields table.
 *
 * <p>The first line of the file is split with the configured separator, enclosure and escape
 * character to obtain the field names (or generated "FieldN" names when the header option is
 * off or a name is empty). The user is then asked how many lines to sample, and a
 * {@link TextFileCSVImportProgressDialog} scans them to guess the field types. Finally the scan
 * report is shown. When fields already exist the user chooses whether to clear them or cancel.
 *
 * <p>All errors are reported to the user through dialogs; nothing is thrown to the caller.
 */
private void getCSV() {
    TextFileInputMeta meta = new TextFileInputMeta();
    getInfo(meta, true);
    // CSV without separator defined
    // (constant-first comparison: a missing file type must not raise a NullPointerException)
    if ("CSV".equalsIgnoreCase(meta.content.fileType) && (meta.content.separator == null || meta.content.separator.isEmpty())) {
        MessageBox mb = new MessageBox(shell, SWT.OK | SWT.ICON_ERROR);
        mb.setMessage(BaseMessages.getString(PKG, "TextFileInput.Exception.NoSeparator"));
        mb.setText(BaseMessages.getString(PKG, "TextFileInputDialog.DialogTitle"));
        mb.open();
        return;
    }
    // Snapshot of the settings before the table is rewritten; used when fields are not cleared
    TextFileInputMeta previousMeta = (TextFileInputMeta) meta.clone();
    FileInputList textFileList = meta.getFileInputList(transMeta);
    // Raw stream; kept so it can be closed when wrapping it in the compression stream fails
    InputStream fileInputStream = null;
    CompressionInputStream inputStream = null;
    StringBuilder lineStringBuilder = new StringBuilder(256);
    int fileFormatType = meta.getFileFormatTypeNr();
    String delimiter = transMeta.environmentSubstitute(meta.content.separator);
    String enclosure = transMeta.environmentSubstitute(meta.content.enclosure);
    String escapeCharacter = transMeta.environmentSubstitute(meta.content.escapeCharacter);
    if (textFileList.nrOfFiles() > 0) {
        int clearFields = meta.content.header ? SWT.YES : SWT.NO;
        int nrInputFields = meta.inputFields.length;
        if (nrInputFields > 0) {
            MessageBox mb = new MessageBox(shell, SWT.YES | SWT.NO | SWT.CANCEL | SWT.ICON_QUESTION);
            mb.setMessage(BaseMessages.getString(PKG, "TextFileInputDialog.ClearFieldList.DialogMessage"));
            mb.setText(BaseMessages.getString(PKG, "TextFileInputDialog.ClearFieldList.DialogTitle"));
            clearFields = mb.open();
            if (clearFields == SWT.CANCEL) {
                return;
            }
        }
        try {
            wFields.table.removeAll();
            FileObject fileObject = textFileList.getFile(0);
            fileInputStream = KettleVFS.getInputStream(fileObject);
            Table table = wFields.table;
            CompressionProvider provider = CompressionProviderFactory.getInstance().createCompressionProviderInstance(meta.content.fileCompression);
            inputStream = provider.createInputStream(fileInputStream);
            InputStreamReader reader;
            if (meta.getEncoding() != null && meta.getEncoding().length() > 0) {
                reader = new InputStreamReader(inputStream, meta.getEncoding());
            } else {
                reader = new InputStreamReader(inputStream);
            }
            EncodingType encodingType = EncodingType.guessEncodingType(reader.getEncoding());
            // Scan the header-line, determine fields...
            String line = TextFileInputUtils.getLine(log, reader, encodingType, fileFormatType, lineStringBuilder);
            if (line != null) {
                // Estimate the number of input fields...
                // Chop up the line using the delimiter
                String[] fields = TextFileInputUtils.guessStringsFromLine(transMeta, log, line, meta, delimiter, enclosure, escapeCharacter);
                for (int i = 0; i < fields.length; i++) {
                    String field = fields[i];
                    if (field == null || field.length() == 0 || !meta.content.header) {
                        field = "Field" + (i + 1);
                    } else {
                        // Trim the field
                        field = Const.trim(field);
                        // Replace all spaces & - with underscore _
                        field = Const.replace(field, " ", "_");
                        field = Const.replace(field, "-", "_");
                    }
                    TableItem item = new TableItem(table, SWT.NONE);
                    item.setText(1, field);
                    // The default type is String...
                    item.setText(2, "String");
                }
                wFields.setRowNums();
                wFields.optWidth(true);
                // Copy it...
                getInfo(meta, true);
                // Sample a few lines to determine the correct type of the fields...
                String shellText = BaseMessages.getString(PKG, "TextFileInputDialog.LinesToSample.DialogTitle");
                String lineText = BaseMessages.getString(PKG, "TextFileInputDialog.LinesToSample.DialogMessage");
                EnterNumberDialog end = new EnterNumberDialog(shell, 100, shellText, lineText);
                int samples = end.open();
                if (samples >= 0) {
                    getInfo(meta, true);
                    // The scan continues on the same reader, i.e. after the line read above
                    TextFileCSVImportProgressDialog pd = new TextFileCSVImportProgressDialog(shell, meta, transMeta, reader, samples, clearFields == SWT.YES);
                    String message = pd.open();
                    if (message != null) {
                        wFields.removeAll();
                        // OK, what's the result of our search?
                        getData(meta);
                        //
                        if (clearFields == SWT.NO) {
                            getFieldsData(previousMeta, true);
                            wFields.table.setSelection(previousMeta.inputFields.length, wFields.table.getItemCount() - 1);
                        }
                        wFields.removeEmptyRows();
                        wFields.setRowNums();
                        wFields.optWidth(true);
                        EnterTextDialog etd = new EnterTextDialog(shell, BaseMessages.getString(PKG, "TextFileInputDialog.ScanResults.DialogTitle"), BaseMessages.getString(PKG, "TextFileInputDialog.ScanResults.DialogMessage"), message, true);
                        etd.setReadOnly();
                        etd.open();
                    }
                }
            } else {
                MessageBox mb = new MessageBox(shell, SWT.OK | SWT.ICON_ERROR);
                mb.setMessage(BaseMessages.getString(PKG, "TextFileInputDialog.UnableToReadHeaderLine.DialogMessage"));
                mb.setText(BaseMessages.getString(PKG, "System.Dialog.Error.Title"));
                mb.open();
            }
        } catch (IOException e) {
            new ErrorDialog(shell, BaseMessages.getString(PKG, "TextFileInputDialog.IOError.DialogTitle"), BaseMessages.getString(PKG, "TextFileInputDialog.IOError.DialogMessage"), e);
        } catch (KettleException e) {
            new ErrorDialog(shell, BaseMessages.getString(PKG, "System.Dialog.Error.Title"), BaseMessages.getString(PKG, "TextFileInputDialog.ErrorGettingFileDesc.DialogMessage"), e);
        } finally {
            try {
                if (inputStream != null) {
                    inputStream.close();
                } else if (fileInputStream != null) {
                    // The compression wrapper was never created: release the raw stream ourselves
                    fileInputStream.close();
                }
            } catch (Exception ignored) {
                // Best effort: a failure to close is not worth reporting to the user
            }
        }
    } else {
        MessageBox mb = new MessageBox(shell, SWT.OK | SWT.ICON_ERROR);
        mb.setMessage(BaseMessages.getString(PKG, "TextFileInputDialog.NoValidFileFound.DialogMessage"));
        mb.setText(BaseMessages.getString(PKG, "System.Dialog.Error.Title"));
        mb.open();
    }
}
Also used : KettleException(org.pentaho.di.core.exception.KettleException) Table(org.eclipse.swt.widgets.Table) InputStreamReader(java.io.InputStreamReader) CompressionInputStream(org.pentaho.di.core.compress.CompressionInputStream) CompressionInputStream(org.pentaho.di.core.compress.CompressionInputStream) InputStream(java.io.InputStream) TableItem(org.eclipse.swt.widgets.TableItem) EncodingType(org.pentaho.di.trans.steps.fileinput.text.EncodingType) ErrorDialog(org.pentaho.di.ui.core.dialog.ErrorDialog) ValueMetaString(org.pentaho.di.core.row.value.ValueMetaString) IOException(java.io.IOException) KettleException(org.pentaho.di.core.exception.KettleException) IOException(java.io.IOException) MessageBox(org.eclipse.swt.widgets.MessageBox) CompressionProvider(org.pentaho.di.core.compress.CompressionProvider) TextFileInputMeta(org.pentaho.di.trans.steps.fileinput.text.TextFileInputMeta) EnterTextDialog(org.pentaho.di.ui.core.dialog.EnterTextDialog) FileObject(org.apache.commons.vfs2.FileObject) EnterNumberDialog(org.pentaho.di.ui.core.dialog.EnterNumberDialog) FileInputList(org.pentaho.di.core.fileinput.FileInputList)

Example 5 with TextFileInputMeta

use of org.pentaho.di.trans.steps.fileinput.text.TextFileInputMeta in project pentaho-metaverse by pentaho.

the class TextFileInputExternalResourceConsumer method getResourcesFromRow.

/**
 * Collects the external file resource referenced by a row that carries a file name.
 *
 * <p>The file name is read from the row using the step's accepting field. When the step
 * metadata cannot be obtained, the name is empty, or resolving it raises a
 * {@link KettleException}, an empty collection is returned.
 *
 * @param textFileInput the running step the row belongs to
 * @param rowMeta metadata describing {@code row}
 * @param row the row data
 * @return a collection holding at most one file resource; never null
 */
@Override
public Collection<IExternalResourceInfo> getResourcesFromRow(TextFileInput textFileInput, RowMetaInterface rowMeta, Object[] row) {
    Collection<IExternalResourceInfo> found = new LinkedList<>();

    // The step may not hand out its StepMetaInterface directly; fall back to the StepMeta
    TextFileInputMeta stepMeta = (TextFileInputMeta) textFileInput.getStepMetaInterface();
    if (stepMeta == null) {
        stepMeta = (TextFileInputMeta) textFileInput.getStepMeta().getStepMetaInterface();
    }

    try {
        String acceptedName = null;
        if (stepMeta != null) {
            acceptedName = rowMeta.getString(row, stepMeta.getAcceptingField(), null);
        }
        if (!Const.isEmpty(acceptedName)) {
            FileObject resolved = KettleVFS.getFileObject(acceptedName);
            found.add(ExternalResourceInfoFactory.createFileResource(resolved, true));
        }
    } catch (KettleException kve) {
        // TODO throw exception or ignore? For now the row simply contributes no resource.
    }
    return found;
}
Also used : KettleException(org.pentaho.di.core.exception.KettleException) IExternalResourceInfo(org.pentaho.metaverse.api.model.IExternalResourceInfo) TextFileInputMeta(org.pentaho.di.trans.steps.fileinput.text.TextFileInputMeta) FileObject(org.apache.commons.vfs2.FileObject) LinkedList(java.util.LinkedList)

Aggregations

TextFileInputMeta (org.pentaho.di.trans.steps.fileinput.text.TextFileInputMeta)9 ValueMetaString (org.pentaho.di.core.row.value.ValueMetaString)7 KettleException (org.pentaho.di.core.exception.KettleException)6 MessageBox (org.eclipse.swt.widgets.MessageBox)4 IOException (java.io.IOException)3 FileObject (org.apache.commons.vfs2.FileObject)3 FileInputList (org.pentaho.di.core.fileinput.FileInputList)3 BaseFileField (org.pentaho.di.trans.steps.file.BaseFileField)3 EnterNumberDialog (org.pentaho.di.ui.core.dialog.EnterNumberDialog)3 EnterTextDialog (org.pentaho.di.ui.core.dialog.EnterTextDialog)3 ErrorDialog (org.pentaho.di.ui.core.dialog.ErrorDialog)3 InputStream (java.io.InputStream)2 InputStreamReader (java.io.InputStreamReader)2 ArrayList (java.util.ArrayList)2 Shell (org.eclipse.swt.widgets.Shell)2 TableItem (org.eclipse.swt.widgets.TableItem)2 CompressionInputStream (org.pentaho.di.core.compress.CompressionInputStream)2 CompressionProvider (org.pentaho.di.core.compress.CompressionProvider)2 RowMeta (org.pentaho.di.core.row.RowMeta)2 TransMeta (org.pentaho.di.trans.TransMeta)2