Search in sources :

Example 6 with BaseFileField

use of org.pentaho.di.trans.steps.file.BaseFileField in project pentaho-kettle by pentaho.

the class TextFileInputMeta method getXML.

/**
 * Builds the XML representation of this step's settings.
 *
 * <p>The output is written in a fixed order: general content options, the {@code <file>}
 * section (one group of tags per configured file name), the {@code <filters>} section, the
 * {@code <fields>} section, the row limit, the error handling options and finally the
 * additional output field names.</p>
 *
 * @return the XML fragment describing this step
 */
@Override
public String getXML() {
    StringBuilder retval = new StringBuilder(1500);
    retval.append("    ").append(XMLHandler.addTagValue("accept_filenames", inputFiles.acceptingFilenames));
    retval.append("    ").append(XMLHandler.addTagValue("passing_through_fields", inputFiles.passingThruFields));
    retval.append("    ").append(XMLHandler.addTagValue("accept_field", inputFiles.acceptingField));
    retval.append("    ").append(XMLHandler.addTagValue("accept_stepname", (acceptingStep != null ? acceptingStep.getName() : "")));
    retval.append("    ").append(XMLHandler.addTagValue("separator", content.separator));
    retval.append("    ").append(XMLHandler.addTagValue("enclosure", content.enclosure));
    retval.append("    ").append(XMLHandler.addTagValue("enclosure_breaks", content.breakInEnclosureAllowed));
    retval.append("    ").append(XMLHandler.addTagValue("escapechar", content.escapeCharacter));
    retval.append("    ").append(XMLHandler.addTagValue("header", content.header));
    retval.append("    ").append(XMLHandler.addTagValue("nr_headerlines", content.nrHeaderLines));
    retval.append("    ").append(XMLHandler.addTagValue("footer", content.footer));
    retval.append("    ").append(XMLHandler.addTagValue("nr_footerlines", content.nrFooterLines));
    retval.append("    ").append(XMLHandler.addTagValue("line_wrapped", content.lineWrapped));
    retval.append("    ").append(XMLHandler.addTagValue("nr_wraps", content.nrWraps));
    retval.append("    ").append(XMLHandler.addTagValue("layout_paged", content.layoutPaged));
    retval.append("    ").append(XMLHandler.addTagValue("nr_lines_per_page", content.nrLinesPerPage));
    retval.append("    ").append(XMLHandler.addTagValue("nr_lines_doc_header", content.nrLinesDocHeader));
    retval.append("    ").append(XMLHandler.addTagValue("noempty", content.noEmptyLines));
    retval.append("    ").append(XMLHandler.addTagValue("include", content.includeFilename));
    retval.append("    ").append(XMLHandler.addTagValue("include_field", content.filenameField));
    retval.append("    ").append(XMLHandler.addTagValue("rownum", content.includeRowNumber));
    retval.append("    ").append(XMLHandler.addTagValue("rownumByFile", content.rowNumberByFile));
    retval.append("    ").append(XMLHandler.addTagValue("rownum_field", content.rowNumberField));
    retval.append("    ").append(XMLHandler.addTagValue("format", content.fileFormat));
    retval.append("    ").append(XMLHandler.addTagValue("encoding", content.encoding));
    retval.append("    ").append(XMLHandler.addTagValue("length", content.length));
    retval.append("    ").append(XMLHandler.addTagValue("add_to_result_filenames", inputFiles.isaddresult));

    // FILES
    retval.append("    <file>").append(Const.CR);
    // The per-file arrays (fileMask, excludeFileMask, fileRequired, includeSubFolders) must have
    // the same size as fileName, otherwise the loop below can throw ArrayIndexOutOfBoundsException.
    inputFiles.normalizeAllocation(inputFiles.fileName.length);
    for (int i = 0; i < inputFiles.fileName.length; i++) {
        saveSource(retval, inputFiles.fileName[i]);
        // The step may not be attached to a step meta / transformation yet; in that case there is
        // no embed manager to register the URL with, so skip the registration instead of failing.
        if (parentStepMeta != null && parentStepMeta.getParentTransMeta() != null) {
            parentStepMeta.getParentTransMeta().getNamedClusterEmbedManager().registerUrl(inputFiles.fileName[i]);
        }
        retval.append("      ").append(XMLHandler.addTagValue("filemask", inputFiles.fileMask[i]));
        retval.append("      ").append(XMLHandler.addTagValue("exclude_filemask", inputFiles.excludeFileMask[i]));
        retval.append("      ").append(XMLHandler.addTagValue("file_required", inputFiles.fileRequired[i]));
        retval.append("      ").append(XMLHandler.addTagValue("include_subfolders", inputFiles.includeSubFolders[i]));
    }
    retval.append("      ").append(XMLHandler.addTagValue("type", content.fileType));
    // A missing compression setting is written as the literal "None".
    retval.append("      ").append(XMLHandler.addTagValue("compression", (content.fileCompression == null) ? "None" : content.fileCompression));
    retval.append("    </file>").append(Const.CR);

    // FILTERS
    retval.append("    <filters>").append(Const.CR);
    for (int i = 0; i < filter.length; i++) {
        // The filter text is stored Base64-encoded behind STRING_BASE64_PREFIX. A null filter
        // string is written as an empty value without the prefix.
        // NOTE(review): getBytes() / new String(byte[]) use the platform default charset here;
        // confirm the code that reads this value back decodes it the same way.
        String filterString = filter[i].getFilterString();
        byte[] filterBytes = new byte[] {};
        String filterPrefix = "";
        if (filterString != null) {
            filterBytes = filterString.getBytes();
            filterPrefix = STRING_BASE64_PREFIX;
        }
        String filterEncoded = filterPrefix + new String(Base64.encodeBase64(filterBytes));
        retval.append("      <filter>").append(Const.CR);
        retval.append("        ").append(XMLHandler.addTagValue("filter_string", filterEncoded, false));
        retval.append("        ").append(XMLHandler.addTagValue("filter_position", filter[i].getFilterPosition(), false));
        retval.append("        ").append(XMLHandler.addTagValue("filter_is_last_line", filter[i].isFilterLastLine(), false));
        retval.append("        ").append(XMLHandler.addTagValue("filter_is_positive", filter[i].isFilterPositive(), false));
        retval.append("      </filter>").append(Const.CR);
    }
    retval.append("    </filters>").append(Const.CR);

    // FIELDS
    retval.append("    <fields>").append(Const.CR);
    for (int i = 0; i < inputFields.length; i++) {
        BaseFileField field = inputFields[i];
        retval.append("      <field>").append(Const.CR);
        retval.append("        ").append(XMLHandler.addTagValue("name", field.getName()));
        retval.append("        ").append(XMLHandler.addTagValue("type", field.getTypeDesc()));
        retval.append("        ").append(XMLHandler.addTagValue("format", field.getFormat()));
        retval.append("        ").append(XMLHandler.addTagValue("currency", field.getCurrencySymbol()));
        retval.append("        ").append(XMLHandler.addTagValue("decimal", field.getDecimalSymbol()));
        retval.append("        ").append(XMLHandler.addTagValue("group", field.getGroupSymbol()));
        retval.append("        ").append(XMLHandler.addTagValue("nullif", field.getNullString()));
        retval.append("        ").append(XMLHandler.addTagValue("ifnull", field.getIfNullValue()));
        retval.append("        ").append(XMLHandler.addTagValue("position", field.getPosition()));
        retval.append("        ").append(XMLHandler.addTagValue("length", field.getLength()));
        retval.append("        ").append(XMLHandler.addTagValue("precision", field.getPrecision()));
        retval.append("        ").append(XMLHandler.addTagValue("trim_type", field.getTrimTypeCode()));
        retval.append("        ").append(XMLHandler.addTagValue("repeat", field.isRepeated()));
        retval.append("      </field>").append(Const.CR);
    }
    retval.append("    </fields>").append(Const.CR);
    retval.append("    ").append(XMLHandler.addTagValue("limit", content.rowLimit));

    // ERROR HANDLING
    retval.append("    ").append(XMLHandler.addTagValue("error_ignored", errorHandling.errorIgnored));
    retval.append("    ").append(XMLHandler.addTagValue("skip_bad_files", errorHandling.skipBadFiles));
    retval.append("    ").append(XMLHandler.addTagValue("file_error_field", errorHandling.fileErrorField));
    retval.append("    ").append(XMLHandler.addTagValue("file_error_message_field", errorHandling.fileErrorMessageField));
    retval.append("    ").append(XMLHandler.addTagValue("error_line_skipped", errorLineSkipped));
    retval.append("    ").append(XMLHandler.addTagValue("error_count_field", errorCountField));
    retval.append("    ").append(XMLHandler.addTagValue("error_fields_field", errorFieldsField));
    retval.append("    ").append(XMLHandler.addTagValue("error_text_field", errorTextField));
    retval.append("    ").append(XMLHandler.addTagValue("bad_line_files_destination_directory", errorHandling.warningFilesDestinationDirectory));
    retval.append("    ").append(XMLHandler.addTagValue("bad_line_files_extension", errorHandling.warningFilesExtension));
    retval.append("    ").append(XMLHandler.addTagValue("error_line_files_destination_directory", errorHandling.errorFilesDestinationDirectory));
    retval.append("    ").append(XMLHandler.addTagValue("error_line_files_extension", errorHandling.errorFilesExtension));
    retval.append("    ").append(XMLHandler.addTagValue("line_number_files_destination_directory", errorHandling.lineNumberFilesDestinationDirectory));
    retval.append("    ").append(XMLHandler.addTagValue("line_number_files_extension", errorHandling.lineNumberFilesExtension));
    retval.append("    ").append(XMLHandler.addTagValue("date_format_lenient", content.dateFormatLenient));
    retval.append("    ").append(XMLHandler.addTagValue("date_format_locale", content.dateFormatLocale != null ? content.dateFormatLocale.toString() : null));

    // ADDITIONAL OUTPUT FIELDS
    retval.append("    ").append(XMLHandler.addTagValue("shortFileFieldName", additionalOutputFields.shortFilenameField));
    retval.append("    ").append(XMLHandler.addTagValue("pathFieldName", additionalOutputFields.pathField));
    retval.append("    ").append(XMLHandler.addTagValue("hiddenFieldName", additionalOutputFields.hiddenField));
    retval.append("    ").append(XMLHandler.addTagValue("lastModificationTimeFieldName", additionalOutputFields.lastModificationField));
    retval.append("    ").append(XMLHandler.addTagValue("uriNameFieldName", additionalOutputFields.uriField));
    retval.append("    ").append(XMLHandler.addTagValue("rootUriNameFieldName", additionalOutputFields.rootUriField));
    retval.append("    ").append(XMLHandler.addTagValue("extensionFieldName", additionalOutputFields.extensionField));
    retval.append("    ").append(XMLHandler.addTagValue("sizeFieldName", additionalOutputFields.sizeField));
    return retval.toString();
}
Also used : BaseFileField(org.pentaho.di.trans.steps.file.BaseFileField) ValueMetaString(org.pentaho.di.core.row.value.ValueMetaString)

Example 7 with BaseFileField

use of org.pentaho.di.trans.steps.file.BaseFileField in project pentaho-kettle by pentaho.

the class TextFileInputMeta method saveRep.

/**
 * Saves this step's settings as step attributes in the given repository.
 *
 * <p>Scalar settings are stored under a single attribute code; per-file, per-filter and
 * per-field settings are stored with the element index as the attribute number.</p>
 *
 * @param rep               the repository to write the step attributes to
 * @param metaStore         the metastore (not used by this method)
 * @param id_transformation the id of the transformation the step belongs to
 * @param id_step           the id of the step being saved
 * @throws KettleException if anything goes wrong while saving; the original exception is kept as the cause
 */
@Override
public void saveRep(Repository rep, IMetaStore metaStore, ObjectId id_transformation, ObjectId id_step) throws KettleException {
    try {
        rep.saveStepAttribute(id_transformation, id_step, "accept_filenames", inputFiles.acceptingFilenames);
        rep.saveStepAttribute(id_transformation, id_step, "passing_through_fields", inputFiles.passingThruFields);
        rep.saveStepAttribute(id_transformation, id_step, "accept_field", inputFiles.acceptingField);
        rep.saveStepAttribute(id_transformation, id_step, "accept_stepname", (acceptingStep != null ? acceptingStep.getName() : ""));
        rep.saveStepAttribute(id_transformation, id_step, "separator", content.separator);
        rep.saveStepAttribute(id_transformation, id_step, "enclosure", content.enclosure);
        rep.saveStepAttribute(id_transformation, id_step, "enclosure_breaks", content.breakInEnclosureAllowed);
        rep.saveStepAttribute(id_transformation, id_step, "escapechar", content.escapeCharacter);
        rep.saveStepAttribute(id_transformation, id_step, "header", content.header);
        rep.saveStepAttribute(id_transformation, id_step, "nr_headerlines", content.nrHeaderLines);
        rep.saveStepAttribute(id_transformation, id_step, "footer", content.footer);
        rep.saveStepAttribute(id_transformation, id_step, "nr_footerlines", content.nrFooterLines);
        rep.saveStepAttribute(id_transformation, id_step, "line_wrapped", content.lineWrapped);
        rep.saveStepAttribute(id_transformation, id_step, "nr_wraps", content.nrWraps);
        rep.saveStepAttribute(id_transformation, id_step, "layout_paged", content.layoutPaged);
        rep.saveStepAttribute(id_transformation, id_step, "nr_lines_per_page", content.nrLinesPerPage);
        rep.saveStepAttribute(id_transformation, id_step, "nr_lines_doc_header", content.nrLinesDocHeader);
        rep.saveStepAttribute(id_transformation, id_step, "noempty", content.noEmptyLines);
        rep.saveStepAttribute(id_transformation, id_step, "include", content.includeFilename);
        rep.saveStepAttribute(id_transformation, id_step, "include_field", content.filenameField);
        rep.saveStepAttribute(id_transformation, id_step, "rownum", content.includeRowNumber);
        rep.saveStepAttribute(id_transformation, id_step, "rownumByFile", content.rowNumberByFile);
        rep.saveStepAttribute(id_transformation, id_step, "rownum_field", content.rowNumberField);
        rep.saveStepAttribute(id_transformation, id_step, "format", content.fileFormat);
        rep.saveStepAttribute(id_transformation, id_step, "encoding", content.encoding);
        rep.saveStepAttribute(id_transformation, id_step, "length", content.length);
        rep.saveStepAttribute(id_transformation, id_step, "add_to_result_filenames", inputFiles.isaddresult);
        rep.saveStepAttribute(id_transformation, id_step, "limit", content.rowLimit);

        // FILES
        // Same precaution as in getXML(): the per-file arrays (fileMask, excludeFileMask,
        // fileRequired, includeSubFolders) are indexed by the fileName length below, so bring them
        // to that size first to prevent an ArrayIndexOutOfBoundsException.
        inputFiles.normalizeAllocation(inputFiles.fileName.length);
        for (int i = 0; i < inputFiles.fileName.length; i++) {
            saveSourceRep(rep, id_transformation, id_step, i, inputFiles.fileName[i]);
            rep.saveStepAttribute(id_transformation, id_step, i, "file_mask", inputFiles.fileMask[i]);
            rep.saveStepAttribute(id_transformation, id_step, i, "exclude_file_mask", inputFiles.excludeFileMask[i]);
            rep.saveStepAttribute(id_transformation, id_step, i, "file_required", inputFiles.fileRequired[i]);
            rep.saveStepAttribute(id_transformation, id_step, i, "include_subfolders", inputFiles.includeSubFolders[i]);
        }
        rep.saveStepAttribute(id_transformation, id_step, "file_type", content.fileType);
        // A missing compression setting is stored as the literal "None".
        rep.saveStepAttribute(id_transformation, id_step, "compression", (content.fileCompression == null) ? "None" : content.fileCompression);

        // FILTERS
        for (int i = 0; i < filter.length; i++) {
            rep.saveStepAttribute(id_transformation, id_step, i, "filter_position", filter[i].getFilterPosition());
            rep.saveStepAttribute(id_transformation, id_step, i, "filter_string", filter[i].getFilterString());
            rep.saveStepAttribute(id_transformation, id_step, i, "filter_is_last_line", filter[i].isFilterLastLine());
            rep.saveStepAttribute(id_transformation, id_step, i, "filter_is_positive", filter[i].isFilterPositive());
        }

        // FIELDS
        for (int i = 0; i < inputFields.length; i++) {
            BaseFileField field = inputFields[i];
            rep.saveStepAttribute(id_transformation, id_step, i, "field_name", field.getName());
            rep.saveStepAttribute(id_transformation, id_step, i, "field_type", field.getTypeDesc());
            rep.saveStepAttribute(id_transformation, id_step, i, "field_format", field.getFormat());
            rep.saveStepAttribute(id_transformation, id_step, i, "field_currency", field.getCurrencySymbol());
            rep.saveStepAttribute(id_transformation, id_step, i, "field_decimal", field.getDecimalSymbol());
            rep.saveStepAttribute(id_transformation, id_step, i, "field_group", field.getGroupSymbol());
            rep.saveStepAttribute(id_transformation, id_step, i, "field_nullif", field.getNullString());
            rep.saveStepAttribute(id_transformation, id_step, i, "field_ifnull", field.getIfNullValue());
            rep.saveStepAttribute(id_transformation, id_step, i, "field_position", field.getPosition());
            rep.saveStepAttribute(id_transformation, id_step, i, "field_length", field.getLength());
            rep.saveStepAttribute(id_transformation, id_step, i, "field_precision", field.getPrecision());
            rep.saveStepAttribute(id_transformation, id_step, i, "field_trim_type", field.getTrimTypeCode());
            rep.saveStepAttribute(id_transformation, id_step, i, "field_repeat", field.isRepeated());
        }

        // ERROR HANDLING
        rep.saveStepAttribute(id_transformation, id_step, "error_ignored", errorHandling.errorIgnored);
        rep.saveStepAttribute(id_transformation, id_step, "skip_bad_files", errorHandling.skipBadFiles);
        rep.saveStepAttribute(id_transformation, id_step, "file_error_field", errorHandling.fileErrorField);
        rep.saveStepAttribute(id_transformation, id_step, "file_error_message_field", errorHandling.fileErrorMessageField);
        rep.saveStepAttribute(id_transformation, id_step, "error_line_skipped", errorLineSkipped);
        rep.saveStepAttribute(id_transformation, id_step, "error_count_field", errorCountField);
        rep.saveStepAttribute(id_transformation, id_step, "error_fields_field", errorFieldsField);
        rep.saveStepAttribute(id_transformation, id_step, "error_text_field", errorTextField);
        rep.saveStepAttribute(id_transformation, id_step, "bad_line_files_dest_dir", errorHandling.warningFilesDestinationDirectory);
        rep.saveStepAttribute(id_transformation, id_step, "bad_line_files_ext", errorHandling.warningFilesExtension);
        rep.saveStepAttribute(id_transformation, id_step, "error_line_files_dest_dir", errorHandling.errorFilesDestinationDirectory);
        rep.saveStepAttribute(id_transformation, id_step, "error_line_files_ext", errorHandling.errorFilesExtension);
        rep.saveStepAttribute(id_transformation, id_step, "line_number_files_dest_dir", errorHandling.lineNumberFilesDestinationDirectory);
        rep.saveStepAttribute(id_transformation, id_step, "line_number_files_ext", errorHandling.lineNumberFilesExtension);
        rep.saveStepAttribute(id_transformation, id_step, "date_format_lenient", content.dateFormatLenient);
        rep.saveStepAttribute(id_transformation, id_step, "date_format_locale", content.dateFormatLocale != null ? content.dateFormatLocale.toString() : null);

        // ADDITIONAL OUTPUT FIELDS
        rep.saveStepAttribute(id_transformation, id_step, "shortFileFieldName", additionalOutputFields.shortFilenameField);
        rep.saveStepAttribute(id_transformation, id_step, "pathFieldName", additionalOutputFields.pathField);
        rep.saveStepAttribute(id_transformation, id_step, "hiddenFieldName", additionalOutputFields.hiddenField);
        rep.saveStepAttribute(id_transformation, id_step, "lastModificationTimeFieldName", additionalOutputFields.lastModificationField);
        rep.saveStepAttribute(id_transformation, id_step, "uriNameFieldName", additionalOutputFields.uriField);
        rep.saveStepAttribute(id_transformation, id_step, "rootUriNameFieldName", additionalOutputFields.rootUriField);
        rep.saveStepAttribute(id_transformation, id_step, "extensionFieldName", additionalOutputFields.extensionField);
        rep.saveStepAttribute(id_transformation, id_step, "sizeFieldName", additionalOutputFields.sizeField);
    } catch (Exception e) {
        throw new KettleException("Unable to save step information to the repository for id_step=" + id_step, e);
    }
}
Also used : KettleException(org.pentaho.di.core.exception.KettleException) BaseFileField(org.pentaho.di.trans.steps.file.BaseFileField) KettleXMLException(org.pentaho.di.core.exception.KettleXMLException) KettleFileException(org.pentaho.di.core.exception.KettleFileException) KettleStepException(org.pentaho.di.core.exception.KettleStepException) KettleException(org.pentaho.di.core.exception.KettleException)

Example 8 with BaseFileField

use of org.pentaho.di.trans.steps.file.BaseFileField in project pentaho-kettle by pentaho.

the class TextFileInputMeta method getFields.

/**
 * Describes the fields this step adds to (or leaves in) the outgoing row.
 *
 * <p>When pass-through is disabled the incoming row metadata is cleared; otherwise the first
 * non-null entry of {@code info} is merged into it. After that the configured input fields are
 * appended, followed by the optional error fields, the file name, the row number and the
 * additional file-attribute fields, in that order.</p>
 *
 * @param row        the row metadata to modify in place
 * @param name       the origin recorded on every value added by this step
 * @param info       row metadata of the info steps; may be {@code null}
 * @param nextStep   not used by this method
 * @param space      variable space used to resolve the additional output field names
 * @param repository not used by this method
 * @param metaStore  not used by this method
 * @throws KettleStepException if the value metadata for an input field cannot be created
 */
@Override
public void getFields(RowMetaInterface row, String name, RowMetaInterface[] info, StepMeta nextStep, VariableSpace space, Repository repository, IMetaStore metaStore) throws KettleStepException {
    if (!inputFiles.passingThruFields) {
        // all incoming fields are not transmitted !
        row.clear();
    } else {
        if (info != null) {
            // Only the first non-null info row is merged.
            boolean found = false;
            for (int i = 0; i < info.length && !found; i++) {
                if (info[i] != null) {
                    row.mergeRowMeta(info[i], name);
                    found = true;
                }
            }
        }
    }
    for (int i = 0; i < inputFields.length; i++) {
        BaseFileField field = inputFields[i];
        // Fields without an explicit type are treated as strings.
        int type = field.getType();
        if (type == ValueMetaInterface.TYPE_NONE) {
            type = ValueMetaInterface.TYPE_STRING;
        }
        try {
            ValueMetaInterface v = ValueMetaFactory.createValueMeta(field.getName(), type);
            v.setLength(field.getLength());
            v.setPrecision(field.getPrecision());
            v.setOrigin(name);
            v.setConversionMask(field.getFormat());
            v.setDecimalSymbol(field.getDecimalSymbol());
            v.setGroupingSymbol(field.getGroupSymbol());
            v.setCurrencySymbol(field.getCurrencySymbol());
            v.setDateFormatLenient(content.dateFormatLenient);
            v.setDateFormatLocale(content.dateFormatLocale);
            v.setTrimType(field.getTrimType());
            row.addValueMeta(v);
        } catch (Exception e) {
            throw new KettleStepException(e);
        }
    }
    // The error fields are only added when errors are ignored and the field name is non-empty.
    if (errorHandling.errorIgnored) {
        if (errorCountField != null && errorCountField.length() > 0) {
            ValueMetaInterface v = new ValueMetaInteger(errorCountField);
            v.setLength(ValueMetaInterface.DEFAULT_INTEGER_LENGTH, 0);
            v.setOrigin(name);
            row.addValueMeta(v);
        }
        if (errorFieldsField != null && errorFieldsField.length() > 0) {
            ValueMetaInterface v = new ValueMetaString(errorFieldsField);
            v.setOrigin(name);
            row.addValueMeta(v);
        }
        if (errorTextField != null && errorTextField.length() > 0) {
            ValueMetaInterface v = new ValueMetaString(errorTextField);
            v.setOrigin(name);
            row.addValueMeta(v);
        }
    }
    if (content.includeFilename) {
        ValueMetaInterface v = new ValueMetaString(content.filenameField);
        v.setLength(100);
        v.setOrigin(name);
        row.addValueMeta(v);
    }
    if (content.includeRowNumber) {
        ValueMetaInterface v = new ValueMetaInteger(content.rowNumberField);
        v.setLength(ValueMetaInterface.DEFAULT_INTEGER_LENGTH, 0);
        v.setOrigin(name);
        row.addValueMeta(v);
    }
    // Additional output fields: each one is added only when its name is not blank, and the name
    // is resolved against the variable space first.
    if (StringUtils.isNotBlank(additionalOutputFields.shortFilenameField)) {
        ValueMetaInterface v = new ValueMetaString(space.environmentSubstitute(additionalOutputFields.shortFilenameField));
        v.setLength(100, -1);
        v.setOrigin(name);
        row.addValueMeta(v);
    }
    if (StringUtils.isNotBlank(additionalOutputFields.extensionField)) {
        ValueMetaInterface v = new ValueMetaString(space.environmentSubstitute(additionalOutputFields.extensionField));
        v.setLength(100, -1);
        v.setOrigin(name);
        row.addValueMeta(v);
    }
    if (StringUtils.isNotBlank(additionalOutputFields.pathField)) {
        ValueMetaInterface v = new ValueMetaString(space.environmentSubstitute(additionalOutputFields.pathField));
        v.setLength(100, -1);
        v.setOrigin(name);
        row.addValueMeta(v);
    }
    if (StringUtils.isNotBlank(additionalOutputFields.sizeField)) {
        // NOTE(review): this is declared as a String value, while
        // TextFileInputUtils.convertLineToRow receives the size as a Long and stores it as-is —
        // confirm the intended type.
        ValueMetaInterface v = new ValueMetaString(space.environmentSubstitute(additionalOutputFields.sizeField));
        v.setOrigin(name);
        v.setLength(9);
        row.addValueMeta(v);
    }
    if (StringUtils.isNotBlank(additionalOutputFields.hiddenField)) {
        ValueMetaInterface v = new ValueMetaBoolean(space.environmentSubstitute(additionalOutputFields.hiddenField));
        v.setOrigin(name);
        row.addValueMeta(v);
    }
    if (StringUtils.isNotBlank(additionalOutputFields.lastModificationField)) {
        ValueMetaInterface v = new ValueMetaDate(space.environmentSubstitute(additionalOutputFields.lastModificationField));
        v.setOrigin(name);
        row.addValueMeta(v);
    }
    if (StringUtils.isNotBlank(additionalOutputFields.uriField)) {
        ValueMetaInterface v = new ValueMetaString(space.environmentSubstitute(additionalOutputFields.uriField));
        v.setLength(100, -1);
        v.setOrigin(name);
        row.addValueMeta(v);
    }
    if (StringUtils.isNotBlank(additionalOutputFields.rootUriField)) {
        // Resolve variables in the field name, like every other additional output field above.
        ValueMetaInterface v = new ValueMetaString(space.environmentSubstitute(additionalOutputFields.rootUriField));
        v.setLength(100, -1);
        v.setOrigin(name);
        row.addValueMeta(v);
    }
}
Also used : ValueMetaString(org.pentaho.di.core.row.value.ValueMetaString) KettleStepException(org.pentaho.di.core.exception.KettleStepException) BaseFileField(org.pentaho.di.trans.steps.file.BaseFileField) ValueMetaInteger(org.pentaho.di.core.row.value.ValueMetaInteger) ValueMetaBoolean(org.pentaho.di.core.row.value.ValueMetaBoolean) ValueMetaDate(org.pentaho.di.core.row.value.ValueMetaDate) KettleXMLException(org.pentaho.di.core.exception.KettleXMLException) KettleFileException(org.pentaho.di.core.exception.KettleFileException) KettleStepException(org.pentaho.di.core.exception.KettleStepException) KettleException(org.pentaho.di.core.exception.KettleException) ValueMetaInterface(org.pentaho.di.core.row.ValueMetaInterface)

Example 9 with BaseFileField

use of org.pentaho.di.trans.steps.file.BaseFileField in project pentaho-kettle by pentaho.

the class TextFileInputUtils method convertLineToRow.

/**
 * Converts one line of text into an output row.
 *
 * <p>The line is split into strings with {@code convertLineToStrings}, each string is converted
 * using the matching value metadata, and the optional error, file name, row number and
 * additional file-attribute values are appended after the input fields. Pass-through values,
 * when present, occupy the first {@code nrPassThruFields} slots of the row.</p>
 *
 * @param log                    log channel used to report conversion problems
 * @param textFileLine           the line to convert
 * @param info                   the step metadata (input fields, error handling, content options)
 * @param passThruFields         values to copy to the start of the row; may be {@code null}
 * @param nrPassThruFields       number of pass-through values
 * @param outputRowMeta          metadata of the row being produced
 * @param convertRowMeta         metadata passed to the string conversion of each value
 * @param fname                  file name, used for logging and for the optional file name field
 * @param rowNr                  row number, used in messages and for the optional row number field
 * @param delimiter              field delimiter handed to {@code convertLineToStrings}
 * @param enclosure              enclosure string handed to {@code convertLineToStrings}
 * @param escapeCharacter        escape string handed to {@code convertLineToStrings}
 * @param errorHandler           notified of line errors when errors are ignored; may be {@code null}
 * @param additionalOutputFields names of the additional output fields; a non-null name enables the field
 * @param shortFilename          value for the short file name field
 * @param path                   value for the path field
 * @param hidden                 value for the hidden field
 * @param modificationDateTime   value for the last modification field
 * @param uri                    value for the URI field
 * @param rooturi                value for the root URI field
 * @param extension              value for the extension field
 * @param size                   value for the size field
 * @return the converted row, or {@code null} when the line is {@code null} or when a conversion
 *         error occurred while errors are ignored and error lines are skipped
 * @throws KettleException if the line cannot be converted and errors are not ignored, or on any
 *         other failure during conversion
 */
public static final Object[] convertLineToRow(LogChannelInterface log, TextFileLine textFileLine, TextFileInputMeta info, Object[] passThruFields, int nrPassThruFields, RowMetaInterface outputRowMeta, RowMetaInterface convertRowMeta, String fname, long rowNr, String delimiter, String enclosure, String escapeCharacter, FileErrorHandler errorHandler, BaseFileInputAdditionalField additionalOutputFields, String shortFilename, String path, boolean hidden, Date modificationDateTime, String uri, String rooturi, String extension, Long size) throws KettleException {
    if (textFileLine == null || textFileLine.line == null) {
        return null;
    }
    // Allocate the row data, sized from the output row metadata.
    Object[] r = RowDataUtil.allocateRowData(outputRowMeta.size());
    int nrfields = info.inputFields.length;

    // The error accumulators are only initialised (non-null) when errors are ignored and the
    // corresponding field name is configured; a null accumulator means "field not requested".
    Long errorCount = null;
    if (info.errorHandling.errorIgnored && info.getErrorCountField() != null && info.getErrorCountField().length() > 0) {
        errorCount = Long.valueOf(0L);
    }
    String errorFields = null;
    if (info.errorHandling.errorIgnored && info.getErrorFieldsField() != null && info.getErrorFieldsField().length() > 0) {
        errorFields = "";
    }
    String errorText = null;
    if (info.errorHandling.errorIgnored && info.getErrorTextField() != null && info.getErrorTextField().length() > 0) {
        errorText = "";
    }
    try {
        String[] strings = convertLineToStrings(log, textFileLine.line, info, delimiter, enclosure, escapeCharacter);
        // Input field values are placed after the pass-through values.
        int shiftFields = (passThruFields == null ? 0 : nrPassThruFields);
        for (int fieldnr = 0; fieldnr < nrfields; fieldnr++) {
            BaseFileField f = info.inputFields[fieldnr];
            int valuenr = shiftFields + fieldnr;
            ValueMetaInterface valueMeta = outputRowMeta.getValueMeta(valuenr);
            ValueMetaInterface convertMeta = convertRowMeta.getValueMeta(valuenr);
            Object value;
            String nullif = f.getNullString();
            String ifnull = f.getIfNullValue();
            int trim_type = f.getTrimType();
            if (fieldnr < strings.length) {
                String pol = strings[fieldnr];
                try {
                    if (valueMeta.isNull(pol) || !Utils.isEmpty(nullif) && nullif.equals(pol)) {
                        pol = null;
                    }
                    value = valueMeta.convertDataFromString(pol, convertMeta, nullif, ifnull, trim_type);
                } catch (Exception e) {
                    // OK, give some feedback!
                    String message = BaseMessages.getString(PKG, "TextFileInput.Log.CoundNotParseField", valueMeta.toStringMeta(), "" + pol, valueMeta.getConversionMask(), "" + rowNr);
                    if (info.errorHandling.errorIgnored) {
                        log.logDetailed(fname, BaseMessages.getString(PKG, "TextFileInput.Log.Warning") + ": " + message + " : " + e.getMessage());
                        value = null;
                        if (errorCount != null) {
                            errorCount = Long.valueOf(errorCount.longValue() + 1L);
                        }
                        if (errorFields != null) {
                            // Failing field names are separated by tabs.
                            StringBuilder sb = new StringBuilder(errorFields);
                            if (sb.length() > 0) {
                                // TODO document this change
                                sb.append("\t");
                            }
                            sb.append(valueMeta.getName());
                            errorFields = sb.toString();
                        }
                        if (errorText != null) {
                            // Error messages are separated by line breaks.
                            StringBuilder sb = new StringBuilder(errorText);
                            if (sb.length() > 0) {
                                sb.append(Const.CR);
                            }
                            sb.append(message);
                            errorText = sb.toString();
                        }
                        if (errorHandler != null) {
                            errorHandler.handleLineError(textFileLine.lineNumber, AbstractFileErrorHandler.NO_PARTS);
                        }
                        if (info.isErrorLineSkipped()) {
                            // Dropping the row: the remaining fields are still converted (and
                            // their errors reported), but nothing is stored any more.
                            r = null;
                        }
                    } else {
                        throw new KettleException(message, e);
                    }
                }
            } else {
                // No data found: TRAILING NULLCOLS: add null value...
                value = null;
            }
            // Now add value to the row (if we're not skipping the row)
            if (r != null) {
                r[valuenr] = value;
            }
        }
        // none of this applies if we're skipping the row
        if (r != null) {
            // Add the error handling fields...
            int index = shiftFields + nrfields;
            if (errorCount != null) {
                r[index] = errorCount;
                index++;
            }
            if (errorFields != null) {
                r[index] = errorFields;
                index++;
            }
            if (errorText != null) {
                r[index] = errorText;
                index++;
            }
            // Possibly add a filename...
            if (info.content.includeFilename) {
                r[index] = fname;
                index++;
            }
            // Possibly add a row number...
            if (info.content.includeRowNumber) {
                r[index] = Long.valueOf(rowNr);
                index++;
            }
            // NOTE(review): TextFileInputMeta.getFields only declares the additional fields when
            // the name is not blank, whereas the checks below only test for null — confirm the
            // two stay aligned when a name is an empty string.
            // Possibly add short filename...
            if (additionalOutputFields.shortFilenameField != null) {
                r[index] = shortFilename;
                index++;
            }
            // Add Extension
            if (additionalOutputFields.extensionField != null) {
                r[index] = extension;
                index++;
            }
            // add path
            if (additionalOutputFields.pathField != null) {
                r[index] = path;
                index++;
            }
            // Add Size
            if (additionalOutputFields.sizeField != null) {
                r[index] = size;
                index++;
            }
            // add Hidden
            if (additionalOutputFields.hiddenField != null) {
                r[index] = hidden;
                index++;
            }
            // Add modification date
            if (additionalOutputFields.lastModificationField != null) {
                r[index] = modificationDateTime;
                index++;
            }
            // Add Uri
            if (additionalOutputFields.uriField != null) {
                r[index] = uri;
                index++;
            }
            // Add RootUri
            if (additionalOutputFields.rootUriField != null) {
                r[index] = rooturi;
                index++;
            }
        }
    // End if r != null
    } catch (Exception e) {
        throw new KettleException(BaseMessages.getString(PKG, "TextFileInput.Log.Error.ErrorConvertingLineText"), e);
    }
    if (r != null && passThruFields != null) {
        // Simply add all fields from source files step
        for (int i = 0; i < nrPassThruFields; i++) {
            r[i] = passThruFields[i];
        }
    }
    return r;
}
Also used : KettleException(org.pentaho.di.core.exception.KettleException) BaseFileField(org.pentaho.di.trans.steps.file.BaseFileField) KettleException(org.pentaho.di.core.exception.KettleException) KettleFileException(org.pentaho.di.core.exception.KettleFileException) ValueMetaInterface(org.pentaho.di.core.row.ValueMetaInterface)

Example 10 with BaseFileField

use of org.pentaho.di.trans.steps.file.BaseFileField in project pentaho-kettle by pentaho.

the class TextFileInputUtils method convertLineToStrings.

/**
 * Splits one line of text into the raw (unconverted) string values of its fields.
 *
 * <p>When {@code inf.content.fileType} equals "CSV" (ignoring case) the line is split on
 * {@code delimiter}, honouring the optional enclosure and escape sequences. Otherwise the line is
 * treated as fixed width and every field is cut out using its configured position and length,
 * counted in characters or, when {@code inf.content.length} is not "Characters" and an encoding is
 * set, in bytes of that encoding.
 *
 * <p>The returned array always has {@code inf.inputFields.length} entries. In CSV mode, values
 * found beyond that count are dropped and entries for which the line holds no value stay
 * {@code null}. A line that is empty or ends with the delimiter yields an empty string for the
 * field following the last delimiter, if there is room for it.
 *
 * @param log channel used for row-level tracing
 * @param line the line to split; {@code null} yields {@code null}
 * @param inf step metadata supplying the field definitions, file type, escape character, length
 *     unit and encoding
 * @param delimiter field separator (CSV mode only)
 * @param enclosure optional enclosure string, may be {@code null} or empty (CSV mode only)
 * @param escapeCharacters optional escape string; only its length is used here, the escape text
 *     itself is read from {@code inf.content.escapeCharacter}
 * @return one string per configured input field, or {@code null} when {@code line} is {@code null}
 * @throws KettleException wrapping any exception raised while splitting the line
 */
public static final String[] convertLineToStrings(LogChannelInterface log, String line, TextFileInputMeta inf, String delimiter, String enclosure, String escapeCharacters) throws KettleException {
    String[] strings = new String[inf.inputFields.length];
    int fieldnr;
    // piece of line
    String pol;
    try {
        if (line == null) {
            return null;
        }
        if (inf.content.fileType.equalsIgnoreCase("CSV")) {
            // Split string in pieces, only for CSV!
            fieldnr = 0;
            int pos = 0;
            int length = line.length();
            // NOTE(review): dencl is never reset between fields, so once a doubled enclosure has been
            // seen, every later field on this line is also collapsed below - confirm this is intended.
            boolean dencl = false;
            int len_encl = (enclosure == null ? 0 : enclosure.length());
            int len_esc = (escapeCharacters == null ? 0 : escapeCharacters.length());
            // NOTE(review): len_esc comes from the escapeCharacters parameter while every comparison
            // uses inf.content.escapeCharacter - presumably callers pass the same value; verify.
            while (pos < length) {
                int from = pos;
                int next;
                boolean encl_found;
                boolean contains_escaped_enclosures = false;
                boolean contains_escaped_separators = false;
                boolean contains_escaped_escape = false;
                // Does the field start with an enclosure?
                // "aa;aa";123;"aaa-aaa";000;...
                if (len_encl > 0 && line.substring(from, from + len_encl).equalsIgnoreCase(enclosure)) {
                    if (log.isRowLevel()) {
                        log.logRowlevel(BaseMessages.getString(PKG, "TextFileInput.Log.ConvertLineToRowTitle"), BaseMessages.getString(PKG, "TextFileInput.Log.Encloruse", line.substring(from, from + len_encl)));
                    }
                    encl_found = true;
                    // p scans forward from just after the opening enclosure
                    int p = from + len_encl;
                    boolean is_enclosure = len_encl > 0 && p + len_encl < length && line.substring(p, p + len_encl).equalsIgnoreCase(enclosure);
                    boolean is_escape = len_esc > 0 && p + len_esc < length && line.substring(p, p + len_esc).equalsIgnoreCase(inf.content.escapeCharacter);
                    boolean enclosure_after = false;
                    // Is it really an enclosure? See if it's not repeated twice or escaped!
                    if ((is_enclosure || is_escape) && p < length - 1) {
                        String strnext = line.substring(p + len_encl, p + 2 * len_encl);
                        if (strnext.equalsIgnoreCase(enclosure)) {
                            p++;
                            enclosure_after = true;
                            dencl = true;
                            // Remember to replace them later on!
                            if (is_escape) {
                                contains_escaped_enclosures = true;
                            }
                        } else if (strnext.equals(inf.content.escapeCharacter)) {
                            p++;
                            // Remember to replace them later on!
                            if (is_escape) {
                                // remember
                                contains_escaped_escape = true;
                            }
                        }
                    }
                    // Look for a closing enclosure!
                    while ((!is_enclosure || enclosure_after) && p < line.length()) {
                        p++;
                        enclosure_after = false;
                        is_enclosure = len_encl > 0 && p + len_encl < length && line.substring(p, p + len_encl).equals(enclosure);
                        is_escape = len_esc > 0 && p + len_esc < length && line.substring(p, p + len_esc).equals(inf.content.escapeCharacter);
                        // Is it really an enclosure? See if it's not repeated twice or escaped!
                        if ((is_enclosure || is_escape) && p < length - 1) {
                            String strnext = line.substring(p + len_encl, p + 2 * len_encl);
                            if (strnext.equals(enclosure)) {
                                p++;
                                enclosure_after = true;
                                dencl = true;
                                // Remember to replace them later on!
                                if (is_escape) {
                                    // remember
                                    contains_escaped_enclosures = true;
                                }
                            } else if (strnext.equals(inf.content.escapeCharacter)) {
                                p++;
                                // Remember to replace them later on!
                                if (is_escape) {
                                    // remember
                                    contains_escaped_escape = true;
                                }
                            }
                        }
                    }
                    // next is the position where the delimiter after this field is expected:
                    // end of line, or just past the closing enclosure.
                    if (p >= length) {
                        next = p;
                    } else {
                        next = p + len_encl;
                    }
                    if (log.isRowLevel()) {
                        log.logRowlevel(BaseMessages.getString(PKG, "TextFileInput.Log.ConvertLineToRowTitle"), BaseMessages.getString(PKG, "TextFileInput.Log.EndOfEnclosure", "" + p));
                    }
                } else {
                    // Plain field: it ends at the next delimiter that is not preceded by the escape text.
                    encl_found = false;
                    boolean found = false;
                    int startpoint = from;
                    do {
                        next = line.indexOf(delimiter, startpoint);
                        // See if this position is preceded by an escape character.
                        if (len_esc > 0 && next - len_esc > 0) {
                            String before = line.substring(next - len_esc, next);
                            if (inf.content.escapeCharacter.equals(before)) {
                                // take the next separator, this one is escaped...
                                startpoint = next + 1;
                                contains_escaped_separators = true;
                            } else {
                                found = true;
                            }
                        } else {
                            found = true;
                        }
                    } while (!found && next >= 0);
                }
                // No further delimiter: the field runs to the end of the line.
                if (next == -1) {
                    next = length;
                }
                if (encl_found && ((from + len_encl) <= (next - len_encl))) {
                    // Strip the opening and closing enclosure.
                    pol = line.substring(from + len_encl, next - len_encl);
                    if (log.isRowLevel()) {
                        log.logRowlevel(BaseMessages.getString(PKG, "TextFileInput.Log.ConvertLineToRowTitle"), BaseMessages.getString(PKG, "TextFileInput.Log.EnclosureFieldFound", "" + pol));
                    }
                } else {
                    pol = line.substring(from, next);
                    if (log.isRowLevel()) {
                        log.logRowlevel(BaseMessages.getString(PKG, "TextFileInput.Log.ConvertLineToRowTitle"), BaseMessages.getString(PKG, "TextFileInput.Log.NormalFieldFound", "" + pol));
                    }
                }
                // Without an escape character, a doubled enclosure stands for a single literal one.
                if (dencl && Utils.isEmpty(inf.content.escapeCharacter)) {
                    StringBuilder sbpol = new StringBuilder(pol);
                    int idx = sbpol.indexOf(enclosure + enclosure);
                    while (idx >= 0) {
                        sbpol.delete(idx, idx + enclosure.length());
                        idx = sbpol.indexOf(enclosure + enclosure);
                    }
                    pol = sbpol.toString();
                }
                // replace the escaped enclosures with enclosures...
                if (contains_escaped_enclosures) {
                    String replace = inf.content.escapeCharacter + enclosure;
                    String replaceWith = enclosure;
                    pol = Const.replace(pol, replace, replaceWith);
                }
                // replace the escaped separators with separators...
                if (contains_escaped_separators) {
                    String replace = inf.content.escapeCharacter + delimiter;
                    String replaceWith = delimiter;
                    pol = Const.replace(pol, replace, replaceWith);
                }
                // replace the escaped escape with escape...
                if (contains_escaped_escape) {
                    String replace = inf.content.escapeCharacter + inf.content.escapeCharacter;
                    String replaceWith = inf.content.escapeCharacter;
                    pol = Const.replace(pol, replace, replaceWith);
                }
                // Now add pol to the strings found!
                // A row can carry more values than there are configured fields; the surplus values are
                // dropped so the result keeps exactly one slot per configured field.
                if (fieldnr < strings.length) {
                    strings[fieldnr] = pol;
                }
                pos = next + delimiter.length();
                fieldnr++;
            }
            // The line was empty or ended right after a delimiter: the last field is an empty string.
            if (pos == length) {
                if (log.isRowLevel()) {
                    log.logRowlevel(BaseMessages.getString(PKG, "TextFileInput.Log.ConvertLineToRowTitle"), BaseMessages.getString(PKG, "TextFileInput.Log.EndOfEmptyLineFound"));
                }
                if (fieldnr < strings.length) {
                    strings[fieldnr] = Const.EMPTY_STRING;
                }
            }
        } else {
            // Fixed file format: Simply get the strings at the required positions...
            // Note - charBased is the old default behavior. If this is an old transformation, content.length will be null
            // and should be processed as before. If the content.length is equal to "Characters" or there is no specified encoding,
            // it will still use the old behavior. The *only* way to get the new behavior is if content.length = "Bytes" and
            // the encoding is specified.
            // Default to classic behavior
            boolean charBased = (inf.content.length == null || inf.content.length.equalsIgnoreCase("Characters") || inf.getEncoding() == null);
            String enc = charBased ? null : inf.getEncoding();
            // Encoded form of the line, created on first use so the line is encoded once, not once per field.
            byte[] lineBytes = null;
            for (int i = 0; i < inf.inputFields.length; i++) {
                BaseFileField field = inf.inputFields[i];
                int fPos = field.getPosition();
                int fLength = field.getLength();
                // exclusive end offset of the field
                int fPl = fPos + fLength;
                if (charBased) {
                    int length = line.length();
                    if (fPl <= length) {
                        strings[i] = line.substring(fPos, fPl);
                    } else if (fPos < length) {
                        // Field is cut short by the end of the line: take what is there.
                        strings[i] = line.substring(fPos);
                    } else {
                        strings[i] = "";
                    }
                } else {
                    if (lineBytes == null) {
                        lineBytes = line.getBytes(enc);
                    }
                    int length = lineBytes.length;
                    if (fPl <= length) {
                        strings[i] = new String(Arrays.copyOfRange(lineBytes, fPos, fPl), enc);
                    } else if (fPos < length) {
                        // Field is cut short by the end of the line: take every remaining byte.
                        // The upper bound of copyOfRange is exclusive, so it must be length, not length - 1.
                        strings[i] = new String(Arrays.copyOfRange(lineBytes, fPos, length), enc);
                    } else {
                        strings[i] = "";
                    }
                }
            }
        }
    } catch (Exception e) {
        throw new KettleException(BaseMessages.getString(PKG, "TextFileInput.Log.Error.ErrorConvertingLine", e.toString()), e);
    }
    return strings;
}
Also used : KettleException(org.pentaho.di.core.exception.KettleException) BaseFileField(org.pentaho.di.trans.steps.file.BaseFileField) KettleException(org.pentaho.di.core.exception.KettleException) KettleFileException(org.pentaho.di.core.exception.KettleFileException)

Aggregations

BaseFileField (org.pentaho.di.trans.steps.file.BaseFileField)36 Test (org.junit.Test)19 ValueMetaString (org.pentaho.di.core.row.value.ValueMetaString)12 KettleException (org.pentaho.di.core.exception.KettleException)9 KettleFileException (org.pentaho.di.core.exception.KettleFileException)7 ValueMetaInterface (org.pentaho.di.core.row.ValueMetaInterface)5 ArrayList (java.util.ArrayList)4 KettleStepException (org.pentaho.di.core.exception.KettleStepException)4 KettleXMLException (org.pentaho.di.core.exception.KettleXMLException)4 TableItem (org.eclipse.swt.widgets.TableItem)3 RowMetaInterface (org.pentaho.di.core.row.RowMetaInterface)3 Shell (org.eclipse.swt.widgets.Shell)2 TextFileInputFieldInterface (org.pentaho.di.core.gui.TextFileInputFieldInterface)2 RowMeta (org.pentaho.di.core.row.RowMeta)2 Variables (org.pentaho.di.core.variables.Variables)2 TextFileInputMeta (org.pentaho.di.trans.steps.fileinput.text.TextFileInputMeta)2 ByteArrayInputStream (java.io.ByteArrayInputStream)1 ByteArrayOutputStream (java.io.ByteArrayOutputStream)1 IOException (java.io.IOException)1 OutputStream (java.io.OutputStream)1