use of org.pentaho.di.trans.steps.file.BaseFileField in project pentaho-kettle by pentaho.
the class TextFileInputMeta method getXML.
/**
 * Serializes this step's settings to an XML fragment.
 *
 * <p>The fragment contains the general content options, one group of tags per input file inside
 * {@code <file>}, one {@code <filter>} element per configured filter, one {@code <field>} element
 * per input field, followed by the error-handling and additional-output-field settings.
 *
 * <p>Side effects: the per-file arrays of {@code inputFiles} are normalized to the length of
 * {@code inputFiles.fileName}, and every file name is registered with the named-cluster embed
 * manager of the parent transformation when such a parent is available.
 *
 * @return the XML fragment describing this step
 */
@Override
public String getXML() {
  StringBuilder retval = new StringBuilder(1500);

  retval.append(" ").append(XMLHandler.addTagValue("accept_filenames", inputFiles.acceptingFilenames));
  retval.append(" ").append(XMLHandler.addTagValue("passing_through_fields", inputFiles.passingThruFields));
  retval.append(" ").append(XMLHandler.addTagValue("accept_field", inputFiles.acceptingField));
  retval.append(" ").append(XMLHandler.addTagValue("accept_stepname", (acceptingStep != null ? acceptingStep.getName() : "")));
  retval.append(" ").append(XMLHandler.addTagValue("separator", content.separator));
  retval.append(" ").append(XMLHandler.addTagValue("enclosure", content.enclosure));
  retval.append(" ").append(XMLHandler.addTagValue("enclosure_breaks", content.breakInEnclosureAllowed));
  retval.append(" ").append(XMLHandler.addTagValue("escapechar", content.escapeCharacter));
  retval.append(" ").append(XMLHandler.addTagValue("header", content.header));
  retval.append(" ").append(XMLHandler.addTagValue("nr_headerlines", content.nrHeaderLines));
  retval.append(" ").append(XMLHandler.addTagValue("footer", content.footer));
  retval.append(" ").append(XMLHandler.addTagValue("nr_footerlines", content.nrFooterLines));
  retval.append(" ").append(XMLHandler.addTagValue("line_wrapped", content.lineWrapped));
  retval.append(" ").append(XMLHandler.addTagValue("nr_wraps", content.nrWraps));
  retval.append(" ").append(XMLHandler.addTagValue("layout_paged", content.layoutPaged));
  retval.append(" ").append(XMLHandler.addTagValue("nr_lines_per_page", content.nrLinesPerPage));
  retval.append(" ").append(XMLHandler.addTagValue("nr_lines_doc_header", content.nrLinesDocHeader));
  retval.append(" ").append(XMLHandler.addTagValue("noempty", content.noEmptyLines));
  retval.append(" ").append(XMLHandler.addTagValue("include", content.includeFilename));
  retval.append(" ").append(XMLHandler.addTagValue("include_field", content.filenameField));
  retval.append(" ").append(XMLHandler.addTagValue("rownum", content.includeRowNumber));
  retval.append(" ").append(XMLHandler.addTagValue("rownumByFile", content.rowNumberByFile));
  retval.append(" ").append(XMLHandler.addTagValue("rownum_field", content.rowNumberField));
  retval.append(" ").append(XMLHandler.addTagValue("format", content.fileFormat));
  retval.append(" ").append(XMLHandler.addTagValue("encoding", content.encoding));
  retval.append(" ").append(XMLHandler.addTagValue("length", content.length));
  retval.append(" ").append(XMLHandler.addTagValue("add_to_result_filenames", inputFiles.isaddresult));

  retval.append(" <file>").append(Const.CR);
  // The per-file arrays (fileMask, excludeFileMask, fileRequired, includeSubFolders) must be as
  // long as fileName, otherwise the indexed reads below throw ArrayIndexOutOfBoundsException.
  inputFiles.normalizeAllocation(inputFiles.fileName.length);
  for (int i = 0; i < inputFiles.fileName.length; i++) {
    saveSource(retval, inputFiles.fileName[i]);
    // The meta may be serialized before it is attached to a step or transformation;
    // registering the URL is only possible once a parent transformation exists.
    if (parentStepMeta != null && parentStepMeta.getParentTransMeta() != null) {
      parentStepMeta.getParentTransMeta().getNamedClusterEmbedManager().registerUrl(inputFiles.fileName[i]);
    }
    retval.append(" ").append(XMLHandler.addTagValue("filemask", inputFiles.fileMask[i]));
    retval.append(" ").append(XMLHandler.addTagValue("exclude_filemask", inputFiles.excludeFileMask[i]));
    retval.append(" ").append(XMLHandler.addTagValue("file_required", inputFiles.fileRequired[i]));
    retval.append(" ").append(XMLHandler.addTagValue("include_subfolders", inputFiles.includeSubFolders[i]));
  }
  retval.append(" ").append(XMLHandler.addTagValue("type", content.fileType));
  retval.append(" ").append(XMLHandler.addTagValue("compression", (content.fileCompression == null) ? "None" : content.fileCompression));
  retval.append(" </file>").append(Const.CR);

  retval.append(" <filters>").append(Const.CR);
  for (int i = 0; i < filter.length; i++) {
    // A non-null filter string is written Base64-encoded behind STRING_BASE64_PREFIX;
    // a null filter string is written as an empty value without the prefix.
    // NOTE(review): getBytes()/new String(byte[]) use the platform default charset here;
    // whatever decodes this value must use the same charset - confirm before changing.
    String filterString = filter[i].getFilterString();
    byte[] filterBytes = new byte[] {};
    String filterPrefix = "";
    if (filterString != null) {
      filterBytes = filterString.getBytes();
      filterPrefix = STRING_BASE64_PREFIX;
    }
    String filterEncoded = filterPrefix + new String(Base64.encodeBase64(filterBytes));

    retval.append(" <filter>").append(Const.CR);
    retval.append(" ").append(XMLHandler.addTagValue("filter_string", filterEncoded, false));
    retval.append(" ").append(XMLHandler.addTagValue("filter_position", filter[i].getFilterPosition(), false));
    retval.append(" ").append(XMLHandler.addTagValue("filter_is_last_line", filter[i].isFilterLastLine(), false));
    retval.append(" ").append(XMLHandler.addTagValue("filter_is_positive", filter[i].isFilterPositive(), false));
    retval.append(" </filter>").append(Const.CR);
  }
  retval.append(" </filters>").append(Const.CR);

  retval.append(" <fields>").append(Const.CR);
  for (int i = 0; i < inputFields.length; i++) {
    BaseFileField field = inputFields[i];
    retval.append(" <field>").append(Const.CR);
    retval.append(" ").append(XMLHandler.addTagValue("name", field.getName()));
    retval.append(" ").append(XMLHandler.addTagValue("type", field.getTypeDesc()));
    retval.append(" ").append(XMLHandler.addTagValue("format", field.getFormat()));
    retval.append(" ").append(XMLHandler.addTagValue("currency", field.getCurrencySymbol()));
    retval.append(" ").append(XMLHandler.addTagValue("decimal", field.getDecimalSymbol()));
    retval.append(" ").append(XMLHandler.addTagValue("group", field.getGroupSymbol()));
    retval.append(" ").append(XMLHandler.addTagValue("nullif", field.getNullString()));
    retval.append(" ").append(XMLHandler.addTagValue("ifnull", field.getIfNullValue()));
    retval.append(" ").append(XMLHandler.addTagValue("position", field.getPosition()));
    retval.append(" ").append(XMLHandler.addTagValue("length", field.getLength()));
    retval.append(" ").append(XMLHandler.addTagValue("precision", field.getPrecision()));
    retval.append(" ").append(XMLHandler.addTagValue("trim_type", field.getTrimTypeCode()));
    retval.append(" ").append(XMLHandler.addTagValue("repeat", field.isRepeated()));
    retval.append(" </field>").append(Const.CR);
  }
  retval.append(" </fields>").append(Const.CR);
  retval.append(" ").append(XMLHandler.addTagValue("limit", content.rowLimit));

  // ERROR HANDLING
  retval.append(" ").append(XMLHandler.addTagValue("error_ignored", errorHandling.errorIgnored));
  retval.append(" ").append(XMLHandler.addTagValue("skip_bad_files", errorHandling.skipBadFiles));
  retval.append(" ").append(XMLHandler.addTagValue("file_error_field", errorHandling.fileErrorField));
  retval.append(" ").append(XMLHandler.addTagValue("file_error_message_field", errorHandling.fileErrorMessageField));
  retval.append(" ").append(XMLHandler.addTagValue("error_line_skipped", errorLineSkipped));
  retval.append(" ").append(XMLHandler.addTagValue("error_count_field", errorCountField));
  retval.append(" ").append(XMLHandler.addTagValue("error_fields_field", errorFieldsField));
  retval.append(" ").append(XMLHandler.addTagValue("error_text_field", errorTextField));
  retval.append(" ").append(XMLHandler.addTagValue("bad_line_files_destination_directory", errorHandling.warningFilesDestinationDirectory));
  retval.append(" ").append(XMLHandler.addTagValue("bad_line_files_extension", errorHandling.warningFilesExtension));
  retval.append(" ").append(XMLHandler.addTagValue("error_line_files_destination_directory", errorHandling.errorFilesDestinationDirectory));
  retval.append(" ").append(XMLHandler.addTagValue("error_line_files_extension", errorHandling.errorFilesExtension));
  retval.append(" ").append(XMLHandler.addTagValue("line_number_files_destination_directory", errorHandling.lineNumberFilesDestinationDirectory));
  retval.append(" ").append(XMLHandler.addTagValue("line_number_files_extension", errorHandling.lineNumberFilesExtension));

  retval.append(" ").append(XMLHandler.addTagValue("date_format_lenient", content.dateFormatLenient));
  retval.append(" ").append(XMLHandler.addTagValue("date_format_locale", content.dateFormatLocale != null ? content.dateFormatLocale.toString() : null));

  // ADDITIONAL OUTPUT FIELDS
  retval.append(" ").append(XMLHandler.addTagValue("shortFileFieldName", additionalOutputFields.shortFilenameField));
  retval.append(" ").append(XMLHandler.addTagValue("pathFieldName", additionalOutputFields.pathField));
  retval.append(" ").append(XMLHandler.addTagValue("hiddenFieldName", additionalOutputFields.hiddenField));
  retval.append(" ").append(XMLHandler.addTagValue("lastModificationTimeFieldName", additionalOutputFields.lastModificationField));
  retval.append(" ").append(XMLHandler.addTagValue("uriNameFieldName", additionalOutputFields.uriField));
  retval.append(" ").append(XMLHandler.addTagValue("rootUriNameFieldName", additionalOutputFields.rootUriField));
  retval.append(" ").append(XMLHandler.addTagValue("extensionFieldName", additionalOutputFields.extensionField));
  retval.append(" ").append(XMLHandler.addTagValue("sizeFieldName", additionalOutputFields.sizeField));

  return retval.toString();
}
use of org.pentaho.di.trans.steps.file.BaseFileField in project pentaho-kettle by pentaho.
the class TextFileInputMeta method saveRep.
/**
 * Saves this step's settings as step attributes in the given repository.
 *
 * <p>Scalar settings are stored once; per-file, per-filter and per-field settings are stored
 * with the element index as the attribute number.
 *
 * <p>Side effect: the per-file arrays of {@code inputFiles} are normalized to the length of
 * {@code inputFiles.fileName} before they are written.
 *
 * @param rep the repository to write to
 * @param metaStore not used by this method
 * @param id_transformation id of the transformation owning the step
 * @param id_step id of the step being saved
 * @throws KettleException if anything goes wrong while saving; the original exception is the cause
 */
@Override
public void saveRep(Repository rep, IMetaStore metaStore, ObjectId id_transformation, ObjectId id_step) throws KettleException {
  try {
    rep.saveStepAttribute(id_transformation, id_step, "accept_filenames", inputFiles.acceptingFilenames);
    rep.saveStepAttribute(id_transformation, id_step, "passing_through_fields", inputFiles.passingThruFields);
    rep.saveStepAttribute(id_transformation, id_step, "accept_field", inputFiles.acceptingField);
    rep.saveStepAttribute(id_transformation, id_step, "accept_stepname", (acceptingStep != null ? acceptingStep.getName() : ""));
    rep.saveStepAttribute(id_transformation, id_step, "separator", content.separator);
    rep.saveStepAttribute(id_transformation, id_step, "enclosure", content.enclosure);
    rep.saveStepAttribute(id_transformation, id_step, "enclosure_breaks", content.breakInEnclosureAllowed);
    rep.saveStepAttribute(id_transformation, id_step, "escapechar", content.escapeCharacter);
    rep.saveStepAttribute(id_transformation, id_step, "header", content.header);
    rep.saveStepAttribute(id_transformation, id_step, "nr_headerlines", content.nrHeaderLines);
    rep.saveStepAttribute(id_transformation, id_step, "footer", content.footer);
    rep.saveStepAttribute(id_transformation, id_step, "nr_footerlines", content.nrFooterLines);
    rep.saveStepAttribute(id_transformation, id_step, "line_wrapped", content.lineWrapped);
    rep.saveStepAttribute(id_transformation, id_step, "nr_wraps", content.nrWraps);
    rep.saveStepAttribute(id_transformation, id_step, "layout_paged", content.layoutPaged);
    rep.saveStepAttribute(id_transformation, id_step, "nr_lines_per_page", content.nrLinesPerPage);
    rep.saveStepAttribute(id_transformation, id_step, "nr_lines_doc_header", content.nrLinesDocHeader);
    rep.saveStepAttribute(id_transformation, id_step, "noempty", content.noEmptyLines);
    rep.saveStepAttribute(id_transformation, id_step, "include", content.includeFilename);
    rep.saveStepAttribute(id_transformation, id_step, "include_field", content.filenameField);
    rep.saveStepAttribute(id_transformation, id_step, "rownum", content.includeRowNumber);
    rep.saveStepAttribute(id_transformation, id_step, "rownumByFile", content.rowNumberByFile);
    rep.saveStepAttribute(id_transformation, id_step, "rownum_field", content.rowNumberField);
    rep.saveStepAttribute(id_transformation, id_step, "format", content.fileFormat);
    rep.saveStepAttribute(id_transformation, id_step, "encoding", content.encoding);
    rep.saveStepAttribute(id_transformation, id_step, "length", content.length);
    rep.saveStepAttribute(id_transformation, id_step, "add_to_result_filenames", inputFiles.isaddresult);
    rep.saveStepAttribute(id_transformation, id_step, "limit", content.rowLimit);

    // Same precaution as in getXML(): the per-file arrays must be as long as fileName,
    // otherwise the indexed reads below throw ArrayIndexOutOfBoundsException.
    inputFiles.normalizeAllocation(inputFiles.fileName.length);
    for (int i = 0; i < inputFiles.fileName.length; i++) {
      saveSourceRep(rep, id_transformation, id_step, i, inputFiles.fileName[i]);
      rep.saveStepAttribute(id_transformation, id_step, i, "file_mask", inputFiles.fileMask[i]);
      rep.saveStepAttribute(id_transformation, id_step, i, "exclude_file_mask", inputFiles.excludeFileMask[i]);
      rep.saveStepAttribute(id_transformation, id_step, i, "file_required", inputFiles.fileRequired[i]);
      rep.saveStepAttribute(id_transformation, id_step, i, "include_subfolders", inputFiles.includeSubFolders[i]);
    }
    rep.saveStepAttribute(id_transformation, id_step, "file_type", content.fileType);
    rep.saveStepAttribute(id_transformation, id_step, "compression", (content.fileCompression == null) ? "None" : content.fileCompression);

    for (int i = 0; i < filter.length; i++) {
      rep.saveStepAttribute(id_transformation, id_step, i, "filter_position", filter[i].getFilterPosition());
      rep.saveStepAttribute(id_transformation, id_step, i, "filter_string", filter[i].getFilterString());
      rep.saveStepAttribute(id_transformation, id_step, i, "filter_is_last_line", filter[i].isFilterLastLine());
      rep.saveStepAttribute(id_transformation, id_step, i, "filter_is_positive", filter[i].isFilterPositive());
    }

    for (int i = 0; i < inputFields.length; i++) {
      BaseFileField field = inputFields[i];
      rep.saveStepAttribute(id_transformation, id_step, i, "field_name", field.getName());
      rep.saveStepAttribute(id_transformation, id_step, i, "field_type", field.getTypeDesc());
      rep.saveStepAttribute(id_transformation, id_step, i, "field_format", field.getFormat());
      rep.saveStepAttribute(id_transformation, id_step, i, "field_currency", field.getCurrencySymbol());
      rep.saveStepAttribute(id_transformation, id_step, i, "field_decimal", field.getDecimalSymbol());
      rep.saveStepAttribute(id_transformation, id_step, i, "field_group", field.getGroupSymbol());
      rep.saveStepAttribute(id_transformation, id_step, i, "field_nullif", field.getNullString());
      rep.saveStepAttribute(id_transformation, id_step, i, "field_ifnull", field.getIfNullValue());
      rep.saveStepAttribute(id_transformation, id_step, i, "field_position", field.getPosition());
      rep.saveStepAttribute(id_transformation, id_step, i, "field_length", field.getLength());
      rep.saveStepAttribute(id_transformation, id_step, i, "field_precision", field.getPrecision());
      rep.saveStepAttribute(id_transformation, id_step, i, "field_trim_type", field.getTrimTypeCode());
      rep.saveStepAttribute(id_transformation, id_step, i, "field_repeat", field.isRepeated());
    }

    // ERROR HANDLING
    rep.saveStepAttribute(id_transformation, id_step, "error_ignored", errorHandling.errorIgnored);
    rep.saveStepAttribute(id_transformation, id_step, "skip_bad_files", errorHandling.skipBadFiles);
    rep.saveStepAttribute(id_transformation, id_step, "file_error_field", errorHandling.fileErrorField);
    rep.saveStepAttribute(id_transformation, id_step, "file_error_message_field", errorHandling.fileErrorMessageField);
    rep.saveStepAttribute(id_transformation, id_step, "error_line_skipped", errorLineSkipped);
    rep.saveStepAttribute(id_transformation, id_step, "error_count_field", errorCountField);
    rep.saveStepAttribute(id_transformation, id_step, "error_fields_field", errorFieldsField);
    rep.saveStepAttribute(id_transformation, id_step, "error_text_field", errorTextField);
    rep.saveStepAttribute(id_transformation, id_step, "bad_line_files_dest_dir", errorHandling.warningFilesDestinationDirectory);
    rep.saveStepAttribute(id_transformation, id_step, "bad_line_files_ext", errorHandling.warningFilesExtension);
    rep.saveStepAttribute(id_transformation, id_step, "error_line_files_dest_dir", errorHandling.errorFilesDestinationDirectory);
    rep.saveStepAttribute(id_transformation, id_step, "error_line_files_ext", errorHandling.errorFilesExtension);
    rep.saveStepAttribute(id_transformation, id_step, "line_number_files_dest_dir", errorHandling.lineNumberFilesDestinationDirectory);
    rep.saveStepAttribute(id_transformation, id_step, "line_number_files_ext", errorHandling.lineNumberFilesExtension);

    rep.saveStepAttribute(id_transformation, id_step, "date_format_lenient", content.dateFormatLenient);
    rep.saveStepAttribute(id_transformation, id_step, "date_format_locale", content.dateFormatLocale != null ? content.dateFormatLocale.toString() : null);

    // ADDITIONAL OUTPUT FIELDS
    rep.saveStepAttribute(id_transformation, id_step, "shortFileFieldName", additionalOutputFields.shortFilenameField);
    rep.saveStepAttribute(id_transformation, id_step, "pathFieldName", additionalOutputFields.pathField);
    rep.saveStepAttribute(id_transformation, id_step, "hiddenFieldName", additionalOutputFields.hiddenField);
    rep.saveStepAttribute(id_transformation, id_step, "lastModificationTimeFieldName", additionalOutputFields.lastModificationField);
    rep.saveStepAttribute(id_transformation, id_step, "uriNameFieldName", additionalOutputFields.uriField);
    rep.saveStepAttribute(id_transformation, id_step, "rootUriNameFieldName", additionalOutputFields.rootUriField);
    rep.saveStepAttribute(id_transformation, id_step, "extensionFieldName", additionalOutputFields.extensionField);
    rep.saveStepAttribute(id_transformation, id_step, "sizeFieldName", additionalOutputFields.sizeField);
  } catch (Exception e) {
    throw new KettleException("Unable to save step information to the repository for id_step=" + id_step, e);
  }
}
use of org.pentaho.di.trans.steps.file.BaseFileField in project pentaho-kettle by pentaho.
the class TextFileInputMeta method getFields.
/**
 * Builds the output row layout of this step into {@code row}.
 *
 * <p>Order of the resulting fields:
 * <ol>
 *   <li>incoming fields: cleared when pass-through is off, otherwise merged from the first
 *       non-null entry of {@code info};</li>
 *   <li>one value per configured input field (a field of type NONE is treated as String);</li>
 *   <li>when errors are ignored: the error count, error fields and error text fields that have
 *       a non-empty name;</li>
 *   <li>the file name and row number fields when enabled;</li>
 *   <li>the additional output fields with a non-blank name, in the order short file name,
 *       extension, path, size, hidden, last modification, URI, root URI.</li>
 * </ol>
 *
 * @param row the row metadata to modify in place
 * @param name the step name, recorded as origin of every field added here
 * @param info row metadata of the info steps, may be {@code null}
 * @param nextStep not used by this method
 * @param space variable space used to resolve variables in the additional output field names
 * @param repository not used by this method
 * @param metaStore not used by this method
 * @throws KettleStepException if the value metadata for an input field cannot be created
 */
@Override
public void getFields(RowMetaInterface row, String name, RowMetaInterface[] info, StepMeta nextStep, VariableSpace space, Repository repository, IMetaStore metaStore) throws KettleStepException {
  if (!inputFiles.passingThruFields) {
    // all incoming fields are not transmitted !
    row.clear();
  } else if (info != null) {
    // Only the first available info row layout is merged in.
    for (RowMetaInterface infoRowMeta : info) {
      if (infoRowMeta != null) {
        row.mergeRowMeta(infoRowMeta, name);
        break;
      }
    }
  }

  for (BaseFileField field : inputFields) {
    int type = field.getType();
    if (type == ValueMetaInterface.TYPE_NONE) {
      type = ValueMetaInterface.TYPE_STRING;
    }
    try {
      ValueMetaInterface v = ValueMetaFactory.createValueMeta(field.getName(), type);
      v.setLength(field.getLength());
      v.setPrecision(field.getPrecision());
      v.setOrigin(name);
      v.setConversionMask(field.getFormat());
      v.setDecimalSymbol(field.getDecimalSymbol());
      v.setGroupingSymbol(field.getGroupSymbol());
      v.setCurrencySymbol(field.getCurrencySymbol());
      v.setDateFormatLenient(content.dateFormatLenient);
      v.setDateFormatLocale(content.dateFormatLocale);
      v.setTrimType(field.getTrimType());
      row.addValueMeta(v);
    } catch (Exception e) {
      throw new KettleStepException(e);
    }
  }

  if (errorHandling.errorIgnored) {
    if (errorCountField != null && errorCountField.length() > 0) {
      ValueMetaInterface v = new ValueMetaInteger(errorCountField);
      v.setLength(ValueMetaInterface.DEFAULT_INTEGER_LENGTH, 0);
      v.setOrigin(name);
      row.addValueMeta(v);
    }
    if (errorFieldsField != null && errorFieldsField.length() > 0) {
      ValueMetaInterface v = new ValueMetaString(errorFieldsField);
      v.setOrigin(name);
      row.addValueMeta(v);
    }
    if (errorTextField != null && errorTextField.length() > 0) {
      ValueMetaInterface v = new ValueMetaString(errorTextField);
      v.setOrigin(name);
      row.addValueMeta(v);
    }
  }

  if (content.includeFilename) {
    ValueMetaInterface v = new ValueMetaString(content.filenameField);
    v.setLength(100);
    v.setOrigin(name);
    row.addValueMeta(v);
  }
  if (content.includeRowNumber) {
    ValueMetaInterface v = new ValueMetaInteger(content.rowNumberField);
    v.setLength(ValueMetaInterface.DEFAULT_INTEGER_LENGTH, 0);
    v.setOrigin(name);
    row.addValueMeta(v);
  }

  // Additional output fields. Every field name goes through variable substitution.
  if (StringUtils.isNotBlank(additionalOutputFields.shortFilenameField)) {
    ValueMetaInterface v = new ValueMetaString(space.environmentSubstitute(additionalOutputFields.shortFilenameField));
    v.setLength(100, -1);
    v.setOrigin(name);
    row.addValueMeta(v);
  }
  if (StringUtils.isNotBlank(additionalOutputFields.extensionField)) {
    ValueMetaInterface v = new ValueMetaString(space.environmentSubstitute(additionalOutputFields.extensionField));
    v.setLength(100, -1);
    v.setOrigin(name);
    row.addValueMeta(v);
  }
  if (StringUtils.isNotBlank(additionalOutputFields.pathField)) {
    ValueMetaInterface v = new ValueMetaString(space.environmentSubstitute(additionalOutputFields.pathField));
    v.setLength(100, -1);
    v.setOrigin(name);
    row.addValueMeta(v);
  }
  if (StringUtils.isNotBlank(additionalOutputFields.sizeField)) {
    // NOTE(review): the size column is declared as a String value here; confirm this matches
    // the type of the size value the step actually writes into the row.
    ValueMetaInterface v = new ValueMetaString(space.environmentSubstitute(additionalOutputFields.sizeField));
    v.setOrigin(name);
    v.setLength(9);
    row.addValueMeta(v);
  }
  if (StringUtils.isNotBlank(additionalOutputFields.hiddenField)) {
    ValueMetaInterface v = new ValueMetaBoolean(space.environmentSubstitute(additionalOutputFields.hiddenField));
    v.setOrigin(name);
    row.addValueMeta(v);
  }
  if (StringUtils.isNotBlank(additionalOutputFields.lastModificationField)) {
    ValueMetaInterface v = new ValueMetaDate(space.environmentSubstitute(additionalOutputFields.lastModificationField));
    v.setOrigin(name);
    row.addValueMeta(v);
  }
  if (StringUtils.isNotBlank(additionalOutputFields.uriField)) {
    ValueMetaInterface v = new ValueMetaString(space.environmentSubstitute(additionalOutputFields.uriField));
    v.setLength(100, -1);
    v.setOrigin(name);
    row.addValueMeta(v);
  }
  if (StringUtils.isNotBlank(additionalOutputFields.rootUriField)) {
    // Resolve variables in the field name exactly like the other additional output fields do.
    ValueMetaInterface v = new ValueMetaString(space.environmentSubstitute(additionalOutputFields.rootUriField));
    v.setLength(100, -1);
    v.setOrigin(name);
    row.addValueMeta(v);
  }
}
use of org.pentaho.di.trans.steps.file.BaseFileField in project pentaho-kettle by pentaho.
the class TextFileInputUtils method convertLineToRow.
/**
 * Converts one line of text into an output row.
 *
 * <p>The line is split with {@link #convertLineToStrings}, every piece is converted with the
 * value metadata found at the matching position of {@code outputRowMeta}/{@code convertRowMeta},
 * and the error, file name, row number and additional output values are appended after the
 * input fields. Pass-through values, when given, occupy the first {@code nrPassThruFields}
 * positions of the row.
 *
 * <p>When a field cannot be converted and errors are ignored, the field is set to
 * {@code null}, the error count/fields/text values are updated, the error handler (if any) is
 * notified, and - if error lines are to be skipped - the whole row is dropped.
 *
 * @param log log channel used for the detailed conversion warnings
 * @param textFileLine the line to convert
 * @param info step metadata (input fields, error handling and content options)
 * @param passThruFields values to copy to the start of the row, or {@code null} for none
 * @param nrPassThruFields number of pass-through values
 * @param outputRowMeta layout of the row being produced
 * @param convertRowMeta metadata used as the source format for the string conversion
 * @param fname file name, used as log subject and as value of the file name field
 * @param rowNr row number, used in messages and as value of the row number field
 * @param delimiter field delimiter passed on to the line splitter
 * @param enclosure enclosure string passed on to the line splitter
 * @param escapeCharacter escape string passed on to the line splitter
 * @param errorHandler receives line errors when errors are ignored, may be {@code null}
 * @param additionalOutputFields names of the additional output fields; a value is appended for
 *        every name that is not {@code null}
 * @param shortFilename value for the short file name field
 * @param path value for the path field
 * @param hidden value for the hidden field
 * @param modificationDateTime value for the last modification field
 * @param uri value for the URI field
 * @param rooturi value for the root URI field
 * @param extension value for the extension field
 * @param size value for the size field
 * @return the row, or {@code null} when there is no line or the row is skipped after an error
 * @throws KettleException if the line cannot be split or a field cannot be converted while
 *         errors are not ignored
 */
public static final Object[] convertLineToRow(LogChannelInterface log, TextFileLine textFileLine, TextFileInputMeta info, Object[] passThruFields, int nrPassThruFields, RowMetaInterface outputRowMeta, RowMetaInterface convertRowMeta, String fname, long rowNr, String delimiter, String enclosure, String escapeCharacter, FileErrorHandler errorHandler, BaseFileInputAdditionalField additionalOutputFields, String shortFilename, String path, boolean hidden, Date modificationDateTime, String uri, String rooturi, String extension, Long size) throws KettleException {
  if (textFileLine == null || textFileLine.line == null) {
    return null;
  }

  // over-allocate a bit in the row producing steps...
  Object[] r = RowDataUtil.allocateRowData(outputRowMeta.size());

  int nrfields = info.inputFields.length;

  // The error values are only tracked (non-null) when errors are ignored and the
  // corresponding output field has been given a name.
  Long errorCount = null;
  if (info.errorHandling.errorIgnored && info.getErrorCountField() != null && info.getErrorCountField().length() > 0) {
    errorCount = Long.valueOf(0L);
  }
  String errorFields = null;
  if (info.errorHandling.errorIgnored && info.getErrorFieldsField() != null && info.getErrorFieldsField().length() > 0) {
    errorFields = "";
  }
  String errorText = null;
  if (info.errorHandling.errorIgnored && info.getErrorTextField() != null && info.getErrorTextField().length() > 0) {
    errorText = "";
  }

  try {
    String[] strings = convertLineToStrings(log, textFileLine.line, info, delimiter, enclosure, escapeCharacter);
    int shiftFields = (passThruFields == null ? 0 : nrPassThruFields);

    for (int fieldnr = 0; fieldnr < nrfields; fieldnr++) {
      BaseFileField f = info.inputFields[fieldnr];
      int valuenr = shiftFields + fieldnr;
      ValueMetaInterface valueMeta = outputRowMeta.getValueMeta(valuenr);
      ValueMetaInterface convertMeta = convertRowMeta.getValueMeta(valuenr);

      Object value;
      String nullif = f.getNullString();
      String ifnull = f.getIfNullValue();
      int trim_type = f.getTrimType();

      if (fieldnr < strings.length) {
        String pol = strings[fieldnr];
        try {
          if (valueMeta.isNull(pol) || !Utils.isEmpty(nullif) && nullif.equals(pol)) {
            pol = null;
          }
          value = valueMeta.convertDataFromString(pol, convertMeta, nullif, ifnull, trim_type);
        } catch (Exception e) {
          // OK, give some feedback!
          String message = BaseMessages.getString(PKG, "TextFileInput.Log.CoundNotParseField", valueMeta.toStringMeta(), "" + pol, valueMeta.getConversionMask(), "" + rowNr);
          if (info.errorHandling.errorIgnored) {
            log.logDetailed(fname, BaseMessages.getString(PKG, "TextFileInput.Log.Warning") + ": " + message + " : " + e.getMessage());
            value = null;
            if (errorCount != null) {
              errorCount = Long.valueOf(errorCount.longValue() + 1L);
            }
            if (errorFields != null) {
              // Field names are collected tab-separated.
              StringBuilder sb = new StringBuilder(errorFields);
              if (sb.length() > 0) {
                // TODO document this change
                sb.append("\t");
              }
              sb.append(valueMeta.getName());
              errorFields = sb.toString();
            }
            if (errorText != null) {
              // Messages are collected one per line.
              StringBuilder sb = new StringBuilder(errorText);
              if (sb.length() > 0) {
                sb.append(Const.CR);
              }
              sb.append(message);
              errorText = sb.toString();
            }
            if (errorHandler != null) {
              errorHandler.handleLineError(textFileLine.lineNumber, AbstractFileErrorHandler.NO_PARTS);
            }
            if (info.isErrorLineSkipped()) {
              // Drop the row; the remaining fields are still parsed so that further
              // conversion errors on this line are reported as well.
              r = null;
            }
          } else {
            throw new KettleException(message, e);
          }
        }
      } else {
        // No data found: TRAILING NULLCOLS: add null value...
        value = null;
      }

      // Now add value to the row (if we're not skipping the row)
      if (r != null) {
        r[valuenr] = value;
      }
    }

    // none of this applies if we're skipping the row
    if (r != null) {
      // Add the error handling fields...
      int index = shiftFields + nrfields;
      if (errorCount != null) {
        r[index] = errorCount;
        index++;
      }
      if (errorFields != null) {
        r[index] = errorFields;
        index++;
      }
      if (errorText != null) {
        r[index] = errorText;
        index++;
      }

      // Possibly add a filename...
      if (info.content.includeFilename) {
        r[index] = fname;
        index++;
      }

      // Possibly add a row number...
      if (info.content.includeRowNumber) {
        r[index] = Long.valueOf(rowNr);
        index++;
      }

      // NOTE(review): the checks below only test the field names for non-null. Confirm that
      // the code building outputRowMeta uses the same condition; if it skips empty names
      // instead, an empty-string name would shift every following column by one.

      // Possibly add short filename...
      if (additionalOutputFields.shortFilenameField != null) {
        r[index] = shortFilename;
        index++;
      }
      // Add Extension
      if (additionalOutputFields.extensionField != null) {
        r[index] = extension;
        index++;
      }
      // add path
      if (additionalOutputFields.pathField != null) {
        r[index] = path;
        index++;
      }
      // Add Size
      if (additionalOutputFields.sizeField != null) {
        r[index] = size;
        index++;
      }
      // add Hidden
      if (additionalOutputFields.hiddenField != null) {
        r[index] = hidden;
        index++;
      }
      // Add modification date
      if (additionalOutputFields.lastModificationField != null) {
        r[index] = modificationDateTime;
        index++;
      }
      // Add Uri
      if (additionalOutputFields.uriField != null) {
        r[index] = uri;
        index++;
      }
      // Add RootUri
      if (additionalOutputFields.rootUriField != null) {
        r[index] = rooturi;
        index++;
      }
    }
    // End if r != null
  } catch (Exception e) {
    throw new KettleException(BaseMessages.getString(PKG, "TextFileInput.Log.Error.ErrorConvertingLineText"), e);
  }

  if (r != null && passThruFields != null) {
    // Simply add all fields from source files step
    for (int i = 0; i < nrPassThruFields; i++) {
      r[i] = passThruFields[i];
    }
  }

  return r;
}
use of org.pentaho.di.trans.steps.file.BaseFileField in project pentaho-kettle by pentaho.
the class TextFileInputUtils method convertLineToStrings.
/**
 * Splits one line of text into its raw field strings.
 *
 * <p>For file type "CSV" (compared case-insensitively) the line is split on {@code delimiter},
 * honouring {@code enclosure} and the escape string: enclosures around a field are removed,
 * doubled enclosures are collapsed when no escape string is configured, and escaped
 * enclosures, delimiters and escape strings are unescaped. For any other file type the line is
 * treated as fixed width and each field is cut out at its configured position and length,
 * counted in characters or - when the content length unit is not "Characters" and an encoding
 * is set - in bytes of that encoding.
 *
 * <p>The returned array always has one slot per configured input field; values found beyond
 * the last configured field are dropped, missing values are left {@code null} (CSV) or set to
 * the empty string (fixed width).
 *
 * @param log log channel used for row-level tracing
 * @param line the line to split
 * @param inf step metadata providing the input fields, file type, escape string and encoding
 * @param delimiter the field delimiter (CSV only)
 * @param enclosure the enclosure string, may be {@code null} (CSV only)
 * @param escapeCharacters the escape string, may be {@code null}; only its length is used here,
 *        the comparisons use {@code inf.content.escapeCharacter}
 * @return the field strings, or {@code null} when {@code line} is {@code null}
 * @throws KettleException if anything goes wrong while splitting; the original exception is the cause
 */
public static final String[] convertLineToStrings(LogChannelInterface log, String line, TextFileInputMeta inf, String delimiter, String enclosure, String escapeCharacters) throws KettleException {
  String[] strings = new String[inf.inputFields.length];
  int fieldnr;

  // piece of line
  String pol;

  try {
    if (line == null) {
      return null;
    }

    if (inf.content.fileType.equalsIgnoreCase("CSV")) {
      // Split string in pieces, only for CSV!
      fieldnr = 0;
      int pos = 0;
      int length = line.length();
      boolean dencl = false;

      int len_encl = (enclosure == null ? 0 : enclosure.length());
      int len_esc = (escapeCharacters == null ? 0 : escapeCharacters.length());

      while (pos < length) {
        int from = pos;
        int next;

        boolean encl_found;
        boolean contains_escaped_enclosures = false;
        boolean contains_escaped_separators = false;
        boolean contains_escaped_escape = false;

        // Is the field beginning with an enclosure?
        // "aa;aa";123;"aaa-aaa";000;...
        if (len_encl > 0 && line.substring(from, from + len_encl).equalsIgnoreCase(enclosure)) {
          if (log.isRowLevel()) {
            log.logRowlevel(BaseMessages.getString(PKG, "TextFileInput.Log.ConvertLineToRowTitle"), BaseMessages.getString(PKG, "TextFileInput.Log.Encloruse", line.substring(from, from + len_encl)));
          }
          encl_found = true;
          int p = from + len_encl;

          boolean is_enclosure = len_encl > 0 && p + len_encl < length && line.substring(p, p + len_encl).equalsIgnoreCase(enclosure);
          boolean is_escape = len_esc > 0 && p + len_esc < length && line.substring(p, p + len_esc).equalsIgnoreCase(inf.content.escapeCharacter);

          boolean enclosure_after = false;

          // Is it really an enclosure? See if it's not repeated twice or escaped!
          if ((is_enclosure || is_escape) && p < length - 1) {
            String strnext = line.substring(p + len_encl, p + 2 * len_encl);
            if (strnext.equalsIgnoreCase(enclosure)) {
              p++;
              enclosure_after = true;
              dencl = true;

              // Remember to replace them later on!
              if (is_escape) {
                contains_escaped_enclosures = true;
              }
            } else if (strnext.equals(inf.content.escapeCharacter)) {
              p++;
              // Remember to replace them later on!
              if (is_escape) {
                // remember
                contains_escaped_escape = true;
              }
            }
          }

          // Look for a closing enclosure!
          while ((!is_enclosure || enclosure_after) && p < line.length()) {
            p++;
            enclosure_after = false;
            is_enclosure = len_encl > 0 && p + len_encl < length && line.substring(p, p + len_encl).equals(enclosure);
            is_escape = len_esc > 0 && p + len_esc < length && line.substring(p, p + len_esc).equals(inf.content.escapeCharacter);

            // Is it really an enclosure? See if it's not repeated twice or escaped!
            if ((is_enclosure || is_escape) && p < length - 1) {
              String strnext = line.substring(p + len_encl, p + 2 * len_encl);
              if (strnext.equals(enclosure)) {
                p++;
                enclosure_after = true;
                dencl = true;

                // Remember to replace them later on!
                if (is_escape) {
                  // remember
                  contains_escaped_enclosures = true;
                }
              } else if (strnext.equals(inf.content.escapeCharacter)) {
                p++;
                // Remember to replace them later on!
                if (is_escape) {
                  // remember
                  contains_escaped_escape = true;
                }
              }
            }
          }

          if (p >= length) {
            next = p;
          } else {
            next = p + len_encl;
          }

          if (log.isRowLevel()) {
            log.logRowlevel(BaseMessages.getString(PKG, "TextFileInput.Log.ConvertLineToRowTitle"), BaseMessages.getString(PKG, "TextFileInput.Log.EndOfEnclosure", "" + p));
          }
        } else {
          encl_found = false;
          boolean found = false;
          int startpoint = from;
          do {
            next = line.indexOf(delimiter, startpoint);

            // See if this position is preceded by an escape character.
            if (len_esc > 0 && next - len_esc > 0) {
              String before = line.substring(next - len_esc, next);

              if (inf.content.escapeCharacter.equals(before)) {
                // take the next separator, this one is escaped...
                startpoint = next + 1;
                contains_escaped_separators = true;
              } else {
                found = true;
              }
            } else {
              found = true;
            }
          } while (!found && next >= 0);
        }
        if (next == -1) {
          next = length;
        }

        if (encl_found && ((from + len_encl) <= (next - len_encl))) {
          pol = line.substring(from + len_encl, next - len_encl);
          if (log.isRowLevel()) {
            log.logRowlevel(BaseMessages.getString(PKG, "TextFileInput.Log.ConvertLineToRowTitle"), BaseMessages.getString(PKG, "TextFileInput.Log.EnclosureFieldFound", "" + pol));
          }
        } else {
          pol = line.substring(from, next);
          if (log.isRowLevel()) {
            log.logRowlevel(BaseMessages.getString(PKG, "TextFileInput.Log.ConvertLineToRowTitle"), BaseMessages.getString(PKG, "TextFileInput.Log.NormalFieldFound", "" + pol));
          }
        }

        // Without an escape string, a doubled enclosure stands for a single one.
        if (dencl && Utils.isEmpty(inf.content.escapeCharacter)) {
          StringBuilder sbpol = new StringBuilder(pol);
          int idx = sbpol.indexOf(enclosure + enclosure);
          while (idx >= 0) {
            sbpol.delete(idx, idx + enclosure.length());
            idx = sbpol.indexOf(enclosure + enclosure);
          }
          pol = sbpol.toString();
        }

        // replace the escaped enclosures with enclosures...
        if (contains_escaped_enclosures) {
          String replace = inf.content.escapeCharacter + enclosure;
          String replaceWith = enclosure;

          pol = Const.replace(pol, replace, replaceWith);
        }

        // replace the escaped separators with separators...
        if (contains_escaped_separators) {
          String replace = inf.content.escapeCharacter + delimiter;
          String replaceWith = delimiter;

          pol = Const.replace(pol, replace, replaceWith);
        }

        // replace the escaped escape with escape...
        if (contains_escaped_escape) {
          String replace = inf.content.escapeCharacter + inf.content.escapeCharacter;
          String replaceWith = inf.content.escapeCharacter;

          pol = Const.replace(pol, replace, replaceWith);
        }

        // Now add pol to the strings found!
        // A line can hold more values than there are configured fields; those surplus values
        // are dropped. (The previous code caught ArrayIndexOutOfBoundsException here and
        // "resized" to an array of the same length, which had exactly this effect.)
        // NOTE(review): if surplus values were meant to be kept, the array must really grow
        // and callers must be checked for assumptions about its length.
        if (fieldnr < strings.length) {
          strings[fieldnr] = pol;
        }

        pos = next + delimiter.length();
        fieldnr++;
      }
      if (pos == length) {
        // The line ended right after a delimiter: there is one more, empty, field.
        if (log.isRowLevel()) {
          log.logRowlevel(BaseMessages.getString(PKG, "TextFileInput.Log.ConvertLineToRowTitle"), BaseMessages.getString(PKG, "TextFileInput.Log.EndOfEmptyLineFound"));
        }
        if (fieldnr < strings.length) {
          strings[fieldnr] = Const.EMPTY_STRING;
        }
        fieldnr++;
      }
    } else {
      // Fixed file format: Simply get the strings at the required positions...
      // Note - charBased is the old default behavior. If this is an old transformation, content.length will be null
      // and should be processed as before. If the content.length is equal to "Characters" or there is no specified encoding,
      // it will still use the old behavior. The *only* way to get the new behavior is if content.length = "Bytes" and
      // the encoding is specified.
      // Default to classic behavior
      boolean charBased = (inf.content.length == null || inf.content.length.equalsIgnoreCase("Characters") || inf.getEncoding() == null);

      // Byte-based mode: the line is encoded once, on first use, and shared by all fields.
      String enc = null;
      byte[] lineBytes = null;

      for (int i = 0; i < inf.inputFields.length; i++) {
        BaseFileField field = inf.inputFields[i];

        int length;
        int fPos = field.getPosition();
        int fLength = field.getLength();
        int fPl = fPos + fLength;

        if (charBased) {
          length = line.length();
          if (fPl <= length) {
            strings[i] = line.substring(fPos, fPl);
          } else {
            if (fPos < length) {
              strings[i] = line.substring(fPos);
            } else {
              strings[i] = "";
            }
          }
        } else {
          if (lineBytes == null) {
            enc = inf.getEncoding();
            lineBytes = line.getBytes(enc);
          }
          length = lineBytes.length;
          if (fPl <= length) {
            strings[i] = new String(Arrays.copyOfRange(lineBytes, fPos, fPl), enc);
          } else {
            if (fPos < length) {
              // The end index of copyOfRange is exclusive: use length (not length - 1) so the
              // last byte of the line is kept, matching line.substring(fPos) above.
              strings[i] = new String(Arrays.copyOfRange(lineBytes, fPos, length), enc);
            } else {
              strings[i] = "";
            }
          }
        }
      }
    }
  } catch (Exception e) {
    throw new KettleException(BaseMessages.getString(PKG, "TextFileInput.Log.Error.ErrorConvertingLine", e.toString()), e);
  }

  return strings;
}
Aggregations