use of org.pentaho.di.trans.steps.file.BaseFileField in project pentaho-kettle by pentaho.
the class TextFileInputTest method readInputWithMissedValues.
/**
 * Reads a two-line virtual file whose second line has an empty middle column and checks that,
 * with the middle field flagged as repeated, the second output row carries the value "1" from
 * the first row in that column.
 *
 * @throws Exception if creating the virtual file, executing the step, or cleaning up fails
 */
@Test
public void readInputWithMissedValues() throws Exception {
  final String virtualFile = createVirtualFile("pdi-14172.txt", "1,1,1\n", "2,,2\n");
  try {
    BaseFileField field2 = field("col2");
    field2.setRepeated(true);

    TextFileInputMeta meta = createMetaObject(field("col1"), field2, field("col3"));
    TextFileInputData data = createDataObject(virtualFile, ",", "col1", "col2", "col3");
    TextFileInput input = StepMockUtil.getStep(TextFileInput.class, TextFileInputMeta.class, "test");

    List<Object[]> output = TransTestingUtil.execute(input, meta, data, 2, false);
    TransTestingUtil.assertResult(new Object[] { "1", "1", "1" }, output.get(0));
    TransTestingUtil.assertResult(new Object[] { "2", "1", "2" }, output.get(1));
  } finally {
    // Clean up even when execution or an assertion above fails, so the virtual file
    // does not outlive this test.
    deleteVfsFile(virtualFile);
  }
}
use of org.pentaho.di.trans.steps.file.BaseFileField in project pentaho-metaverse by pentaho.
the class ExternalResourceStepAnalyzerTest method testGetInputFieldsToIgnore.
/**
 * Stubs one input field, four output fields and two file "resource" fields, then checks that
 * {@code getInputFieldsToIgnore} returns exactly the input field and the one output field that
 * is not a resource field.
 */
@Test
public void testGetInputFieldsToIgnore() {
  doReturn(true).when(analyzer).isInput();

  // Input side: one field, exposed through a mocked row meta registered under the RESOURCE key
  List<ValueMetaInterface> incomingFields = new ArrayList<>();
  incomingFields.add(new ValueMeta("in_field_1"));
  RowMetaInterface incomingRowMeta = mock(RowMetaInterface.class);
  Map<String, RowMetaInterface> incomingRows = new HashMap<>();
  incomingRows.put(ExternalResourceStepAnalyzer.RESOURCE, incomingRowMeta);
  when(incomingRowMeta.getValueMetaList()).thenReturn(incomingFields);

  // Output side: the input field followed by two file fields and one extra field
  List<ValueMetaInterface> outgoingFields = new ArrayList<>(incomingFields);
  outgoingFields.add(new ValueMeta("file_field_1"));
  outgoingFields.add(new ValueMeta("additional_field"));
  outgoingFields.add(new ValueMeta("file_field_2"));
  RowMetaInterface outgoingRowMeta = mock(RowMetaInterface.class);
  when(outgoingRowMeta.getValueMetaList()).thenReturn(outgoingFields);

  // Fields the step meta reports as its own input ("resource") fields
  final BaseFileField[] resourceFields = {
    new BaseFileField("file_field_1", 0, 0),
    new BaseFileField("file_field_2", 0, 0)
  };
  doReturn(resourceFields).when(fileMeta).getInputFields();

  Set<String> ignored = analyzer.getInputFieldsToIgnore(fileMeta, incomingRows, outgoingRowMeta);

  assertEquals(2, ignored.size());
  assertTrue(ignored.contains("in_field_1"));
  assertTrue(ignored.contains("additional_field"));
}
use of org.pentaho.di.trans.steps.file.BaseFileField in project pentaho-metaverse by pentaho.
the class ExternalResourceStepAnalyzer method getInputFieldsToIgnore.
/**
 * Returns a {@link Set} of field names that are to be ignored when fetching "resource" fields. This set will
 * typically include any field that is either provided by a previous step, or is some "additional" field.
 *
 * @param meta      the step meta; only inspected further when it is a {@code BaseFileInputMeta}
 * @param inputRows row metadata whose field names are all added to the result
 * @param rowMeta   row metadata that is scanned, for file-input metas only, for fields that are not
 *                  among the meta's own input fields
 * @return a new mutable set of field names to ignore; never {@code null}
 */
protected Set<String> getInputFieldsToIgnore(final T meta, final Map<String, RowMetaInterface> inputRows, final RowMetaInterface rowMeta) {
  final Set<String> inputFieldsToIgnore = new HashSet<>();

  // Every field found in the supplied input rows is ignored unconditionally
  for (final RowMetaInterface rowMetaInterface : inputRows.values()) {
    for (final ValueMetaInterface inputField : rowMetaInterface.getValueMetaList()) {
      inputFieldsToIgnore.add(inputField.getName());
    }
  }

  if (meta instanceof BaseFileInputMeta) {
    final BaseFileInputMeta fileMeta = (BaseFileInputMeta) meta;
    final BaseFileField[] inputFields = fileMeta.getInputFields();

    // A set gives constant-time membership checks in the loop below; a null array is
    // treated as "no resource fields" instead of failing with a NullPointerException
    final Set<String> inputFieldNames = new HashSet<>();
    if (inputFields != null) {
      for (final BaseFileField inputField : inputFields) {
        inputFieldNames.add(inputField.getName());
      }
    }

    // get the fields within rowMeta - any field that ISN'T a meta input field should be ignored
    for (final ValueMetaInterface rowMetaValueMeta : rowMeta.getValueMetaList()) {
      final String fieldName = rowMetaValueMeta.getName();
      if (!inputFieldNames.contains(fieldName)) {
        inputFieldsToIgnore.add(fieldName);
      }
    }
  }
  return inputFieldsToIgnore;
}
use of org.pentaho.di.trans.steps.file.BaseFileField in project pentaho-kettle by pentaho.
the class TextFileInputMeta method loadXML.
/**
 * Populates this meta object from the given XML step node: general file-acceptance flags, content
 * layout options, the file list, filters, field definitions, error-handling settings, date
 * settings and the additional output field names.
 *
 * @param stepnode  the XML node holding this step's settings
 * @param databases not referenced in this method
 * @param metaStore passed through to {@code loadSource} when resolving each file name
 * @throws KettleXMLException wrapping any exception raised while reading the node
 */
@Override
public void loadXML(Node stepnode, List<DatabaseMeta> databases, IMetaStore metaStore) throws KettleXMLException {
  try {
    // File names accepted from another step
    inputFiles.acceptingFilenames = YES.equalsIgnoreCase(XMLHandler.getTagValue(stepnode, "accept_filenames"));
    inputFiles.passingThruFields = YES.equalsIgnoreCase(XMLHandler.getTagValue(stepnode, "passing_through_fields"));
    inputFiles.acceptingField = XMLHandler.getTagValue(stepnode, "accept_field");
    inputFiles.acceptingStepName = XMLHandler.getTagValue(stepnode, "accept_stepname");

    // Content layout
    content.separator = XMLHandler.getTagValue(stepnode, "separator");
    content.enclosure = XMLHandler.getTagValue(stepnode, "enclosure");
    content.breakInEnclosureAllowed = YES.equalsIgnoreCase(XMLHandler.getTagValue(stepnode, "enclosure_breaks"));
    content.escapeCharacter = XMLHandler.getTagValue(stepnode, "escapechar");
    content.header = YES.equalsIgnoreCase(XMLHandler.getTagValue(stepnode, "header"));
    content.nrHeaderLines = Const.toInt(XMLHandler.getTagValue(stepnode, "nr_headerlines"), 1);
    content.footer = YES.equalsIgnoreCase(XMLHandler.getTagValue(stepnode, "footer"));
    content.nrFooterLines = Const.toInt(XMLHandler.getTagValue(stepnode, "nr_footerlines"), 1);
    content.lineWrapped = YES.equalsIgnoreCase(XMLHandler.getTagValue(stepnode, "line_wrapped"));
    content.nrWraps = Const.toInt(XMLHandler.getTagValue(stepnode, "nr_wraps"), 1);
    content.layoutPaged = YES.equalsIgnoreCase(XMLHandler.getTagValue(stepnode, "layout_paged"));
    content.nrLinesPerPage = Const.toInt(XMLHandler.getTagValue(stepnode, "nr_lines_per_page"), 1);
    content.nrLinesDocHeader = Const.toInt(XMLHandler.getTagValue(stepnode, "nr_lines_doc_header"), 1);

    // A missing or empty tag means "add to result"
    final String addToResultTag = XMLHandler.getTagValue(stepnode, "add_to_result_filenames");
    inputFiles.isaddresult = Utils.isEmpty(addToResultTag) || "Y".equalsIgnoreCase(addToResultTag);

    // A missing tag means "skip empty lines"
    final String noEmptyTag = XMLHandler.getTagValue(stepnode, "noempty");
    content.noEmptyLines = noEmptyTag == null || YES.equalsIgnoreCase(noEmptyTag);

    content.includeFilename = YES.equalsIgnoreCase(XMLHandler.getTagValue(stepnode, "include"));
    content.filenameField = XMLHandler.getTagValue(stepnode, "include_field");
    content.includeRowNumber = YES.equalsIgnoreCase(XMLHandler.getTagValue(stepnode, "rownum"));
    content.rowNumberByFile = YES.equalsIgnoreCase(XMLHandler.getTagValue(stepnode, "rownumByFile"));
    content.rowNumberField = XMLHandler.getTagValue(stepnode, "rownum_field");
    content.fileFormat = XMLHandler.getTagValue(stepnode, "format");
    content.encoding = XMLHandler.getTagValue(stepnode, "encoding");
    content.length = XMLHandler.getTagValue(stepnode, "length");

    // Size the file / field / filter arrays before filling them
    final Node fileNode = XMLHandler.getSubNode(stepnode, "file");
    final Node fieldsNode = XMLHandler.getSubNode(stepnode, "fields");
    final Node filtersNode = XMLHandler.getSubNode(stepnode, "filters");
    final int fileCount = XMLHandler.countNodes(fileNode, "name");
    final int fieldCount = XMLHandler.countNodes(fieldsNode, "field");
    final int filterCount = XMLHandler.countNodes(filtersNode, "filter");
    allocate(fileCount, fieldCount, filterCount);

    // Files
    for (int idx = 0; idx < fileCount; idx++) {
      final Node nameNode = XMLHandler.getSubNodeByNr(fileNode, "name", idx);
      final Node maskNode = XMLHandler.getSubNodeByNr(fileNode, "filemask", idx);
      final Node excludeMaskNode = XMLHandler.getSubNodeByNr(fileNode, "exclude_filemask", idx);
      final Node requiredNode = XMLHandler.getSubNodeByNr(fileNode, "file_required", idx);
      final Node subFoldersNode = XMLHandler.getSubNodeByNr(fileNode, "include_subfolders", idx);
      inputFiles.fileName[idx] = loadSource(fileNode, nameNode, idx, metaStore);
      inputFiles.fileMask[idx] = XMLHandler.getNodeValue(maskNode);
      inputFiles.excludeFileMask[idx] = XMLHandler.getNodeValue(excludeMaskNode);
      inputFiles.fileRequired[idx] = XMLHandler.getNodeValue(requiredNode);
      inputFiles.includeSubFolders[idx] = XMLHandler.getNodeValue(subFoldersNode);
    }

    content.fileType = XMLHandler.getTagValue(stepnode, "file", "type");
    content.fileCompression = XMLHandler.getTagValue(stepnode, "file", "compression");
    if (content.fileCompression == null) {
      // No explicit compression: fall back to the older "zipped" flag
      final boolean zipped = YES.equalsIgnoreCase(XMLHandler.getTagValue(stepnode, "file", "zipped"));
      content.fileCompression = zipped ? "Zip" : "None";
    }

    // Filters
    if (XMLHandler.getTagValue(stepnode, "filter") != null) {
      // Backward compatibility : just one filter
      filter = new TextFileFilter[1];
      final TextFileFilter legacyFilter = new TextFileFilter();
      filter[0] = legacyFilter;
      legacyFilter.setFilterPosition(Const.toInt(XMLHandler.getTagValue(stepnode, "filter_position"), -1));
      legacyFilter.setFilterString(XMLHandler.getTagValue(stepnode, "filter_string"));
      legacyFilter.setFilterLastLine(YES.equalsIgnoreCase(XMLHandler.getTagValue(stepnode, "filter_is_last_line")));
      legacyFilter.setFilterPositive(YES.equalsIgnoreCase(XMLHandler.getTagValue(stepnode, "filter_is_positive")));
    } else {
      for (int idx = 0; idx < filterCount; idx++) {
        final Node filterNode = XMLHandler.getSubNodeByNr(filtersNode, "filter", idx);
        final TextFileFilter current = new TextFileFilter();
        filter[idx] = current;
        current.setFilterPosition(Const.toInt(XMLHandler.getTagValue(filterNode, "filter_position"), -1));

        // Strings carrying the Base64 prefix are decoded; anything else is used verbatim
        final String rawFilterString = XMLHandler.getTagValue(filterNode, "filter_string");
        if (rawFilterString != null && rawFilterString.startsWith(STRING_BASE64_PREFIX)) {
          final String encodedPart = rawFilterString.substring(STRING_BASE64_PREFIX.length());
          current.setFilterString(new String(Base64.decodeBase64(encodedPart.getBytes())));
        } else {
          current.setFilterString(rawFilterString);
        }

        current.setFilterLastLine(YES.equalsIgnoreCase(XMLHandler.getTagValue(filterNode, "filter_is_last_line")));
        current.setFilterPositive(YES.equalsIgnoreCase(XMLHandler.getTagValue(filterNode, "filter_is_positive")));
      }
    }

    // Field definitions
    for (int idx = 0; idx < fieldCount; idx++) {
      final Node fieldNode = XMLHandler.getSubNodeByNr(fieldsNode, "field", idx);
      final BaseFileField parsed = new BaseFileField();
      parsed.setName(XMLHandler.getTagValue(fieldNode, "name"));
      parsed.setType(ValueMetaFactory.getIdForValueMeta(XMLHandler.getTagValue(fieldNode, "type")));
      parsed.setFormat(XMLHandler.getTagValue(fieldNode, "format"));
      parsed.setCurrencySymbol(XMLHandler.getTagValue(fieldNode, "currency"));
      parsed.setDecimalSymbol(XMLHandler.getTagValue(fieldNode, "decimal"));
      parsed.setGroupSymbol(XMLHandler.getTagValue(fieldNode, "group"));
      parsed.setNullString(XMLHandler.getTagValue(fieldNode, "nullif"));
      parsed.setIfNullValue(XMLHandler.getTagValue(fieldNode, "ifnull"));
      parsed.setPosition(Const.toInt(XMLHandler.getTagValue(fieldNode, "position"), -1));
      parsed.setLength(Const.toInt(XMLHandler.getTagValue(fieldNode, "length"), -1));
      parsed.setPrecision(Const.toInt(XMLHandler.getTagValue(fieldNode, "precision"), -1));
      parsed.setTrimType(ValueMetaString.getTrimTypeByCode(XMLHandler.getTagValue(fieldNode, "trim_type")));
      parsed.setRepeated(YES.equalsIgnoreCase(XMLHandler.getTagValue(fieldNode, "repeat")));
      inputFields[idx] = parsed;
    }

    // Is there a limit on the number of rows we process?
    content.rowLimit = Const.toLong(XMLHandler.getTagValue(stepnode, "limit"), 0L);

    // Error handling
    errorHandling.errorIgnored = YES.equalsIgnoreCase(XMLHandler.getTagValue(stepnode, "error_ignored"));
    errorHandling.skipBadFiles = YES.equalsIgnoreCase(XMLHandler.getTagValue(stepnode, "skip_bad_files"));
    errorHandling.fileErrorField = XMLHandler.getTagValue(stepnode, "file_error_field");
    errorHandling.fileErrorMessageField = XMLHandler.getTagValue(stepnode, "file_error_message_field");
    errorLineSkipped = YES.equalsIgnoreCase(XMLHandler.getTagValue(stepnode, "error_line_skipped"));
    errorCountField = XMLHandler.getTagValue(stepnode, "error_count_field");
    errorFieldsField = XMLHandler.getTagValue(stepnode, "error_fields_field");
    errorTextField = XMLHandler.getTagValue(stepnode, "error_text_field");
    errorHandling.warningFilesDestinationDirectory = XMLHandler.getTagValue(stepnode, "bad_line_files_destination_directory");
    errorHandling.warningFilesExtension = XMLHandler.getTagValue(stepnode, "bad_line_files_extension");
    errorHandling.errorFilesDestinationDirectory = XMLHandler.getTagValue(stepnode, "error_line_files_destination_directory");
    errorHandling.errorFilesExtension = XMLHandler.getTagValue(stepnode, "error_line_files_extension");
    errorHandling.lineNumberFilesDestinationDirectory = XMLHandler.getTagValue(stepnode, "line_number_files_destination_directory");
    errorHandling.lineNumberFilesExtension = XMLHandler.getTagValue(stepnode, "line_number_files_extension");

    // Backward compatible: lenient unless the tag explicitly says otherwise
    content.dateFormatLenient = !NO.equalsIgnoreCase(XMLHandler.getTagValue(stepnode, "date_format_lenient"));
    final String localeTag = XMLHandler.getTagValue(stepnode, "date_format_locale");
    content.dateFormatLocale = localeTag != null ? EnvUtil.createLocale(localeTag) : Locale.getDefault();

    // Additional output fields
    additionalOutputFields.shortFilenameField = XMLHandler.getTagValue(stepnode, "shortFileFieldName");
    additionalOutputFields.pathField = XMLHandler.getTagValue(stepnode, "pathFieldName");
    additionalOutputFields.hiddenField = XMLHandler.getTagValue(stepnode, "hiddenFieldName");
    additionalOutputFields.lastModificationField = XMLHandler.getTagValue(stepnode, "lastModificationTimeFieldName");
    additionalOutputFields.uriField = XMLHandler.getTagValue(stepnode, "uriNameFieldName");
    additionalOutputFields.rootUriField = XMLHandler.getTagValue(stepnode, "rootUriNameFieldName");
    additionalOutputFields.extensionField = XMLHandler.getTagValue(stepnode, "extensionFieldName");
    additionalOutputFields.sizeField = XMLHandler.getTagValue(stepnode, "sizeFieldName");
  } catch (Exception e) {
    throw new KettleXMLException("Unable to load step info from XML", e);
  }
}
use of org.pentaho.di.trans.steps.file.BaseFileField in project pentaho-kettle by pentaho.
the class TextFileInputMeta method readRep.
/**
 * Populates this meta object from step attributes stored in a repository: general file-acceptance
 * flags, content layout options, the file list, filters, field definitions, error-handling
 * settings, date settings and the additional output field names.
 *
 * @param rep       the repository to read step attributes from
 * @param metaStore passed through to {@code loadSourceRep} when resolving each file name
 * @param id_step   identifier of the step whose attributes are read
 * @param databases not referenced in this method
 * @throws KettleException wrapping any exception raised while reading the attributes
 */
@Override
public void readRep(Repository rep, IMetaStore metaStore, ObjectId id_step, List<DatabaseMeta> databases) throws KettleException {
  try {
    // File names accepted from another step
    inputFiles.acceptingFilenames = rep.getStepAttributeBoolean(id_step, "accept_filenames");
    inputFiles.passingThruFields = rep.getStepAttributeBoolean(id_step, "passing_through_fields");
    inputFiles.acceptingField = rep.getStepAttributeString(id_step, "accept_field");
    inputFiles.acceptingStepName = rep.getStepAttributeString(id_step, "accept_stepname");

    // Content layout
    content.separator = rep.getStepAttributeString(id_step, "separator");
    content.enclosure = rep.getStepAttributeString(id_step, "enclosure");
    content.breakInEnclosureAllowed = rep.getStepAttributeBoolean(id_step, "enclosure_breaks");
    content.escapeCharacter = rep.getStepAttributeString(id_step, "escapechar");
    content.header = rep.getStepAttributeBoolean(id_step, "header");
    content.nrHeaderLines = (int) rep.getStepAttributeInteger(id_step, "nr_headerlines");
    content.footer = rep.getStepAttributeBoolean(id_step, "footer");
    content.nrFooterLines = (int) rep.getStepAttributeInteger(id_step, "nr_footerlines");
    content.lineWrapped = rep.getStepAttributeBoolean(id_step, "line_wrapped");
    content.nrWraps = (int) rep.getStepAttributeInteger(id_step, "nr_wraps");
    content.layoutPaged = rep.getStepAttributeBoolean(id_step, "layout_paged");
    content.nrLinesPerPage = (int) rep.getStepAttributeInteger(id_step, "nr_lines_per_page");
    content.nrLinesDocHeader = (int) rep.getStepAttributeInteger(id_step, "nr_lines_doc_header");
    content.noEmptyLines = rep.getStepAttributeBoolean(id_step, "noempty");
    content.includeFilename = rep.getStepAttributeBoolean(id_step, "include");
    content.filenameField = rep.getStepAttributeString(id_step, "include_field");
    content.includeRowNumber = rep.getStepAttributeBoolean(id_step, "rownum");
    content.rowNumberByFile = rep.getStepAttributeBoolean(id_step, "rownumByFile");
    content.rowNumberField = rep.getStepAttributeString(id_step, "rownum_field");
    content.fileFormat = rep.getStepAttributeString(id_step, "format");
    content.encoding = rep.getStepAttributeString(id_step, "encoding");
    content.length = rep.getStepAttributeString(id_step, "length");

    // A missing or empty attribute means "add to result"; otherwise use its boolean value
    final String addToResultAttr = rep.getStepAttributeString(id_step, "add_to_result_filenames");
    inputFiles.isaddresult =
      Utils.isEmpty(addToResultAttr) || rep.getStepAttributeBoolean(id_step, "add_to_result_filenames");

    content.rowLimit = rep.getStepAttributeInteger(id_step, "limit");

    // Size the file / field / filter arrays before filling them
    final int fileCount = rep.countNrStepAttributes(id_step, "file_name");
    final int fieldCount = rep.countNrStepAttributes(id_step, "field_name");
    final int filterCount = rep.countNrStepAttributes(id_step, "filter_string");
    allocate(fileCount, fieldCount, filterCount);

    // Files
    for (int idx = 0; idx < fileCount; idx++) {
      inputFiles.fileName[idx] = loadSourceRep(rep, id_step, idx, metaStore);
      inputFiles.fileMask[idx] = rep.getStepAttributeString(id_step, idx, "file_mask");
      inputFiles.excludeFileMask[idx] = rep.getStepAttributeString(id_step, idx, "exclude_file_mask");

      // Anything that is not YES (ignoring case) is normalised to NO
      final String requiredAttr = rep.getStepAttributeString(id_step, idx, "file_required");
      inputFiles.fileRequired[idx] = YES.equalsIgnoreCase(requiredAttr) ? requiredAttr : NO;

      final String subFoldersAttr = rep.getStepAttributeString(id_step, idx, "include_subfolders");
      inputFiles.includeSubFolders[idx] = YES.equalsIgnoreCase(subFoldersAttr) ? subFoldersAttr : NO;
    }

    content.fileType = rep.getStepAttributeString(id_step, "file_type");
    content.fileCompression = rep.getStepAttributeString(id_step, "compression");
    if (content.fileCompression == null) {
      // No explicit compression: fall back to the older "file_zipped" flag
      content.fileCompression = "None";
      if (rep.getStepAttributeBoolean(id_step, "file_zipped")) {
        content.fileCompression = "Zip";
      }
    }

    // Filters
    for (int idx = 0; idx < filterCount; idx++) {
      final TextFileFilter current = new TextFileFilter();
      filter[idx] = current;
      current.setFilterPosition((int) rep.getStepAttributeInteger(id_step, idx, "filter_position"));
      current.setFilterString(rep.getStepAttributeString(id_step, idx, "filter_string"));
      current.setFilterLastLine(rep.getStepAttributeBoolean(id_step, idx, "filter_is_last_line"));
      current.setFilterPositive(rep.getStepAttributeBoolean(id_step, idx, "filter_is_positive"));
    }

    // Field definitions
    for (int idx = 0; idx < fieldCount; idx++) {
      final BaseFileField loaded = new BaseFileField();
      loaded.setName(rep.getStepAttributeString(id_step, idx, "field_name"));
      loaded.setType(ValueMetaFactory.getIdForValueMeta(rep.getStepAttributeString(id_step, idx, "field_type")));
      loaded.setFormat(rep.getStepAttributeString(id_step, idx, "field_format"));
      loaded.setCurrencySymbol(rep.getStepAttributeString(id_step, idx, "field_currency"));
      loaded.setDecimalSymbol(rep.getStepAttributeString(id_step, idx, "field_decimal"));
      loaded.setGroupSymbol(rep.getStepAttributeString(id_step, idx, "field_group"));
      loaded.setNullString(rep.getStepAttributeString(id_step, idx, "field_nullif"));
      loaded.setIfNullValue(rep.getStepAttributeString(id_step, idx, "field_ifnull"));
      loaded.setPosition((int) rep.getStepAttributeInteger(id_step, idx, "field_position"));
      loaded.setLength((int) rep.getStepAttributeInteger(id_step, idx, "field_length"));
      loaded.setPrecision((int) rep.getStepAttributeInteger(id_step, idx, "field_precision"));
      loaded.setTrimType(ValueMetaString.getTrimTypeByCode(rep.getStepAttributeString(id_step, idx, "field_trim_type")));
      loaded.setRepeated(rep.getStepAttributeBoolean(id_step, idx, "field_repeat"));
      inputFields[idx] = loaded;
    }

    // Error handling
    errorHandling.errorIgnored = rep.getStepAttributeBoolean(id_step, "error_ignored");
    errorHandling.skipBadFiles = rep.getStepAttributeBoolean(id_step, "skip_bad_files");
    errorHandling.fileErrorField = rep.getStepAttributeString(id_step, "file_error_field");
    errorHandling.fileErrorMessageField = rep.getStepAttributeString(id_step, "file_error_message_field");
    errorLineSkipped = rep.getStepAttributeBoolean(id_step, "error_line_skipped");
    errorCountField = rep.getStepAttributeString(id_step, "error_count_field");
    errorFieldsField = rep.getStepAttributeString(id_step, "error_fields_field");
    errorTextField = rep.getStepAttributeString(id_step, "error_text_field");
    errorHandling.warningFilesDestinationDirectory = rep.getStepAttributeString(id_step, "bad_line_files_dest_dir");
    errorHandling.warningFilesExtension = rep.getStepAttributeString(id_step, "bad_line_files_ext");
    errorHandling.errorFilesDestinationDirectory = rep.getStepAttributeString(id_step, "error_line_files_dest_dir");
    errorHandling.errorFilesExtension = rep.getStepAttributeString(id_step, "error_line_files_ext");
    errorHandling.lineNumberFilesDestinationDirectory = rep.getStepAttributeString(id_step, "line_number_files_dest_dir");
    errorHandling.lineNumberFilesExtension = rep.getStepAttributeString(id_step, "line_number_files_ext");

    // Date handling: lenient flag is read with a default of true; locale falls back to the JVM default
    content.dateFormatLenient = rep.getStepAttributeBoolean(id_step, 0, "date_format_lenient", true);
    final String localeAttr = rep.getStepAttributeString(id_step, 0, "date_format_locale");
    content.dateFormatLocale = localeAttr != null ? EnvUtil.createLocale(localeAttr) : Locale.getDefault();

    // Additional output fields
    additionalOutputFields.shortFilenameField = rep.getStepAttributeString(id_step, "shortFileFieldName");
    additionalOutputFields.pathField = rep.getStepAttributeString(id_step, "pathFieldName");
    additionalOutputFields.hiddenField = rep.getStepAttributeString(id_step, "hiddenFieldName");
    additionalOutputFields.lastModificationField = rep.getStepAttributeString(id_step, "lastModificationTimeFieldName");
    additionalOutputFields.uriField = rep.getStepAttributeString(id_step, "uriNameFieldName");
    additionalOutputFields.rootUriField = rep.getStepAttributeString(id_step, "rootUriNameFieldName");
    additionalOutputFields.extensionField = rep.getStepAttributeString(id_step, "extensionFieldName");
    additionalOutputFields.sizeField = rep.getStepAttributeString(id_step, "sizeFieldName");
  } catch (Exception e) {
    throw new KettleException("Unexpected error reading step information from the repository", e);
  }
}
Aggregations