Use of org.pentaho.di.trans.steps.fileinput.text.BufferedInputStreamReader in the pentaho-kettle project by Pentaho.
Class TextFileInputDialog, method getFirst.
/**
 * Reads the first lines of the first file described by the dialog's current settings.
 *
 * <p>The dialog state is copied into a fresh {@link TextFileInputMeta} through
 * {@code getInfo(meta, true)}. Only the first entry of the resulting file list is opened; it is
 * wrapped by the compression provider selected in {@code meta.content.fileCompression} and
 * decoded with {@code meta.getEncoding()}, or with the platform default charset when no encoding
 * is configured.
 *
 * @param nrlines number of lines requested; {@code 0} means "read until the end of the file".
 *        When the header option is enabled, {@code meta.content.nrHeaderLines} is added to the
 *        limit.
 * @param skipHeaders when {@code true}, document-header and header lines are skipped (using the
 *        configured counts minus one) before any line is collected
 * @return the lines read, in file order; an empty list when the file list contains no files
 * @throws KettleException if the file cannot be opened or read; the original exception is
 *         attached as the cause
 */
private List<String> getFirst(int nrlines, boolean skipHeaders) throws KettleException {
  TextFileInputMeta meta = new TextFileInputMeta();
  getInfo(meta, true);
  FileInputList textFileList = meta.getFileInputList(transMeta);

  // Both streams start out null so the finally block can tell how far opening got.
  InputStream fi = null;
  CompressionInputStream f = null;
  StringBuilder lineStringBuilder = new StringBuilder(256);
  int fileFormatType = meta.getFileFormatTypeNr();
  List<String> retval = new ArrayList<>();

  if (textFileList.nrOfFiles() > 0) {
    FileObject file = textFileList.getFile(0);
    try {
      fi = KettleVFS.getInputStream(file);
      CompressionProvider provider = CompressionProviderFactory.getInstance()
          .createCompressionProviderInstance(meta.content.fileCompression);
      f = provider.createInputStream(fi);

      BufferedInputStreamReader reader;
      if (meta.getEncoding() != null && meta.getEncoding().length() > 0) {
        reader = new BufferedInputStreamReader(new InputStreamReader(f, meta.getEncoding()));
      } else {
        // No encoding configured: fall back to the platform default charset.
        reader = new BufferedInputStreamReader(new InputStreamReader(f));
      }
      EncodingType encodingType = EncodingType.guessEncodingType(reader.getEncoding());

      int linenr = 0;
      // The limit is widened by the header line count whenever the header option is on.
      int maxnr = nrlines + (meta.content.header ? meta.content.nrHeaderLines : 0);

      if (skipHeaders) {
        // Skip the document header lines first if more than one, it helps us position.
        if (meta.content.layoutPaged && meta.content.nrLinesDocHeader > 0) {
          TextFileInputUtils.skipLines(log, reader, encodingType, fileFormatType,
              lineStringBuilder, meta.content.nrLinesDocHeader - 1, meta.getEnclosure(),
              meta.getEscapeCharacter(), 0);
        }
        // Skip the header lines first if more than one, it helps us position.
        if (meta.content.header && meta.content.nrHeaderLines > 0) {
          TextFileInputUtils.skipLines(log, reader, encodingType, fileFormatType,
              lineStringBuilder, meta.content.nrHeaderLines - 1, meta.getEnclosure(),
              meta.getEscapeCharacter(), 0);
        }
      }

      String line = TextFileInputUtils.getLine(log, reader, encodingType, fileFormatType,
          lineStringBuilder, meta.getEnclosure(), meta.getEscapeCharacter());
      // nrlines == 0 disables the limit: keep reading until getLine returns null.
      while (line != null && (linenr < maxnr || nrlines == 0)) {
        retval.add(line);
        linenr++;
        line = TextFileInputUtils.getLine(log, reader, encodingType, fileFormatType,
            lineStringBuilder, meta.getEnclosure(), meta.getEscapeCharacter());
      }
    } catch (Exception e) {
      throw new KettleException(BaseMessages.getString(PKG,
          "TextFileInputDialog.Exception.ErrorGettingFirstLines", "" + nrlines,
          file.getName().getURI()), e);
    } finally {
      try {
        if (f != null) {
          // NOTE(review): closing the wrapper is presumed to close the wrapped raw stream too,
          // as the previous code relied on - confirm against CompressionInputStream.
          f.close();
        } else if (fi != null) {
          // The compression wrapper was never created (provider lookup or creation failed),
          // so the raw stream has to be released directly or it would leak.
          fi.close();
        }
      } catch (Exception ignored) {
        // Best-effort cleanup: a failure to close must not mask the result or the real error.
      }
    }
  }
  return retval;
}
Use of org.pentaho.di.trans.steps.fileinput.text.BufferedInputStreamReader in the pentaho-kettle project by Pentaho.
Class CsvInputAwareStepDialog, method getFieldNames.
/**
 * Resolves the field names for the given CSV-aware step meta.
 *
 * <p>An input stream and a buffered reader are obtained for {@code meta} and handed to
 * {@code getFieldNamesImpl}. A {@link KettleException} raised while reading is logged and an
 * empty array is returned instead. The input stream is always closed afterwards; errors raised
 * by the close are ignored.
 *
 * @param meta the step meta to read the field names for
 * @return the field names, or an empty array when they could not be read
 * @see GetFieldsCapableStepDialog#getFieldNames(org.pentaho.di.trans.step.BaseStepMeta)
 */
default String[] getFieldNames(final CsvInputAwareMeta meta) {
  final InputStream stream = getInputStream(meta);
  final BufferedInputStreamReader bufferedReader = getBufferedReader(meta, stream);
  try {
    return getFieldNamesImpl(bufferedReader, meta);
  } catch (final KettleException readFailure) {
    logError(BaseMessages.getString("Dialog.ErrorGettingFields.Message"), readFailure);
    // Fall back to "no fields" rather than propagating the failure to the dialog.
    return new String[] {};
  } finally {
    try {
      stream.close();
    } catch (Exception ignored) {
      // Ignore close errors
    }
  }
}
Aggregations