use of org.pentaho.di.core.compress.CompressionProvider in project pentaho-kettle by pentaho.
the class TextFileInputDialog method getCSV.
/**
 * Get the data layout: reads the header line of the first selected file, proposes one field per
 * column in the fields table, and optionally samples a user-chosen number of lines through
 * {@code TextFileCSVImportProgressDialog} to refine the field definitions.
 *
 * <p>The method returns without touching the table when the file type is CSV and no separator is
 * set, or when the user cancels the "clear field list" question. Problems are reported to the user
 * through message boxes / error dialogs; nothing is thrown for I/O or Kettle errors.
 */
private void getCSV() {
  TextFileInputMeta meta = new TextFileInputMeta();
  getInfo(meta, true);

  // CSV without separator defined
  if (meta.content.fileType.equalsIgnoreCase("CSV") && (meta.content.separator == null || meta.content.separator.isEmpty())) {
    MessageBox mb = new MessageBox(shell, SWT.OK | SWT.ICON_ERROR);
    mb.setMessage(BaseMessages.getString(PKG, "TextFileInput.Exception.NoSeparator"));
    mb.setText(BaseMessages.getString(PKG, "TextFileInputDialog.DialogTitle"));
    mb.open();
    return;
  }

  // Keep a copy of the settings as they were before scanning, so the previous fields can be restored.
  TextFileInputMeta previousMeta = (TextFileInputMeta) meta.clone();
  FileInputList textFileList = meta.getFileInputList(transMeta);
  // Initialised to null so the finally block can tell whether the raw stream was ever opened.
  InputStream fileInputStream = null;
  CompressionInputStream inputStream = null;
  StringBuilder lineStringBuilder = new StringBuilder(256);
  int fileFormatType = meta.getFileFormatTypeNr();
  String delimiter = transMeta.environmentSubstitute(meta.content.separator);
  String enclosure = transMeta.environmentSubstitute(meta.content.enclosure);
  String escapeCharacter = transMeta.environmentSubstitute(meta.content.escapeCharacter);

  if (textFileList.nrOfFiles() > 0) {
    // Default answer when no fields exist yet; overridden by the user's choice below.
    int clearFields = meta.content.header ? SWT.YES : SWT.NO;
    int nrInputFields = meta.inputFields.length;
    if (nrInputFields > 0) {
      MessageBox mb = new MessageBox(shell, SWT.YES | SWT.NO | SWT.CANCEL | SWT.ICON_QUESTION);
      mb.setMessage(BaseMessages.getString(PKG, "TextFileInputDialog.ClearFieldList.DialogMessage"));
      mb.setText(BaseMessages.getString(PKG, "TextFileInputDialog.ClearFieldList.DialogTitle"));
      clearFields = mb.open();
      if (clearFields == SWT.CANCEL) {
        return;
      }
    }

    try {
      wFields.table.removeAll();

      // Only the first file of the list is inspected.
      FileObject fileObject = textFileList.getFile(0);
      fileInputStream = KettleVFS.getInputStream(fileObject);
      Table table = wFields.table;

      CompressionProvider provider = CompressionProviderFactory.getInstance().createCompressionProviderInstance(meta.content.fileCompression);
      // NOTE(review): assumes the factory may hand back null for an unknown compression name -
      // fail with a Kettle error (shown in the dialog below) instead of an uncaught NPE.
      if (provider == null) {
        throw new KettleException("No compression provider found with name = " + meta.content.fileCompression);
      }
      inputStream = provider.createInputStream(fileInputStream);

      InputStreamReader reader;
      if (meta.getEncoding() != null && meta.getEncoding().length() > 0) {
        reader = new InputStreamReader(inputStream, meta.getEncoding());
      } else {
        reader = new InputStreamReader(inputStream);
      }

      EncodingType encodingType = EncodingType.guessEncodingType(reader.getEncoding());

      // Scan the header-line, determine fields...
      String line = TextFileInputUtils.getLine(log, reader, encodingType, fileFormatType, lineStringBuilder);
      if (line != null) {
        // Estimate the number of input fields...
        // Chop up the line using the delimiter
        String[] fields = TextFileInputUtils.guessStringsFromLine(transMeta, log, line, meta, delimiter, enclosure, escapeCharacter);

        for (int i = 0; i < fields.length; i++) {
          String field = fields[i];
          if (field == null || field.length() == 0 || !meta.content.header) {
            // No usable header text: generate a positional name.
            field = "Field" + (i + 1);
          } else {
            // Trim the field
            field = Const.trim(field);
            // Replace all spaces & - with underscore _
            field = Const.replace(field, " ", "_");
            field = Const.replace(field, "-", "_");
          }

          TableItem item = new TableItem(table, SWT.NONE);
          item.setText(1, field);
          // The default type is String...
          item.setText(2, "String");
        }

        wFields.setRowNums();
        wFields.optWidth(true);

        // Copy it...
        getInfo(meta, true);

        // Sample a few lines to determine the correct type of the fields...
        String shellText = BaseMessages.getString(PKG, "TextFileInputDialog.LinesToSample.DialogTitle");
        String lineText = BaseMessages.getString(PKG, "TextFileInputDialog.LinesToSample.DialogMessage");
        EnterNumberDialog end = new EnterNumberDialog(shell, 100, shellText, lineText);
        int samples = end.open();
        if (samples >= 0) {
          getInfo(meta, true);

          TextFileCSVImportProgressDialog pd = new TextFileCSVImportProgressDialog(shell, meta, transMeta, reader, samples, clearFields == SWT.YES);
          String message = pd.open();
          if (message != null) {
            wFields.removeAll();

            // OK, what's the result of our search?
            getData(meta);

            // The user chose to keep the existing fields: put them back and select the newly found ones.
            if (clearFields == SWT.NO) {
              getFieldsData(previousMeta, true);
              wFields.table.setSelection(previousMeta.inputFields.length, wFields.table.getItemCount() - 1);
            }

            wFields.removeEmptyRows();
            wFields.setRowNums();
            wFields.optWidth(true);

            EnterTextDialog etd = new EnterTextDialog(shell, BaseMessages.getString(PKG, "TextFileInputDialog.ScanResults.DialogTitle"), BaseMessages.getString(PKG, "TextFileInputDialog.ScanResults.DialogMessage"), message, true);
            etd.setReadOnly();
            etd.open();
          }
        }
      } else {
        MessageBox mb = new MessageBox(shell, SWT.OK | SWT.ICON_ERROR);
        mb.setMessage(BaseMessages.getString(PKG, "TextFileInputDialog.UnableToReadHeaderLine.DialogMessage"));
        mb.setText(BaseMessages.getString(PKG, "System.Dialog.Error.Title"));
        mb.open();
      }
    } catch (IOException e) {
      new ErrorDialog(shell, BaseMessages.getString(PKG, "TextFileInputDialog.IOError.DialogTitle"), BaseMessages.getString(PKG, "TextFileInputDialog.IOError.DialogMessage"), e);
    } catch (KettleException e) {
      new ErrorDialog(shell, BaseMessages.getString(PKG, "System.Dialog.Error.Title"), BaseMessages.getString(PKG, "TextFileInputDialog.ErrorGettingFileDesc.DialogMessage"), e);
    } finally {
      try {
        if (inputStream != null) {
          // Presumably closing the compression wrapper also releases the raw stream it wraps.
          inputStream.close();
        } else if (fileInputStream != null) {
          // The wrapper was never created, so the raw stream must be closed directly
          // or the file handle would leak.
          fileInputStream.close();
        }
      } catch (Exception e) {
        // Ignore errors: this is best-effort cleanup and the user has already been informed
        // of any real failure above.
      }
    }
  } else {
    MessageBox mb = new MessageBox(shell, SWT.OK | SWT.ICON_ERROR);
    mb.setMessage(BaseMessages.getString(PKG, "TextFileInputDialog.NoValidFileFound.DialogMessage"));
    mb.setText(BaseMessages.getString(PKG, "System.Dialog.Error.Title"));
    mb.open();
  }
}
use of org.pentaho.di.core.compress.CompressionProvider in project pentaho-kettle by pentaho.
the class TextFileInput method openNextFile.
/**
 * Closes the previous file (if any), opens the next file in {@code data.getFiles()}, fills in the
 * optional per-file metadata fields, wraps the stream with the configured compression provider and
 * pre-reads the first buffer of lines.
 *
 * @return {@code true} when the file was opened and the initial lines were buffered;
 *         {@code false} when there are no files, when closing the previous file failed and the
 *         step must fail on bad files, or when any exception occurred while opening (the error is
 *         logged and the step's error count is incremented).
 */
private boolean openNextFile() {
  try {
    lineNumberInFile = 0;
    if (!closeLastFile() && failAfterBadFile(null)) {
      // (!meta.isSkipBadFiles() || data.isLastFile) ) return false;
      return false;
    }

    if (data.getFiles().nrOfFiles() == 0) {
      return false;
    }

    // Is this the last file?
    data.isLastFile = (data.filenr == data.getFiles().nrOfFiles() - 1);
    data.file = data.getFiles().getFile(data.filenr);
    data.filename = KettleVFS.getFilename(data.file);

    // Move file pointer ahead!
    data.filenr++;

    // Add additional fields?
    if (data.addShortFilename) {
      data.shortFilename = data.file.getName().getBaseName();
    }
    if (data.addPath) {
      data.path = KettleVFS.getFilename(data.file.getParent());
    }
    if (data.addIsHidden) {
      data.hidden = data.file.isHidden();
    }
    if (data.addExtension) {
      data.extension = data.file.getName().getExtension();
    }
    if (data.addLastModificationDate) {
      data.lastModificationDateTime = new Date(data.file.getContent().getLastModifiedTime());
    }
    if (data.addUri) {
      data.uriName = Const.optionallyDecodeUriString(data.file.getName().getURI());
    }
    if (data.addRootUri) {
      data.rootUriName = data.file.getName().getRootURI();
    }
    if (data.addSize) {
      // Long.valueOf replaces the deprecated boxing constructor.
      data.size = Long.valueOf(data.file.getContent().getSize());
    }

    data.lineInFile = 0;
    if (meta.isPassingThruFields()) {
      data.currentPassThruFieldsRow = data.passThruFields.get(data.file);
    }

    // Register the file as a result file when requested.
    if (meta.isAddResultFile()) {
      ResultFile resultFile = new ResultFile(ResultFile.FILE_TYPE_GENERAL, data.file, getTransMeta().getName(), toString());
      resultFile.setComment("File was read by an Text File input step");
      addResultFile(resultFile);
    }

    if (log.isBasic()) {
      logBasic("Opening file: " + data.file.getName().getFriendlyURI());
    }

    CompressionProvider provider = CompressionProviderFactory.getInstance().getCompressionProviderByName(meta.getFileCompression());
    // Check before opening the file: with a null provider the raw stream would otherwise be
    // opened as the call argument and then leaked when the call itself fails.
    if (provider == null) {
      throw new IllegalStateException("No compression provider found with name = " + meta.getFileCompression());
    }
    data.in = provider.createInputStream(KettleVFS.getInputStream(data.file));
    data.dataErrorLineHandler.handleFile(data.file);
    data.in.nextEntry();

    if (log.isDetailed()) {
      logDetailed("This is a compressed file being handled by the " + provider.getName() + " provider");
    }

    if (meta.getEncoding() != null && meta.getEncoding().length() > 0) {
      data.isr = new InputStreamReader(new BufferedInputStream(data.in, BUFFER_SIZE_INPUT_STREAM), meta.getEncoding());
    } else {
      data.isr = new InputStreamReader(new BufferedInputStream(data.in, BUFFER_SIZE_INPUT_STREAM));
    }

    String encoding = data.isr.getEncoding();
    data.encodingType = EncodingType.guessEncodingType(encoding);

    // /////////////////////////////////////////////////////////////////////////////
    // Read the first lines...

    /*
     * Keep track of the status of the file: are there any lines left to read?
     */
    data.doneReading = false;

    /*
     * OK, read a number of lines in the buffer: The header rows The nr rows in the page : optional The footer rows
     */
    int bufferSize = 1;
    bufferSize += meta.hasHeader() ? meta.getNrHeaderLines() : 0;
    // The non-paged branch helps when we have wrapped input w/o header.
    bufferSize += meta.isLayoutPaged() ? meta.getNrLinesPerPage() * (Math.max(0, meta.getNrWraps()) + 1) : Math.max(0, meta.getNrWraps());
    bufferSize += meta.hasFooter() ? meta.getNrFooterLines() : 0;

    // See if we need to skip the document header lines...
    if (meta.isLayoutPaged()) {
      for (int i = 0; i < meta.getNrLinesDocHeader(); i++) {
        // Just skip these... header and footer: not wrapped
        getLine(log, data.isr, data.encodingType, data.fileFormatType, data.lineStringBuilder);
        lineNumberInFile++;
      }
    }

    for (int i = 0; i < bufferSize && !data.doneReading; i++) {
      boolean wasNotFiltered = tryToReadLine(!meta.hasHeader() || i >= meta.getNrHeaderLines());
      if (!wasNotFiltered) {
        // grab another line, this one got filtered
        bufferSize++;
      }
    }

    // Reset counters etc.
    data.headerLinesRead = 0;
    data.footerLinesRead = 0;
    data.pageLinesRead = 0;

    // Set a flags
    data.doneWithHeader = !meta.hasHeader();
  } catch (Exception e) {
    // data.file may still be null if the failure happened before a file was selected;
    // do not let the error report itself fail with a NullPointerException.
    String fileLabel = data.file != null ? data.file.getName().getFriendlyURI() : "<unknown>";
    String errorMsg = "Couldn't open file #" + data.filenr + " : " + fileLabel + " --> " + e.toString();
    logError(errorMsg);
    if (failAfterBadFile(errorMsg)) {
      // !meta.isSkipBadFiles()) stopAll();
      stopAll();
    }
    setErrors(getErrors() + 1);
    return false;
  }
  return true;
}
use of org.pentaho.di.core.compress.CompressionProvider in project pentaho-kettle by pentaho.
the class TextFileOutput method getCompressionProvider.
/**
 * Looks up the compression provider configured on the step metadata, falling back to
 * {@code FILE_COMPRESSION_TYPE_NONE} when no compression type is set.
 *
 * @return the provider registered under the configured name
 * @throws KettleException when no provider is registered under that name, or when the provider
 *         does not support output streams
 */
private CompressionProvider getCompressionProvider() throws KettleException {
  final String providerName = Const.NVL(meta.getFileCompression(), FILE_COMPRESSION_TYPE_NONE);
  final CompressionProviderFactory providerFactory = CompressionProviderFactory.getInstance();
  final CompressionProvider candidate = providerFactory.getCompressionProviderByName(providerName);

  // Happy path first: a known provider that can write.
  if (candidate != null && candidate.supportsOutput()) {
    return candidate;
  }

  if (candidate == null) {
    throw new KettleException("No compression provider found with name = " + providerName);
  }
  throw new KettleException("Compression provider " + providerName + " does not support output streams!");
}
use of org.pentaho.di.core.compress.CompressionProvider in project pentaho-kettle by pentaho.
the class ZIPCompressionOutputStreamTest method setUp.
/**
 * Builds a fresh ZIP output stream over an in-memory byte buffer before each test, using the
 * provider registered under {@code PROVIDER_NAME}.
 */
@Before
public void setUp() throws Exception {
  // In-memory sink that receives whatever the stream under test writes.
  internalStream = new ByteArrayOutputStream();

  factory = CompressionProviderFactory.getInstance();
  outStream = new ZIPCompressionOutputStream(internalStream, factory.getCompressionProviderByName(PROVIDER_NAME));
}
use of org.pentaho.di.core.compress.CompressionProvider in project pentaho-kettle by pentaho.
the class ZIPCompressionInputStreamTest method testRead.
/**
 * Replaces the stream under test with one backed by the bytes of "Test" and issues a single
 * bulk read; the test only checks that the call completes without throwing.
 */
@Test
public void testRead() throws IOException {
  final CompressionProvider currentProvider = inStream.getCompressionProvider();
  final byte[] payload = "Test".getBytes();

  inStream = new ZIPCompressionInputStream(new ByteArrayInputStream(payload), currentProvider) {
  };

  // Request as many bytes as the stream reports available, into a 100-byte buffer.
  final byte[] sink = new byte[100];
  final int requested = inStream.available();
  inStream.read(sink, 0, requested);
}
Aggregations