use of java.io.PushbackInputStream in project freeplane by freeplane.
the class StdXMLReader method stream2reader.
/**
 * Converts a stream to a reader while detecting the encoding.
 * <p>
 * The first byte of the stream selects the strategy:
 * <ul>
 * <li>{@code 0x00}, {@code 0xFE} or {@code 0xFF}: the byte is pushed back and
 * the stream is decoded as UTF-16.</li>
 * <li>{@code 0xEF}: the following two bytes are discarded (presumably the
 * remainder of a UTF-8 byte order mark; they are not verified) and the stream
 * is decoded as UTF-8.</li>
 * <li>{@code 0x3C} ('&lt;'): bytes up to and including the first
 * {@code 0x3E} ('&gt;') are collected into {@code charsRead} and handed to
 * {@code getEncoding}. If no encoding is reported, the collected characters
 * stay in {@code charsRead} and UTF-8 is used. Otherwise {@code charsRead} is
 * emptied and the reported encoding is used, falling back to UTF-8 when that
 * encoding is not supported.</li>
 * <li>anything else: the byte is stored in {@code charsRead} and the stream
 * is decoded as UTF-8. At end of stream nothing is stored.</li>
 * </ul>
 *
 * @param stream
 *            the input for the XML data.
 * @param charsRead
 *            buffer where to put characters that have been read
 * @return a reader over the bytes of {@code stream} that were not consumed
 *         during detection
 * @throws java.io.IOException
 *             if an I/O error occurred
 */
protected Reader stream2reader(final InputStream stream, final StringBuilder charsRead) throws IOException {
    final PushbackInputStream pbstream = new PushbackInputStream(stream);
    int b = pbstream.read();
    switch (b) {
    case 0x00:
    case 0xFE:
    case 0xFF:
        // Give the byte back so the UTF-16 decoder sees the complete input.
        pbstream.unread(b);
        return new InputStreamReader(pbstream, "UTF-16");
    case 0xEF:
        // Drop the two bytes that follow 0xEF.
        for (int i = 0; i < 2; i++) {
            pbstream.read();
        }
        return new InputStreamReader(pbstream, "UTF-8");
    case 0x3C: {
        b = pbstream.read();
        charsRead.append('<');
        // Stops at '>' and also at end of stream or a NUL byte (b <= 0).
        while ((b > 0) && (b != 0x3E)) {
            charsRead.append((char) b);
            b = pbstream.read();
        }
        if (b > 0) {
            charsRead.append((char) b);
        }
        final String encoding = this.getEncoding(charsRead.toString());
        if (encoding == null) {
            // Keep the collected characters for the caller.
            return new InputStreamReader(pbstream, "UTF-8");
        }
        charsRead.setLength(0);
        try {
            return new InputStreamReader(pbstream, encoding);
        } catch (final UnsupportedEncodingException e) {
            return new InputStreamReader(pbstream, "UTF-8");
        }
    }
    default:
        // read() yields -1 on an empty stream; casting that to char would
        // store the bogus character U+FFFF, so only real bytes are kept.
        if (b >= 0) {
            charsRead.append((char) b);
        }
        return new InputStreamReader(pbstream, "UTF-8");
    }
}
use of java.io.PushbackInputStream in project elki by elki-project.
the class FileUtil method tryGzipInput.
/**
 * Try to open a stream as gzip, if it starts with the gzip magic.
 * <p>
 * The first two bytes are inspected without being consumed: via
 * {@code mark}/{@code reset} when the stream supports it, otherwise through a
 * {@link PushbackInputStream} wrapper. Streams shorter than two bytes are
 * never treated as gzip and are returned with their content intact.
 *
 * TODO: move to utils package.
 *
 * @param in original input stream
 * @return old input stream (possibly wrapped in a {@link PushbackInputStream})
 *         or a {@link GZIPInputStream} if appropriate.
 * @throws IOException on IO error
 */
public static InputStream tryGzipInput(InputStream in) throws IOException {
    // The two gzip magic bytes, as unsigned values.
    final int magic0 = 0x1f;
    final int magic1 = 0x8b;
    // try autodetecting gzip compression.
    if (!in.markSupported()) {
        PushbackInputStream pb = new PushbackInputStream(in, 16);
        // read a magic from the file header
        byte[] magic = new byte[2];
        int filled = 0;
        // A single read may deliver fewer bytes than requested; keep going
        // until both bytes are present or the stream ends.
        while (filled < magic.length) {
            int n = pb.read(magic, filled, magic.length - filled);
            if (n <= 0) {
                break;
            }
            filled += n;
        }
        // Push back only what was really read, so that short or empty
        // streams do not gain extra zero bytes.
        if (filled > 0) {
            pb.unread(magic, 0, filled);
        }
        if (filled == magic.length && (magic[0] & 0xff) == magic0 && (magic[1] & 0xff) == magic1) {
            return new GZIPInputStream(pb);
        }
        return pb;
    }
    // Mark is supported.
    in.mark(16);
    // read() returns unsigned values (0..255), so compare against 0x8b and
    // not against the signed byte value -117.
    boolean isgzip = (in.read() == magic0 && in.read() == magic1);
    // Rewind
    in.reset();
    if (isgzip) {
        in = new GZIPInputStream(in);
    }
    return in;
}
use of java.io.PushbackInputStream in project data-prep by Talend.
the class XlsSchemaParser method parseAllSheetsOldFormat.
/**
 * Parse all xls sheets for old excel document type.
 * <p>
 * The request content is wrapped in a {@link PushbackInputStream} when it does
 * not support mark, then opened through {@code WorkbookFactory.create}. Sheets
 * whose last row number is below 1 are skipped. Each remaining sheet is parsed
 * with {@code parsePerSheet}. A sheet without a name is registered as
 * {@code "sheet-" + index}.
 *
 * @param request the xls request.
 * @return The parsed sheets request; an empty list when the workbook has no sheet.
 * @throws TDPException wrapping any exception raised while reading the workbook.
 */
private List<Schema.SheetContent> parseAllSheetsOldFormat(Request request) {
    final Marker marker = Markers.dataset(request.getMetadata().getId());
    try {
        InputStream inputStream = request.getContent();
        if (!inputStream.markSupported()) {
            inputStream = new PushbackInputStream(inputStream, 8);
        }
        // try-with-resources closes the workbook on every exit path, skips the
        // close when the resource is null, and keeps the primary exception
        // instead of replacing it with one raised by close().
        try (Workbook hssfWorkbook = WorkbookFactory.create(inputStream)) {
            if (hssfWorkbook == null) {
                throw new IOException("could not open " + request.getMetadata().getId() + " as an excel file");
            }
            int sheetNumber = hssfWorkbook.getNumberOfSheets();
            if (sheetNumber < 1) {
                LOGGER.debug(marker, "has not sheet to read");
                return Collections.emptyList();
            }
            List<Schema.SheetContent> schemas = new ArrayList<>();
            for (int i = 0; i < sheetNumber; i++) {
                Sheet sheet = hssfWorkbook.getSheetAt(i);
                if (sheet.getLastRowNum() < 1) {
                    LOGGER.debug(marker, "sheet '{}' do not have rows skip ip", sheet.getSheetName());
                    continue;
                }
                List<ColumnMetadata> columnsMetadata = parsePerSheet(//
                        sheet, //
                        request.getMetadata().getId(), hssfWorkbook.getCreationHelper().createFormulaEvaluator());
                String sheetName = sheet.getSheetName();
                // update XlsSerializer if this default sheet naming change!!!
                schemas.add(new Schema.SheetContent(sheetName == null ? "sheet-" + i : sheetName, columnsMetadata));
            }
            return schemas;
        }
    } catch (Exception e) {
        LOGGER.debug(marker, "Exception during parsing xls request :" + e.getMessage(), e);
        throw new TDPException(CommonErrorCodes.UNEXPECTED_EXCEPTION, e);
    }
}
use of java.io.PushbackInputStream in project data-prep by Talend.
the class XlsSchemaParser method parseAllSheets.
/**
 * Parses every sheet of an xls document, choosing the parser that matches the
 * document format.
 * <p>
 * The request content is wrapped in a {@link PushbackInputStream} when it does
 * not support mark. {@code XlsUtils.isNewExcelFormat} then decides whether the
 * streaming parser or the old-format parser handles the content.
 *
 * @param request the schema parser request.
 * @return the list of parsed xls sheet.
 * @throws IOException if an error occurs.
 */
protected List<Schema.SheetContent> parseAllSheets(Request request) throws IOException {
    final InputStream content = request.getContent();
    final InputStream source = content.markSupported() ? content : new PushbackInputStream(content, 8);

    // Old format first as a guard clause; the new format is the fall-through.
    if (!XlsUtils.isNewExcelFormat(source)) {
        return parseAllSheetsOldFormat(new Request(source, request.getMetadata()));
    }
    return parseAllSheetsStream(new Request(source, request.getMetadata()));
}
use of java.io.PushbackInputStream in project spf4j by zolyfarkas.
the class Converter method load.
/**
 * Loads the samples stored in a file and converts them to a {@link SampleNode}.
 * <p>
 * The file is read through a {@link PushbackInputStream} so that the iterator
 * handed to {@code convert} can peek one byte to find out whether more data
 * follows. Each element is decoded as an {@code ASample} by a
 * {@code SpecificDatumReader} with a direct binary decoder.
 *
 * @param file the file to read.
 * @return the result of {@code convert} applied to the decoded samples.
 * @throws IOException if the file cannot be opened or closed.
 */
@SuppressFBWarnings("NP_LOAD_OF_KNOWN_NULL_VALUE")
public static SampleNode load(final File file) throws IOException {
    try (MemorizingBufferedInputStream bis = new MemorizingBufferedInputStream(Files.newInputStream(file.toPath()))) {
        final PushbackInputStream pis = new PushbackInputStream(bis);
        final SpecificDatumReader<ASample> reader = new SpecificDatumReader<>(ASample.SCHEMA$);
        final BinaryDecoder decoder = DecoderFactory.get().directBinaryDecoder(pis, null);
        return convert(new Iterator<ASample>() {
            @Override
            public boolean hasNext() {
                try {
                    final int read = pis.read();
                    if (read < 0) {
                        // End of stream: nothing to push back. Unreading -1
                        // would insert a 0xFF byte and make a later hasNext()
                        // call report more data.
                        return false;
                    }
                    pis.unread(read);
                    return true;
                } catch (IOException ex) {
                    throw new UncheckedIOException(ex);
                }
            }

            @Override
            @SuppressFBWarnings
            public ASample next() {
                try {
                    return reader.read(null, decoder);
                } catch (IOException ex) {
                    // Reported as NoSuchElementException, with the I/O failure
                    // attached as a suppressed exception.
                    NoSuchElementException e = new NoSuchElementException();
                    e.addSuppressed(ex);
                    throw e;
                }
            }

            @Override
            public void remove() {
                throw new UnsupportedOperationException();
            }
        });
    }
}
Aggregations