use of com.google.common.io.CountingInputStream in project alluxio by Alluxio.
the class ObjectUnderFileInputStream method openStream.
/**
* Open a new stream.
*
* @param options for opening a stream
* @throws IOException if a non-Alluxio error occurs
*/
private void openStream(OpenOptions options) throws IOException {
  if (mStream != null) {
    mStream.close();
  }
  mInitPos = options.getOffset();
  mStream = new CountingInputStream(mUfs.openObject(mKey, options));
}
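Here the wrapper exists so the stream can report an absolute position: the underlying object stream is opened at options.getOffset(), and CountingInputStream tracks how many bytes have been read since. A minimal, self-contained sketch of that idea, with a ByteArrayInputStream standing in for the UFS object stream and the class and variable names invented for illustration:

import com.google.common.io.CountingInputStream;
import java.io.ByteArrayInputStream;
import java.io.IOException;

public class PositionTrackingSketch {
  public static void main(String[] args) throws IOException {
    long initPos = 100; // offset the underlying stream was opened at
    try (CountingInputStream in =
        new CountingInputStream(new ByteArrayInputStream(new byte[32]))) {
      in.read(new byte[10]);
      // absolute position = initial offset + bytes read through the wrapper
      System.out.println(initPos + in.getCount()); // prints 110
    }
  }
}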
use of com.google.common.io.CountingInputStream in project beam by apache.
the class CoderProperties method decode.
@VisibleForTesting
static <T> T decode(Coder<T> coder, Coder.Context context, byte[] bytes) throws CoderException, IOException {
  @SuppressWarnings("unchecked")
  Coder<T> deserializedCoder = SerializableUtils.clone(coder);
  byte[] buffer;
  if (context == Coder.Context.NESTED) {
    buffer = new byte[bytes.length + 1];
    System.arraycopy(bytes, 0, buffer, 0, bytes.length);
    buffer[bytes.length] = 1;
  } else {
    buffer = bytes;
  }
  CountingInputStream cis = new CountingInputStream(new ByteArrayInputStream(buffer));
  T value = deserializedCoder.decode(new UnownedInputStream(cis), context);
  assertThat("consumed bytes equal to encoded bytes", cis.getCount(), equalTo((long) bytes.length));
  return value;
}
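The getCount() assertion is what makes this a useful test harness: after decoding, the number of bytes consumed must equal the number of bytes encoded, or the coder has a framing bug. A stripped-down sketch of the same consumption check, using DataInputStream as a stand-in decoder (class and variable names invented for illustration):

import com.google.common.io.CountingInputStream;
import java.io.ByteArrayInputStream;
import java.io.DataInputStream;
import java.io.IOException;

public class ConsumptionCheckSketch {
  public static void main(String[] args) throws IOException {
    byte[] encoded = {0, 0, 0, 42}; // one big-endian int
    CountingInputStream cis = new CountingInputStream(new ByteArrayInputStream(encoded));
    int value = new DataInputStream(cis).readInt();
    // a correct decoder consumes exactly the encoded bytes, no more, no less
    if (cis.getCount() != encoded.length) {
      throw new AssertionError("consumed " + cis.getCount() + " of " + encoded.length + " bytes");
    }
    System.out.println(value); // prints 42
  }
}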
use of com.google.common.io.CountingInputStream in project jackrabbit-oak by apache.
the class BinaryTextExtractor method parseStringValue0.
private String parseStringValue0(Blob v, Metadata metadata, String path) {
    WriteOutContentHandler handler = new WriteOutContentHandler(definition.getMaxExtractLength());
    long start = System.currentTimeMillis();
    long bytesRead = 0;
    long length = v.length();
    if (log.isDebugEnabled()) {
        log.debug("Extracting {}, {} bytes, id {}", path, length, v.getContentIdentity());
    }
    String oldThreadName = null;
    if (length > SMALL_BINARY) {
        Thread t = Thread.currentThread();
        oldThreadName = t.getName();
        t.setName(oldThreadName + ": Extracting " + path + ", " + length + " bytes");
    }
    try {
        CountingInputStream stream = new CountingInputStream(new LazyInputStream(new BlobByteSource(v)));
        try {
            getParser().parse(stream, handler, metadata, new ParseContext());
        } finally {
            bytesRead = stream.getCount();
            stream.close();
        }
    } catch (LinkageError e) {
        // Capture and ignore errors caused by extraction libraries
        // not being present. This is equivalent to disabling
        // selected media types in configuration, so we can simply
        // ignore these errors.
    } catch (Throwable t) {
        // The special STOP exception is used for normal termination.
        if (!handler.isWriteLimitReached(t)) {
            log.debug("[{}] Failed to extract text from a binary property: {}."
                    + " This is a fairly common case, and nothing to"
                    + " worry about. The stack trace is included to"
                    + " help improve the text extraction feature.", getIndexName(), path, t);
            extractedTextCache.put(v, ExtractedText.ERROR);
            return TEXT_EXTRACTION_ERROR;
        }
    } finally {
        if (oldThreadName != null) {
            Thread.currentThread().setName(oldThreadName);
        }
    }
    String result = handler.toString();
    if (bytesRead > 0) {
        long time = System.currentTimeMillis() - start;
        int len = result.length();
        recordTextExtractionStats(time, bytesRead, len);
        if (log.isDebugEnabled()) {
            log.debug("Extracting {} took {} ms, {} bytes read, {} text size", path, time, bytesRead, len);
        }
    }
    extractedTextCache.put(v, new ExtractedText(ExtractedText.ExtractionResult.SUCCESS, result));
    return result;
}
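Note where stream.getCount() is read: in the finally block, before close(). That way the byte count survives even if Tika's parse throws, so the statistics recorded at the end of the method reflect partial reads too. A minimal sketch of just that capture-on-failure pattern, with the failing parser simulated by a thrown RuntimeException:

import com.google.common.io.CountingInputStream;
import java.io.ByteArrayInputStream;
import java.io.IOException;

public class CountOnFailureSketch {
  public static void main(String[] args) {
    long bytesRead = 0;
    try {
      CountingInputStream stream =
          new CountingInputStream(new ByteArrayInputStream(new byte[]{1, 2, 3}));
      try {
        stream.read(); // one byte consumed before the "parser" fails
        throw new RuntimeException("simulated parser failure");
      } finally {
        // capture the count before closing, even on the failure path
        bytesRead = stream.getCount();
        stream.close();
      }
    } catch (IOException | RuntimeException e) {
      // extraction failed, but we still know how far it got
    }
    System.out.println(bytesRead); // prints 1
  }
}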
use of com.google.common.io.CountingInputStream in project jackrabbit-oak by apache.
the class TextExtractor method parseStringValue.
//~--------------------------------------< Tika >
private String parseStringValue(ByteSource byteSource, Metadata metadata, String path) {
    WriteOutContentHandler handler = new WriteOutContentHandler(maxExtractedLength);
    long start = System.currentTimeMillis();
    long size = 0;
    try {
        CountingInputStream stream = new CountingInputStream(new LazyInputStream(byteSource));
        try {
            tika.getParser().parse(stream, handler, metadata, new ParseContext());
        } finally {
            size = stream.getCount();
            stream.close();
        }
    } catch (LinkageError e) {
        // Capture and ignore errors caused by extraction libraries
        // not being present. This is equivalent to disabling
        // selected media types in configuration, so we can simply
        // ignore these errors.
    } catch (Throwable t) {
        // The special STOP exception is used for normal termination.
        if (!handler.isWriteLimitReached(t)) {
            parserErrorCount.incrementAndGet();
            parserError.debug("Failed to extract text from a binary property: " + path
                    + " This is a fairly common case, and nothing to"
                    + " worry about. The stack trace is included to"
                    + " help improve the text extraction feature.", t);
            return ERROR_TEXT;
        }
    }
    String result = handler.toString();
    timeTaken.addAndGet(System.currentTimeMillis() - start);
    if (size > 0) {
        extractedTextSize.addAndGet(result.length());
        extractionCount.incrementAndGet();
        totalSizeRead.addAndGet(size);
        return result;
    }
    return null;
}
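This variant feeds the per-extraction count into shared AtomicLong counters, which is how the extractor keeps cheap, thread-safe throughput metrics across many calls. A minimal sketch of aggregating getCount() that way (the counter, the drain helper, and the class name are invented for illustration):

import com.google.common.io.ByteStreams;
import com.google.common.io.CountingInputStream;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.util.concurrent.atomic.AtomicLong;

public class ThroughputSketch {
  private static final AtomicLong totalSizeRead = new AtomicLong();

  static void drain(byte[] data) throws IOException {
    try (CountingInputStream in =
        new CountingInputStream(new ByteArrayInputStream(data))) {
      ByteStreams.exhaust(in); // read to EOF, discarding the bytes
      totalSizeRead.addAndGet(in.getCount());
    }
  }

  public static void main(String[] args) throws IOException {
    drain(new byte[5]);
    drain(new byte[7]);
    System.out.println(totalSizeRead.get()); // prints 12
  }
}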
use of com.google.common.io.CountingInputStream in project jackrabbit-oak by apache.
the class IndexConsistencyChecker method checkBlob.
private void checkBlob(String propName, Blob blob, Tree tree, Result result) {
    String id = blob.getContentIdentity();
    String blobPath = String.format("%s/%s/%s", tree.getPath(), propName, id);
    try {
        InputStream is = blob.getNewStream();
        CountingInputStream cis = new CountingInputStream(is);
        IOUtils.copyLarge(cis, ByteStreams.nullOutputStream());
        if (cis.getCount() != blob.length()) {
            String msg = String.format("Invalid blob %s. Length mismatch - expected ${%d} -> found ${%d}",
                    blobPath, blob.length(), cis.getCount());
            result.invalidBlobIds.add(new FileSizeStatus(blobPath, cis.getCount(), blob.length()));
            log.warn("[{}] {}", indexPath, msg);
            result.clean = false;
            result.blobSizeMismatch = true;
        }
        result.binaryPropSize += cis.getCount();
    } catch (Exception e) {
        log.warn("[{}] Error occurred reading blob at {}", indexPath, blobPath, e);
        result.missingBlobIds.add(id);
        result.clean = false;
        result.missingBlobs = true;
    }
}
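The consistency check works because fully draining the counted stream yields the blob's true byte length, which is then compared against the length the blob claims. A self-contained sketch of that length check, using Guava's ByteStreams.copy in place of commons-io's IOUtils.copyLarge; the expected length is deliberately wrong here to exercise the mismatch branch:

import com.google.common.io.ByteStreams;
import com.google.common.io.CountingInputStream;
import java.io.ByteArrayInputStream;
import java.io.IOException;

public class LengthCheckSketch {
  public static void main(String[] args) throws IOException {
    byte[] blob = new byte[1024];
    long expectedLength = 1000; // deliberately wrong, to trigger the mismatch branch
    try (CountingInputStream cis =
        new CountingInputStream(new ByteArrayInputStream(blob))) {
      ByteStreams.copy(cis, ByteStreams.nullOutputStream());
      if (cis.getCount() != expectedLength) {
        System.out.printf("Length mismatch - expected %d -> found %d%n",
            expectedLength, cis.getCount());
      }
    }
  }
}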