use of java.util.zip.GZIPInputStream in project otter by alibaba.
the class GzipCompressor method decompressTo.
/**
 * Reads gzip-compressed data from {@code in} and writes the decompressed bytes to {@code out}.
 *
 * <p>The output stream is flushed after the copy. Neither stream is closed by this method.
 *
 * @param in source of gzip-compressed bytes
 * @param out destination for the decompressed bytes
 * @throws CompressException wrapping any exception raised while reading, inflating, or writing
 */
public void decompressTo(InputStream in, OutputStream out) throws CompressException {
    try {
        final GZIPInputStream inflatingStream = new GZIPInputStream(in);
        NioUtils.copy(inflatingStream, out);
        out.flush();
    } catch (Exception cause) {
        throw new CompressException("gzip_decompress_error", cause);
    }
}
use of java.util.zip.GZIPInputStream in project rest.li by linkedin.
the class GzipCompressor method inflate.
/**
 * Decompresses the gzip-encoded content of {@code data} into a byte array.
 *
 * <p>The gzip wrapper around {@code data} is closed quietly before returning, whether or not
 * decompression succeeded.
 *
 * @param data stream of gzip-compressed bytes
 * @return the fully decompressed content
 * @throws CompressionException if reading or inflating the stream fails with an {@link IOException}
 */
@Override
public byte[] inflate(InputStream data) throws CompressionException {
  final ByteArrayOutputStream inflated = new ByteArrayOutputStream();
  GZIPInputStream gzipStream = null;
  try {
    gzipStream = new GZIPInputStream(data);
    IOUtils.copy(gzipStream, inflated);
  } catch (IOException ioe) {
    final String message = CompressionConstants.DECODING_ERROR + getContentEncodingName();
    throw new CompressionException(message, ioe);
  } finally {
    // gzipStream stays null when the GZIPInputStream constructor itself failed
    if (gzipStream != null) {
      IOUtils.closeQuietly(gzipStream);
    }
  }
  return inflated.toByteArray();
}
use of java.util.zip.GZIPInputStream in project languagetool by languagetool-org.
the class FrequencyIndexCreator method indexLinesFromGoogleFile.
/**
 * Reads one gzip-compressed, tab-separated input file line by line and adds its entries to
 * {@code writer}, then adds the total token count document.
 *
 * <p>In hive mode each line is expected to hold the text and a count in its first two columns, and
 * every line becomes one document. Otherwise the second column is parsed as a year and the third
 * as a count; lines with a year below {@code MIN_YEAR} are skipped and the counts of consecutive
 * lines with the same text are summed into a single document.
 *
 * @param writer receives the indexed documents
 * @param inputFile gzip-compressed input file
 * @param totalBytes total input size, used only for progress reporting
 * @param hiveMode whether the file uses the two-column layout described above
 * @throws IOException if the file cannot be opened or read
 */
private void indexLinesFromGoogleFile(DataWriter writer, File inputFile, long totalBytes, boolean hiveMode) throws IOException {
  float progress = (float) bytesProcessed.get() / totalBytes * 100;
  // The file is passed as a format argument rather than concatenated into the format string,
  // so a '%' in the path cannot be misread as a format specifier.
  System.out.printf("==== Working on %s (%.2f%%) ====\n", inputFile, progress);
  try (InputStream fileStream = new FileInputStream(inputFile);
       InputStream gzipStream = new GZIPInputStream(fileStream, BUFFER_SIZE);
       Reader decoder = new InputStreamReader(gzipStream, "utf-8");
       BufferedReader buffered = new BufferedReader(decoder, BUFFER_SIZE)) {
    int i = 0;
    long docCount = 0;
    long lineCount = 0;
    String prevText = null;
    long startTime = System.nanoTime() / 1000;
    String line;
    //noinspection NestedAssignment
    while ((line = buffered.readLine()) != null) {
      lineCount++;
      // To create a smaller index just for testing, comment in this. For there/their
      // with the v1 Google ngram data, the index will be 110MB (instead of 3.1GB with all words):
      //if (!line.matches(".*\\b([Tt]here|[Tt]heir)\\b.*")) {
      //  continue;
      //}
      String[] parts = line.split("\t");
      String text = parts[0];
      if (IGNORE_POS && isRealPosTag(text)) {
        // filtering '_VERB_', 'Italian_ADJ', etc.
        continue;
      }
      if (hiveMode) {
        if (parts.length <= 1) {
          System.err.println("Could not index: " + line);
          continue;
        }
        String docCountStr = parts[1];
        writer.addDoc(text, Long.parseLong(docCountStr));
        if (++i % 500_000 == 0) {
          printStats(i, inputFile, Long.parseLong(docCountStr), lineCount, text, startTime, totalBytes);
        }
      } else {
        int year = Integer.parseInt(parts[1]);
        if (year < MIN_YEAR) {
          continue;
        }
        if (prevText == null || prevText.equals(text)) {
          // aggregate years
          docCount += Long.parseLong(parts[2]);
        } else {
          // the text changed: the previous group is complete, so write it and start a new sum
          writer.addDoc(prevText, docCount);
          if (++i % 5_000 == 0) {
            printStats(i, inputFile, docCount, lineCount, prevText, startTime, totalBytes);
          }
          docCount = Long.parseLong(parts[2]);
        }
      }
      prevText = text;
    }
    if (!hiveMode && prevText != null) {
      // Groups are only written when the *next* text is seen, so the last group of the file
      // has to be flushed here; otherwise the final entry of every file would be dropped.
      writer.addDoc(prevText, docCount);
      i++;
    }
    printStats(i, inputFile, docCount, lineCount, prevText, startTime, totalBytes);
  }
  writer.addTotalTokenCountDoc(totalTokenCount);
}
use of java.util.zip.GZIPInputStream in project okhttp by square.
the class OkApacheClientTest method gunzip.
/**
 * Decompresses the gzip-encoded content of {@code body} and returns it decoded as UTF-8.
 *
 * <p>Both the entity's content stream and the gzip wrapper are closed before returning, including
 * when the gzip header cannot be read or a read fails midway.
 *
 * @param body entity whose content is gzip-compressed
 * @return the decompressed content as a string
 * @throws IOException if obtaining, reading, inflating, or closing the content stream fails
 */
private static String gunzip(HttpEntity body) throws IOException {
  Buffer buffer = new Buffer();
  // The raw stream is its own resource so it is still closed if the GZIPInputStream
  // constructor throws while reading the gzip header.
  try (InputStream raw = body.getContent();
       InputStream in = new GZIPInputStream(raw)) {
    byte[] temp = new byte[1024];
    int read;
    while ((read = in.read(temp)) != -1) {
      buffer.write(temp, 0, read);
    }
  }
  return buffer.readUtf8();
}
use of java.util.zip.GZIPInputStream in project okhttp by square.
the class URLConnectionTest method testClientConfiguredGzipContentEncodingAndConnectionReuse.
/**
 * Regression test for gzip input streams that did not exhaust the underlying input stream,
 * which corrupted the following request or prevented the connection from being reused.
 *
 * <p>A gzipped response is read through a client-side {@link GZIPInputStream}, then a second,
 * uncompressed response is read; the recorded sequence numbers 0 and 1 are asserted for the
 * two requests.
 *
 * http://code.google.com/p/android/issues/detail?id=7059
 * http://code.google.com/p/android/issues/detail?id=38817
 */
private void testClientConfiguredGzipContentEncodingAndConnectionReuse(TransferKind transferKind, boolean tls) throws Exception {
  if (tls) {
    SSLSocketFactory sslSocketFactory = sslClient.socketFactory;
    RecordingHostnameVerifier recordingVerifier = new RecordingHostnameVerifier();
    server.useHttps(sslSocketFactory, false);
    urlFactory.setClient(
        urlFactory.client().newBuilder()
            .sslSocketFactory(sslSocketFactory, sslClient.trustManager)
            .hostnameVerifier(recordingVerifier)
            .build());
  }

  // First response: gzip-encoded body.
  MockResponse gzippedResponse = new MockResponse();
  gzippedResponse.addHeader("Content-Encoding: gzip");
  transferKind.setBody(gzippedResponse, gzip("one (gzipped)"), 5);
  server.enqueue(gzippedResponse);

  // Second response: plain body.
  MockResponse identityResponse = new MockResponse();
  transferKind.setBody(identityResponse, "two (identity)", 5);
  server.enqueue(identityResponse);

  HttpURLConnection firstConnection = urlFactory.open(server.url("/").url());
  firstConnection.addRequestProperty("Accept-Encoding", "gzip");
  InputStream decompressed = new GZIPInputStream(firstConnection.getInputStream());
  assertEquals("one (gzipped)", readAscii(decompressed, Integer.MAX_VALUE));
  assertEquals(0, server.takeRequest().getSequenceNumber());

  HttpURLConnection secondConnection = urlFactory.open(server.url("/").url());
  assertEquals("two (identity)", readAscii(secondConnection.getInputStream(), Integer.MAX_VALUE));
  assertEquals(1, server.takeRequest().getSequenceNumber());
}
Aggregations