Search in sources :

Example 6 with ByteSink

use of com.google.common.io.ByteSink in project druid by druid-io.

the class CompressionUtilsTest method testGoodGzipWithException.

/**
 * Gzips the test file through a sink whose stream fails its first {@code flush()},
 * then verifies that the archive was still produced, that it decompresses to the
 * expected data, and that the expected number of flush attempts was made.
 */
@Test
public void testGoodGzipWithException() throws Exception {
    final AtomicLong flushes = new AtomicLong(0);
    final File workDir = temporaryFolder.newFolder("testGoodGzipByteSource");
    final File compressed = new File(workDir, testFile.getName() + ".gz");
    Assert.assertFalse(compressed.exists());

    // A sink whose stream rejects the very first flush and behaves normally afterwards.
    final ByteSink flakySink = new ByteSink() {

        @Override
        public OutputStream openStream() throws IOException {
            return new FilterOutputStream(new FileOutputStream(compressed)) {

                @Override
                public void flush() throws IOException {
                    final long earlierFlushes = flushes.getAndIncrement();
                    if (earlierFlushes <= 0) {
                        throw new IOException("Haven't flushed enough");
                    }
                    super.flush();
                }
            };
        }
    };
    CompressionUtils.gzip(Files.asByteSource(testFile), flakySink, Predicates.<Throwable>alwaysTrue());
    Assert.assertTrue(compressed.exists());

    // The archive must decode to the expected contents.
    try (final InputStream gzipped = CompressionUtils.gzipInputStream(new FileInputStream(compressed))) {
        assertGoodDataStream(gzipped);
    }

    // Remove the original so that gunzip has to recreate it from the archive.
    final boolean removed = testFile.delete();
    if (!removed) {
        throw new IOException(String.format("Unable to delete file [%s]", testFile.getAbsolutePath()));
    }
    Assert.assertFalse(testFile.exists());
    CompressionUtils.gunzip(Files.asByteSource(compressed), testFile);
    Assert.assertTrue(testFile.exists());
    try (final InputStream restored = new FileInputStream(testFile)) {
        assertGoodDataStream(restored);
    }

    // 2 for suppressed closes, 2 for manual calls to shake out errors
    Assert.assertEquals(4, flushes.get());
}
Also used : AtomicLong(java.util.concurrent.atomic.AtomicLong) ByteSink(com.google.common.io.ByteSink) GZIPInputStream(java.util.zip.GZIPInputStream) FilterInputStream(java.io.FilterInputStream) ByteArrayInputStream(java.io.ByteArrayInputStream) FileInputStream(java.io.FileInputStream) InputStream(java.io.InputStream) ByteArrayOutputStream(java.io.ByteArrayOutputStream) OutputStream(java.io.OutputStream) FileOutputStream(java.io.FileOutputStream) FilterOutputStream(java.io.FilterOutputStream) GZIPOutputStream(java.util.zip.GZIPOutputStream) FileOutputStream(java.io.FileOutputStream) IOException(java.io.IOException) FilterOutputStream(java.io.FilterOutputStream) File(java.io.File) FileInputStream(java.io.FileInputStream) Test(org.junit.Test)

Example 7 with ByteSink

use of com.google.common.io.ByteSink in project druid by druid-io.

the class CompressedFloatsSerdeTest method testWithValues.

/**
 * Serializes {@code values} with the configured byte order and compression strategy,
 * checks the reported sizes, then reads the data back through
 * {@code CompressedFloatsIndexedSupplier.fromByteBuffer} and verifies it with the
 * index, fill, serde and concurrent-read checks.
 *
 * @param values the floats to round-trip; may be empty
 * @throws Exception if serialization, deserialization or closing the reader fails
 */
public void testWithValues(float[] values) throws Exception {
    FloatSupplierSerializer serializer = CompressionFactory.getFloatSerializer(new IOPeonForTesting(), "test", order, compressionStrategy);
    serializer.open();
    for (float value : values) {
        serializer.add(value);
    }
    Assert.assertEquals(values.length, serializer.size());
    final ByteArrayOutputStream baos = new ByteArrayOutputStream();
    serializer.closeAndConsolidate(new ByteSink() {

        @Override
        public OutputStream openStream() throws IOException {
            return baos;
        }
    });
    Assert.assertEquals(baos.size(), serializer.getSerializedSize());
    CompressedFloatsIndexedSupplier supplier = CompressedFloatsIndexedSupplier.fromByteBuffer(ByteBuffer.wrap(baos.toByteArray()), order, null);
    IndexedFloats floats = supplier.get();
    // Close the reader even when one of the checks below fails; previously a failing
    // assertion skipped close() and leaked whatever the reader holds.
    try {
        assertIndexMatchesVals(floats, values);
        // Exercise fills over ten random [start, end) windows.
        for (int i = 0; i < 10; i++) {
            int a = (int) (Math.random() * values.length);
            int b = (int) (Math.random() * values.length);
            int start = Math.min(a, b);
            int end = Math.max(a, b);
            tryFill(floats, values, start, end - start);
        }
        testSupplierSerde(supplier, values);
        testConcurrentThreadReads(supplier, floats, values);
    } finally {
        floats.close();
    }
}
Also used : ByteSink(com.google.common.io.ByteSink) OutputStream(java.io.OutputStream) ByteArrayOutputStream(java.io.ByteArrayOutputStream) ByteArrayOutputStream(java.io.ByteArrayOutputStream) IOException(java.io.IOException)

Example 8 with ByteSink

use of com.google.common.io.ByteSink in project kotlin by JetBrains.

the class TypoLookup method writeDatabase.

/**
 * Writes the binary typo database for the given source lines to {@code file}.
 * See the {@link #readData(LintClient,File,File)} for documentation on the data format.
 *
 * <p>Only lines that start with a letter and contain {@code WORD_SEPARATOR} followed by
 * a non-empty replacement list are written; everything else (comments, blank lines,
 * entries without replacements) is dropped.
 *
 * @param file the file to write the database to
 * @param lines the raw lines of the typo source data
 * @throws IOException if the file cannot be written
 */
private static void writeDatabase(File file, List<String> lines) throws IOException {
    // Drop comments etc
    List<String> words = new ArrayList<String>(lines.size());
    for (String line : lines) {
        if (line.isEmpty() || !Character.isLetter(line.charAt(0))) {
            continue;
        }
        int end = line.indexOf(WORD_SEPARATOR);
        if (end == -1) {
            // No separator means there are no replacements, which we don't support.
            // Skip the line rather than slicing past its end, which used to throw
            // StringIndexOutOfBoundsException.
            continue;
        }
        String typo = line.substring(0, end).trim();
        String replacements = line.substring(end + WORD_SEPARATOR.length()).trim();
        if (replacements.isEmpty()) {
            // We don't support empty replacements
            continue;
        }
        // A 0 character separates the typo from its replacements
        words.add(typo + (char) 0 + replacements);
    }
    byte[][] wordArrays = new byte[words.size()][];
    for (int i = 0, n = words.size(); i < n; i++) {
        String word = words.get(i);
        wordArrays[i] = word.getBytes(Charsets.UTF_8);
    }
    // Sort words, using our own comparator to ensure that it matches the
    // binary search in getTypos()
    Comparator<byte[]> comparator = new Comparator<byte[]>() {

        @Override
        public int compare(byte[] o1, byte[] o2) {
            return TypoLookup.compare(o1, 0, (byte) 0, o2, 0, o2.length);
        }
    };
    Arrays.sort(wordArrays, comparator);
    byte[] headerBytes = FILE_HEADER.getBytes(Charsets.US_ASCII);
    int entryCount = wordArrays.length;
    // Header + 1 version byte + 4 count bytes, plus the per-entry budget
    int capacity = entryCount * BYTES_PER_ENTRY + headerBytes.length + 5;
    ByteBuffer buffer = ByteBuffer.allocate(capacity);
    buffer.order(ByteOrder.BIG_ENDIAN);
    //  1. A file header, which is the exact contents of {@link FILE_HEADER} encoded
    //      as ASCII characters. The purpose of the header is to identify what the file
    //      is for, for anyone attempting to open the file.
    buffer.put(headerBytes);
    //  2. A file version number. If the binary file does not match the reader's expected
    //      version, it can ignore it (and regenerate the cache from XML).
    buffer.put((byte) BINARY_FORMAT_VERSION);
    //  3. The number of words [1 int]
    buffer.putInt(entryCount);
    //  4. Word offset table (one integer per word, pointing to the byte offset in the
    //       file (relative to the beginning of the file) where each word begins.
    //       The words are always sorted alphabetically.
    int wordOffsetTable = buffer.position();
    // Reserve room for the offset table with zero placeholders; the slots are
    // backfilled as we're writing out the word entries below
    for (int i = 0, n = entryCount; i < n; i++) {
        buffer.putInt(0);
    }
    int nextEntry = buffer.position();
    int nextOffset = wordOffsetTable;
    //  5. The word entries: for each word, its bytes (typo, 0, replacements)
    //       followed by a final 0.
    for (byte[] word : wordArrays) {
        // Backfill this word's slot in the offset table
        buffer.position(nextOffset);
        buffer.putInt(nextEntry);
        nextOffset = buffer.position();
        // Then append the entry itself
        buffer.position(nextEntry);
        // already embeds 0 to separate typo from words
        buffer.put(word);
        buffer.put((byte) 0);
        nextEntry = buffer.position();
    }
    int size = buffer.position();
    assert size <= buffer.limit();
    if (WRITE_STATS) {
        System.out.println("Wrote " + words.size() + " word entries");
        System.out.print("Actual binary size: " + size + " bytes");
        System.out.println(String.format(" (%.1fM)", size / (1024 * 1024.f)));
        System.out.println("Allocated size: " + (entryCount * BYTES_PER_ENTRY) + " bytes");
        if (entryCount > 0) {
            // Guarded: an empty word list would otherwise divide by zero
            System.out.println("Required bytes per entry: " + (size / entryCount) + " bytes");
        }
    }
    // Now dump this out as a file
    // There's probably an API to do this more efficiently; TODO: Look into this.
    byte[] b = new byte[size];
    buffer.rewind();
    buffer.get(b);
    ByteSink sink = Files.asByteSink(file);
    sink.write(b);
}
Also used : ByteSink(com.google.common.io.ByteSink) ArrayList(java.util.ArrayList) ByteBuffer(java.nio.ByteBuffer) MappedByteBuffer(java.nio.MappedByteBuffer) Comparator(java.util.Comparator)

Example 9 with ByteSink

use of com.google.common.io.ByteSink in project kotlin by JetBrains.

the class ApiLookup method writeDatabase.

/**
 * Serializes the given API description into a binary database and writes it to
 * {@code file}, replacing any existing file.
 * See the {@link #readData(LintClient,File,File)} for documentation on the data format.
 *
 * <p>The method works in passes over the sorted packages and their classes: it first
 * estimates the buffer size and selects the members to keep, then assigns a 4-byte
 * index slot to every package, class and retained member, and finally writes the
 * member, class and package entries while backfilling each slot with the offset of
 * its entry.
 *
 * @param file the file to write the database to
 * @param info the API description to serialize
 * @throws IOException if the file cannot be written
 */
private static void writeDatabase(File file, Api info) throws IOException {
    Map<String, ApiClass> classMap = info.getClasses();
    List<ApiPackage> packages = Lists.newArrayList(info.getPackages().values());
    Collections.sort(packages);
    // Compute members of each class that must be included in the database; we can
    // skip those that have the same since-level as the containing class. And we
    // also need to keep those entries that are marked deprecated.
    // The same pass accumulates the size estimate used to allocate the buffer below.
    int estimatedSize = 0;
    for (ApiPackage pkg : packages) {
        // offset entry
        estimatedSize += 4;
        // package entry
        estimatedSize += pkg.getName().length() + 20;
        // Diagnostic only, printed when assertions are enabled
        if (assertionsEnabled() && !isRelevantOwner(pkg.getName() + "/") && !pkg.getName().startsWith("android/support")) {
            System.out.println("Warning: isRelevantOwner fails for " + pkg.getName() + "/");
        }
        for (ApiClass apiClass : pkg.getClasses()) {
            // offset entry
            estimatedSize += 4;
            // class entry
            estimatedSize += apiClass.getName().length() + 20;
            Set<String> allMethods = apiClass.getAllMethods(info);
            Set<String> allFields = apiClass.getAllFields(info);
            // Strip out all members that have been supported since version 1.
            // This makes the database *much* leaner (down from about 4M to about
            // 1.7M), and this just fills the table with entries that ultimately
            // don't help the API checker since it just needs to know if something
            // requires a version *higher* than the minimum. If in the future the
            // database needs to answer queries about whether a method is public
            // or not, then we'd need to put this data back in.
            int clsSince = apiClass.getSince();
            List<String> members = new ArrayList<String>(allMethods.size() + allFields.size());
            // Keep a method when its since-level differs from the class's, or it is deprecated
            for (String member : allMethods) {
                if (apiClass.getMethod(member, info) != clsSince || apiClass.getMemberDeprecatedIn(member, info) > 0) {
                    members.add(member);
                }
            }
            // Same rule for fields
            for (String member : allFields) {
                if (apiClass.getField(member, info) != clsSince || apiClass.getMemberDeprecatedIn(member, info) > 0) {
                    members.add(member);
                }
            }
            // Budget for the interface references; superclass references are only
            // budgeted when there is more than one superclass entry
            estimatedSize += 2 + 4 * (apiClass.getInterfaces().size());
            if (apiClass.getSuperClasses().size() > 1) {
                estimatedSize += 2 + 4 * (apiClass.getSuperClasses().size());
            }
            // Sort the retained members and store them on the class for the passes below
            Collections.sort(members);
            apiClass.members = members;
            for (String member : members) {
                estimatedSize += member.length();
                estimatedSize += 16;
            }
        }
        // Ensure the classes are sorted
        Collections.sort(pkg.getClasses());
    }
    // Write header
    ByteBuffer buffer = ByteBuffer.allocate(estimatedSize);
    buffer.order(ByteOrder.BIG_ENDIAN);
    buffer.put(FILE_HEADER.getBytes(Charsets.US_ASCII));
    buffer.put((byte) BINARY_FORMAT_VERSION);
    // Remember where the index count goes; it is only known after the slots are assigned
    int indexCountOffset = buffer.position();
    int indexCount = 0;
    // placeholder
    buffer.putInt(0);
    // Write the number of packages in the package index
    buffer.putInt(packages.size());
    // Assign package index slots (4 bytes each). Nothing is written to the buffer
    // here; each slot is filled in when the corresponding entry is written below.
    int newIndex = buffer.position();
    for (ApiPackage pkg : packages) {
        pkg.indexOffset = newIndex;
        newIndex += 4;
        indexCount++;
    }
    // Assign class index slots, recording each class's index number
    for (ApiPackage pkg : packages) {
        for (ApiClass cls : pkg.getClasses()) {
            cls.indexOffset = newIndex;
            cls.index = indexCount;
            newIndex += 4;
            indexCount++;
        }
    }
    // Assign member index slots; classes without retained members get -1 markers
    for (ApiPackage pkg : packages) {
        for (ApiClass cls : pkg.getClasses()) {
            if (cls.members != null && !cls.members.isEmpty()) {
                cls.memberOffsetBegin = newIndex;
                cls.memberIndexStart = indexCount;
                for (String ignored : cls.members) {
                    newIndex += 4;
                    indexCount++;
                }
                cls.memberOffsetEnd = newIndex;
                cls.memberIndexLength = indexCount - cls.memberIndexStart;
            } else {
                cls.memberOffsetBegin = -1;
                cls.memberOffsetEnd = -1;
                cls.memberIndexStart = -1;
                cls.memberIndexLength = 0;
            }
        }
    }
    // Fill in the earlier index count
    buffer.position(indexCountOffset);
    buffer.putInt(indexCount);
    // Skip past the whole index region; entries are appended after it
    buffer.position(newIndex);
    // Write member entries
    for (ApiPackage pkg : packages) {
        for (ApiClass apiClass : pkg.getClasses()) {
            String clz = apiClass.getName();
            int index = apiClass.memberOffsetBegin;
            for (String member : apiClass.members) {
                // Update member offset to point to this entry
                int start = buffer.position();
                buffer.position(index);
                buffer.putInt(start);
                index = buffer.position();
                buffer.position(start);
                // Members containing '(' are looked up as methods, the rest as fields
                int since;
                if (member.indexOf('(') != -1) {
                    since = apiClass.getMethod(member, info);
                } else {
                    since = apiClass.getField(member, info);
                }
                // Integer.MAX_VALUE is unexpected here: fail under -ea, otherwise fall back to 1
                if (since == Integer.MAX_VALUE) {
                    assert false : clz + ':' + member;
                    since = 1;
                }
                int deprecatedIn = apiClass.getMemberDeprecatedIn(member, info);
                if (deprecatedIn != 0) {
                    assert deprecatedIn != -1 : deprecatedIn + " for " + member;
                }
                byte[] signature = member.getBytes(Charsets.UTF_8);
                for (byte b : signature) {
                    // Make sure all signatures are really just simple ASCII
                    assert b == (b & 0x7f) : member;
                    buffer.put(b);
                    // Skip types on methods
                    if (b == (byte) ')') {
                        break;
                    }
                }
                // Terminating 0 after the (possibly truncated) signature
                buffer.put((byte) 0);
                int api = since;
                assert api == UnsignedBytes.toInt((byte) api);
                // max that fits in a byte
                assert api >= 1 && api < 0xFF;
                // One byte for the since-level, flagged when a deprecation byte follows
                boolean isDeprecated = deprecatedIn > 0;
                if (isDeprecated) {
                    api |= HAS_DEPRECATION_BYTE_FLAG;
                }
                buffer.put((byte) api);
                if (isDeprecated) {
                    assert deprecatedIn == UnsignedBytes.toInt((byte) deprecatedIn);
                    buffer.put((byte) deprecatedIn);
                }
            }
            // All of this class's member slots must have been filled in
            assert index == apiClass.memberOffsetEnd : apiClass.memberOffsetEnd;
        }
    }
    // Write class entries, backfilling each class's index slot with the entry offset
    for (ApiPackage pkg : packages) {
        List<ApiClass> classes = pkg.getClasses();
        for (ApiClass cls : classes) {
            int index = buffer.position();
            buffer.position(cls.indexOffset);
            buffer.putInt(index);
            buffer.position(index);
            String name = cls.getSimpleName();
            byte[] nameBytes = name.getBytes(Charsets.UTF_8);
            assert nameBytes.length < 254 : name;
            // 2: terminating 0, and this byte itself
            buffer.put((byte) (nameBytes.length + 2));
            buffer.put(nameBytes);
            buffer.put((byte) 0);
            // 3 bytes for beginning, 2 bytes for *length*
            put3ByteInt(buffer, cls.memberIndexStart);
            put2ByteInt(buffer, cls.memberIndexLength);
            ApiClass apiClass = classMap.get(cls.getName());
            assert apiClass != null : cls.getName();
            int since = apiClass.getSince();
            // make sure it fits
            assert since == UnsignedBytes.toInt((byte) since) : since;
            int deprecatedIn = apiClass.getDeprecatedIn();
            boolean isDeprecated = deprecatedIn > 0;
            // When deprecated, flag the since byte; the deprecatedIn byte is written right after it
            if (isDeprecated) {
                since |= HAS_DEPRECATION_BYTE_FLAG;
                // make sure it fits
                assert since == UnsignedBytes.toInt((byte) since) : since;
            }
            buffer.put((byte) since);
            if (isDeprecated) {
                assert deprecatedIn == UnsignedBytes.toInt((byte) deprecatedIn) : deprecatedIn;
                buffer.put((byte) deprecatedIn);
            }
            // Count the interfaces and superclasses whose API level is higher than the class's own
            List<Pair<String, Integer>> interfaces = apiClass.getInterfaces();
            int count = 0;
            if (interfaces != null && !interfaces.isEmpty()) {
                for (Pair<String, Integer> pair : interfaces) {
                    int api = pair.getSecond();
                    if (api > apiClass.getSince()) {
                        count++;
                    }
                }
            }
            List<Pair<String, Integer>> supers = apiClass.getSuperClasses();
            if (supers != null && !supers.isEmpty()) {
                for (Pair<String, Integer> pair : supers) {
                    int api = pair.getSecond();
                    if (api > apiClass.getSince()) {
                        count++;
                    }
                }
            }
            // The count is stored in a single byte
            buffer.put((byte) count);
            if (count > 0) {
                // Superclass references first, then interfaces: class index followed by one API level byte
                if (supers != null) {
                    for (Pair<String, Integer> pair : supers) {
                        int api = pair.getSecond();
                        if (api > apiClass.getSince()) {
                            ApiClass superClass = classMap.get(pair.getFirst());
                            assert superClass != null : cls;
                            put3ByteInt(buffer, superClass.index);
                            buffer.put((byte) api);
                        }
                    }
                }
                if (interfaces != null) {
                    for (Pair<String, Integer> pair : interfaces) {
                        int api = pair.getSecond();
                        if (api > apiClass.getSince()) {
                            ApiClass interfaceClass = classMap.get(pair.getFirst());
                            assert interfaceClass != null : cls;
                            put3ByteInt(buffer, interfaceClass.index);
                            buffer.put((byte) api);
                        }
                    }
                }
            }
        }
    }
    // Write package entries, backfilling each package's index slot with the entry offset
    for (ApiPackage pkg : packages) {
        int index = buffer.position();
        buffer.position(pkg.indexOffset);
        buffer.putInt(index);
        buffer.position(index);
        // Package name followed by a terminating 0
        byte[] bytes = pkg.getName().getBytes(Charsets.UTF_8);
        buffer.put(bytes);
        buffer.put((byte) 0);
        List<ApiClass> classes = pkg.getClasses();
        if (classes.isEmpty()) {
            // No classes: zero start index and zero count
            put3ByteInt(buffer, 0);
            put2ByteInt(buffer, 0);
        } else {
            // 3 bytes for beginning, 2 bytes for *length*
            int firstClassIndex = classes.get(0).index;
            int classCount = classes.get(classes.size() - 1).index - firstClassIndex + 1;
            put3ByteInt(buffer, firstClassIndex);
            put2ByteInt(buffer, classCount);
        }
    }
    // Everything up to the current position is the actual database content
    int size = buffer.position();
    assert size <= buffer.limit();
    buffer.mark();
    if (WRITE_STATS) {
        System.out.print("Actual binary size: " + size + " bytes");
        System.out.println(String.format(" (%.1fM)", size / (1024 * 1024.f)));
    }
    // Now dump this out as a file
    // There's probably an API to do this more efficiently; TODO: Look into this.
    byte[] b = new byte[size];
    buffer.rewind();
    buffer.get(b);
    // Remove any previous database file before writing the new one
    if (file.exists()) {
        boolean deleted = file.delete();
        assert deleted : file;
    }
    ByteSink sink = Files.asByteSink(file);
    sink.write(b);
}
Also used : ArrayList(java.util.ArrayList) ByteBuffer(java.nio.ByteBuffer) ByteSink(com.google.common.io.ByteSink) Pair(com.android.utils.Pair)

Aggregations

ByteSink (com.google.common.io.ByteSink)9 IOException (java.io.IOException)7 ByteArrayOutputStream (java.io.ByteArrayOutputStream)6 OutputStream (java.io.OutputStream)5 File (java.io.File)4 FileOutputStream (java.io.FileOutputStream)4 CompressedObjectStrategy (io.druid.segment.data.CompressedObjectStrategy)3 TmpFileIOPeon (io.druid.segment.data.TmpFileIOPeon)3 FileInputStream (java.io.FileInputStream)3 FileChannel (java.nio.channels.FileChannel)3 ArrayList (java.util.ArrayList)3 Map (java.util.Map)3 BenchmarkColumnSchema (io.druid.benchmark.datagen.BenchmarkColumnSchema)2 BenchmarkColumnValueGenerator (io.druid.benchmark.datagen.BenchmarkColumnValueGenerator)2 CompressionFactory (io.druid.segment.data.CompressionFactory)2 LongSupplierSerializer (io.druid.segment.data.LongSupplierSerializer)2 BufferedReader (java.io.BufferedReader)2 BufferedWriter (java.io.BufferedWriter)2 ByteArrayInputStream (java.io.ByteArrayInputStream)2 FilterOutputStream (java.io.FilterOutputStream)2