Search in sources:

Example 26 with DataOutputStream

use of java.io.DataOutputStream in project pinot by linkedin.

the class FixedByteWidthRowColDataFileReaderTest method testSingleCol.

/**
 * Writes 100 random ints to a file with {@link DataOutputStream}, then checks that a
 * single-column, 4-byte-wide {@code FixedByteSingleValueMultiColReader} returns the same
 * values when the file is loaded in heap mode and again in mmap mode.
 *
 * @throws Exception if writing the file, loading a buffer, or reading a value fails
 */
@Test
void testSingleCol() throws Exception {
    String fileName = "test_single_col.dat";
    File f = new File(fileName);
    // Remove any leftover from a previous (possibly aborted) run.
    f.delete();
    try {
        int[] data = new int[100];
        Random r = new Random();
        // try-with-resources closes (and thereby flushes) the stream even if a write fails.
        try (DataOutputStream dos = new DataOutputStream(new FileOutputStream(f))) {
            for (int i = 0; i < data.length; i++) {
                data[i] = r.nextInt();
                dos.writeInt(data[i]);
            }
        }

        PinotDataBuffer heapBuffer = PinotDataBuffer.fromFile(f, ReadMode.heap, FileChannel.MapMode.READ_ONLY, "testing");
        FixedByteSingleValueMultiColReader heapReader = new FixedByteSingleValueMultiColReader(heapBuffer, data.length, 1, new int[] { 4 });
        try {
            heapReader.open();
            for (int i = 0; i < data.length; i++) {
                Assert.assertEquals(heapReader.getInt(i, 0), data[i]);
            }
        } finally {
            // Close order (buffer first, then reader) is kept as the test originally had it.
            heapBuffer.close();
            heapReader.close();
        }

        // Not strictly required. Let the tests pass first...then we can remove
        // TODO: remove me
        PinotDataBuffer mmapBuffer = PinotDataBuffer.fromFile(f, ReadMode.mmap, FileChannel.MapMode.READ_ONLY, "mmap_testing");
        FixedByteSingleValueMultiColReader mmapReader = new FixedByteSingleValueMultiColReader(mmapBuffer, data.length, 1, new int[] { 4 });
        try {
            mmapReader.open();
            for (int i = 0; i < data.length; i++) {
                Assert.assertEquals(mmapReader.getInt(i, 0), data[i]);
            }
        } finally {
            mmapBuffer.close();
            mmapReader.close();
        }
    } finally {
        // Always clean up the temp file, including when an assertion fails.
        f.delete();
    }
}
Also used : FixedByteSingleValueMultiColReader(com.linkedin.pinot.core.io.reader.impl.FixedByteSingleValueMultiColReader) Random(java.util.Random) RandomAccessFile(java.io.RandomAccessFile) DataOutputStream(java.io.DataOutputStream) PinotDataBuffer(com.linkedin.pinot.core.segment.memory.PinotDataBuffer) FileOutputStream(java.io.FileOutputStream) File(java.io.File) Test(org.testng.annotations.Test)

Example 27 with DataOutputStream

use of java.io.DataOutputStream in project pinot by linkedin.

the class ForwardIndexWriterBenchmark method convertRawToForwardIndex.

/**
 * Reads a raw text file with one document per line (comma-separated integer values),
 * writes the values to {@code output.mv.fwd} through a {@code FixedBitMultiValueWriter},
 * and prints size estimates for several offset encodings to standard output.
 *
 * @param rawFile text file whose lines are comma-separated integers
 * @throws Exception if the file cannot be read, a token is not an integer, or writing fails
 */
public static void convertRawToForwardIndex(File rawFile) throws Exception {
    List<String> lines;
    // Close the reader once the lines are loaded; it was previously leaked.
    try (FileReader reader = new FileReader(rawFile)) {
        lines = IOUtils.readLines(reader);
    }
    int totalDocs = lines.size();
    int max = Integer.MIN_VALUE;
    int maxNumberOfMultiValues = Integer.MIN_VALUE;
    int totalNumValues = 0;
    int[][] data = new int[totalDocs][];
    for (int i = 0; i < lines.size(); i++) {
        String line = lines.get(i);
        String[] split = line.split(",");
        totalNumValues = totalNumValues + split.length;
        if (split.length > maxNumberOfMultiValues) {
            maxNumberOfMultiValues = split.length;
        }
        data[i] = new int[split.length];
        for (int j = 0; j < split.length; j++) {
            String token = split[j];
            int val = Integer.parseInt(token);
            data[i][j] = val;
            if (val > max) {
                max = val;
            }
        }
    }
    // Bits needed to represent every value in [0, max]. The former ceil(log2(max)) was one bit
    // short whenever max is an exact power of two (e.g. 8 -> 3 bits) and produced 0 for max == 1.
    // Non-positive max (no values, or only values <= 0) falls back to a single bit.
    int maxBitsNeeded = max > 0 ? Integer.SIZE - Integer.numberOfLeadingZeros(max) : 1;
    int size = 2048;
    // NOTE(review): offsets is never populated in this method, so every bitmap below is built
    // from zeros only - confirm whether real per-document offsets were meant to be recorded.
    int[] offsets = new int[size];
    int bitMapSize = 0;
    File outputFile = new File("output.mv.fwd");
    FixedBitMultiValueWriter fixedBitSkipListSCMVWriter = new FixedBitMultiValueWriter(outputFile, totalDocs, totalNumValues, maxBitsNeeded);
    try {
        for (int i = 0; i < totalDocs; i++) {
            fixedBitSkipListSCMVWriter.setIntArray(i, data[i]);
            boolean chunkComplete = i % size == size - 1;
            boolean lastDoc = i == totalDocs - 1;
            if (chunkComplete || lastDoc) {
                // A complete chunk uses the whole offsets array; a trailing partial chunk uses
                // the same truncated prefix as before.
                int[] chunkOffsets = chunkComplete ? offsets : Arrays.copyOf(offsets, i % size);
                MutableRoaringBitmap rr1 = MutableRoaringBitmap.bitmapOf(chunkOffsets);
                ByteArrayOutputStream bos = new ByteArrayOutputStream();
                try (DataOutputStream dos = new DataOutputStream(bos)) {
                    rr1.serialize(dos);
                }
                bitMapSize += bos.size();
            }
        }
    } finally {
        // Release the writer even when a document fails to write.
        fixedBitSkipListSCMVWriter.close();
    }
    System.out.println("Output file size:" + outputFile.length());
    System.out.println("totalNumberOfDoc\t\t\t:" + totalDocs);
    System.out.println("totalNumberOfValues\t\t\t:" + totalNumValues);
    System.out.println("chunk size\t\t\t\t:" + size);
    System.out.println("Num chunks\t\t\t\t:" + totalDocs / size);
    int numChunks = totalDocs / size + 1;
    // long arithmetic: totalNumValues * maxBitsNeeded can exceed Integer.MAX_VALUE.
    long totalBits = (long) totalNumValues * maxBitsNeeded;
    long dataSizeinBytes = (totalBits + 7) / 8;
    System.out.println("Raw data size with fixed bit encoding\t:" + dataSizeinBytes);
    System.out.println("\nPer encoding size");
    System.out.println();
    System.out.println("size (offset + length)\t\t\t:" + ((totalDocs * (4L + 4L)) + dataSizeinBytes));
    System.out.println();
    System.out.println("size (offset only)\t\t\t:" + ((totalDocs * 4L) + dataSizeinBytes));
    System.out.println();
    System.out.println("bitMapSize\t\t\t\t:" + bitMapSize);
    System.out.println("size (with bitmap)\t\t\t:" + (bitMapSize + (numChunks * 4L) + dataSizeinBytes));
    System.out.println();
    System.out.println("Custom Bitset\t\t\t\t:" + (totalNumValues + 7) / 8);
    System.out.println("size (with custom bitset)\t\t\t:" + (((totalNumValues + 7) / 8) + (numChunks * 4L) + dataSizeinBytes));
}
Also used : FixedBitMultiValueWriter(com.linkedin.pinot.core.io.writer.impl.v1.FixedBitMultiValueWriter) MutableRoaringBitmap(org.roaringbitmap.buffer.MutableRoaringBitmap) DataOutputStream(java.io.DataOutputStream) FileReader(java.io.FileReader) ByteArrayOutputStream(java.io.ByteArrayOutputStream) File(java.io.File)

Example 28 with DataOutputStream

use of java.io.DataOutputStream in project pinot by linkedin.

the class SpeedTest method init.

/**
 * Prepares the data for the speed test: writes {@code SIZE} random non-negative ints to
 * {@code FILE_NAME}, mirrors them into {@code heapStorage} and {@code directMemory},
 * maps the file into {@code mmappedByteBuffer}, and fills {@code readIndices} with a random,
 * ascending selection of {@code SIZE * readPercentage} positions.
 *
 * @throws Exception if the temp file cannot be written or mapped
 */
static void init() throws Exception {
    // write a temp file; try-with-resources closes the stream even if a write fails
    try (DataOutputStream out = new DataOutputStream(new FileOutputStream(FILE_NAME))) {
        heapStorage = new int[SIZE];
        directMemory = ByteBuffer.allocateDirect(SIZE * 4);
        for (int i = 0; i < SIZE; i++) {
            int data = (int) (Math.random() * Integer.MAX_VALUE);
            out.writeInt(data);
            heapStorage[i] = data;
            directMemory.putInt(data);
        }
    }
    // The file handle was previously never closed. Per the FileChannel.map contract a mapping
    // stays valid after its channel is closed.
    // NOTE(review): assumes MmapUtils.mmapFile does not keep using raf after it returns - confirm.
    try (RandomAccessFile raf = new RandomAccessFile(FILE_NAME, "rw")) {
        mmappedByteBuffer = MmapUtils.mmapFile(raf, MapMode.READ_WRITE, 0L, raf.length(), new File(FILE_NAME), " Speed test mmappedByteBuffer");
        mmappedByteBuffer.load();
    }
    int toSelect = (int) (SIZE * readPercentage);
    readIndices = new int[toSelect];
    int ind = 0;
    // Walk the positions once; each is picked with probability
    // (picks still needed) / (positions still remaining), so the loop ends with exactly the
    // requested number of distinct indices, in ascending order.
    for (int i = 0; i < SIZE && toSelect > 0; i++) {
        double probOfSelection = toSelect * 1.0 / (SIZE - i);
        double random = Math.random();
        if (random < probOfSelection) {
            readIndices[ind] = i;
            toSelect = toSelect - 1;
            ind = ind + 1;
        }
    }
}
Also used : RandomAccessFile(java.io.RandomAccessFile) DataOutputStream(java.io.DataOutputStream) FileOutputStream(java.io.FileOutputStream) File(java.io.File)

Example 29 with DataOutputStream

use of java.io.DataOutputStream in project pinot by linkedin.

the class TopKPhaseMapOutputKey method toBytes.

/**
 * Serializes this key as two length-prefixed byte sequences: the dimension name followed by
 * the dimension value. Each sequence is written as a 4-byte length and then the bytes
 * returned by {@code getBytes()}.
 *
 * @return the serialized form of this key
 * @throws IOException if writing to the in-memory stream fails
 */
public byte[] toBytes() throws IOException {
    byte[] nameBytes = dimensionName.getBytes();
    byte[] valueBytes = dimensionValue.getBytes();

    ByteArrayOutputStream buffer = new ByteArrayOutputStream();
    try (DataOutputStream out = new DataOutputStream(buffer)) {
        // dimension name: length prefix, then payload
        out.writeInt(nameBytes.length);
        out.write(nameBytes);
        // dimension value: length prefix, then payload
        out.writeInt(valueBytes.length);
        out.write(valueBytes);
    }
    return buffer.toByteArray();
}
Also used : DataOutputStream(java.io.DataOutputStream) ByteArrayOutputStream(java.io.ByteArrayOutputStream)

Example 30 with DataOutputStream

use of java.io.DataOutputStream in project pinot by linkedin.

the class TopKPhaseMapOutputValue method toBytes.

/**
 * Serializes the metric values: a 4-byte count followed by each value, encoded according to
 * the metric type at the same position in {@code metricTypes} (short, long, int, float or
 * double). A metric type not handled by the switch writes nothing for that value.
 *
 * @return the serialized metric values
 * @throws IOException if writing to the in-memory stream fails
 */
public byte[] toBytes() throws IOException {
    ByteArrayOutputStream buffer = new ByteArrayOutputStream();
    try (DataOutputStream out = new DataOutputStream(buffer)) {
        // count header, then one entry per metric
        out.writeInt(metricValues.length);
        int position = 0;
        for (Number value : metricValues) {
            MetricType type = metricTypes.get(position);
            position++;
            switch (type) {
                case SHORT:
                    out.writeShort(value.intValue());
                    break;
                case LONG:
                    out.writeLong(value.longValue());
                    break;
                case INT:
                    out.writeInt(value.intValue());
                    break;
                case FLOAT:
                    out.writeFloat(value.floatValue());
                    break;
                case DOUBLE:
                    out.writeDouble(value.doubleValue());
                    break;
            }
        }
    }
    return buffer.toByteArray();
}
Also used : DataOutputStream(java.io.DataOutputStream) MetricType(com.linkedin.thirdeye.hadoop.config.MetricType) ByteArrayOutputStream(java.io.ByteArrayOutputStream)

Aggregations

DataOutputStream (java.io.DataOutputStream)2957 ByteArrayOutputStream (java.io.ByteArrayOutputStream)1309 IOException (java.io.IOException)1019 Test (org.junit.Test)633 DataInputStream (java.io.DataInputStream)611 FileOutputStream (java.io.FileOutputStream)426 ByteArrayInputStream (java.io.ByteArrayInputStream)409 File (java.io.File)279 BufferedOutputStream (java.io.BufferedOutputStream)227 UnitTest (org.apache.geode.test.junit.categories.UnitTest)172 URL (java.net.URL)149 InputStreamReader (java.io.InputStreamReader)144 BufferedReader (java.io.BufferedReader)140 Path (org.apache.hadoop.fs.Path)137 DataInput (java.io.DataInput)124 ArrayList (java.util.ArrayList)122 HttpURLConnection (java.net.HttpURLConnection)121 FSDataOutputStream (org.apache.hadoop.fs.FSDataOutputStream)117 FileInputStream (java.io.FileInputStream)107 InputStream (java.io.InputStream)107