use of java.io.DataOutputStream in project pinot by linkedin.
the class FixedByteWidthRowColDataFileReaderTest method testSingleCol.
/**
 * Writes 100 random ints to a scratch file, then reads them back through a
 * {@code FixedByteSingleValueMultiColReader} twice: once over a heap-loaded
 * buffer and once over an mmap-loaded buffer. Each read must return the
 * values that were written.
 *
 * <p>The reader is built with {@code data.length, 1, new int[] { 4 }} —
 * presumably row count, column count and per-column byte widths.
 *
 * @throws Exception if the scratch file cannot be written or loaded
 */
@Test
void testSingleCol() throws Exception {
  String fileName = "test_single_col.dat";
  File f = new File(fileName);
  // Clear any leftover from an earlier, aborted run.
  f.delete();
  try {
    int[] data = new int[100];
    Random r = new Random();
    DataOutputStream dos = new DataOutputStream(new FileOutputStream(f));
    try {
      for (int i = 0; i < data.length; i++) {
        data[i] = r.nextInt();
        dos.writeInt(data[i]);
      }
      dos.flush();
    } finally {
      // Release the file handle even when a write fails.
      dos.close();
    }

    PinotDataBuffer heapBuffer = PinotDataBuffer.fromFile(f, ReadMode.heap, FileChannel.MapMode.READ_ONLY, "testing");
    FixedByteSingleValueMultiColReader heapReader = new FixedByteSingleValueMultiColReader(heapBuffer, data.length, 1, new int[] { 4 });
    heapReader.open();
    for (int i = 0; i < data.length; i++) {
      Assert.assertEquals(heapReader.getInt(i, 0), data[i]);
    }
    heapBuffer.close();
    heapReader.close();

    // Not strictly required. Let the tests pass first...then we can remove
    // TODO: remove me
    PinotDataBuffer mmapBuffer = PinotDataBuffer.fromFile(f, ReadMode.mmap, FileChannel.MapMode.READ_ONLY, "mmap_testing");
    FixedByteSingleValueMultiColReader mmapReader = new FixedByteSingleValueMultiColReader(mmapBuffer, data.length, 1, new int[] { 4 });
    mmapReader.open();
    for (int i = 0; i < data.length; i++) {
      Assert.assertEquals(mmapReader.getInt(i, 0), data[i]);
    }
    mmapBuffer.close();
    mmapReader.close();
  } finally {
    // Remove the scratch file whether the assertions passed or not, so a
    // failing run does not leave it in the working directory.
    f.delete();
  }
}
use of java.io.DataOutputStream in project pinot by linkedin.
the class ForwardIndexWriterBenchmark method convertRawToForwardIndex.
/**
 * Converts a raw multi-value column file into a fixed-bit forward index
 * written to {@code output.mv.fwd}, then prints size estimates for several
 * ways of storing the per-document offsets.
 *
 * <p>Each line of {@code rawFile} is one document; the line is a
 * comma-separated list of decimal ints.
 *
 * @param rawFile text file with one comma-separated list of ints per line
 * @throws NumberFormatException if a token is not a parsable int
 * @throws Exception if reading the input or writing the index fails
 */
public static void convertRawToForwardIndex(File rawFile) throws Exception {
  // IOUtils.readLines does not close the reader it is given, so close it here.
  List<String> lines;
  FileReader rawReader = new FileReader(rawFile);
  try {
    lines = IOUtils.readLines(rawReader);
  } finally {
    rawReader.close();
  }

  int totalDocs = lines.size();
  int max = Integer.MIN_VALUE;
  int maxNumberOfMultiValues = Integer.MIN_VALUE;
  int totalNumValues = 0;
  int[][] data = new int[totalDocs][];
  for (int i = 0; i < lines.size(); i++) {
    String line = lines.get(i);
    String[] split = line.split(",");
    totalNumValues = totalNumValues + split.length;
    if (split.length > maxNumberOfMultiValues) {
      maxNumberOfMultiValues = split.length;
    }
    data[i] = new int[split.length];
    for (int j = 0; j < split.length; j++) {
      String token = split[j];
      int val = Integer.parseInt(token);
      data[i][j] = val;
      if (val > max) {
        max = val;
      }
    }
  }

  // Bits needed to store every value in [0, max]. The previous
  // ceil(log(max) / log(2)) was one bit short whenever max is a power of two
  // (8 -> 3 bits, 1 -> 0 bits) and was undefined for max <= 0.
  int maxBitsNeeded = max > 0 ? Integer.SIZE - Integer.numberOfLeadingZeros(max) : 1;

  int size = 2048;
  // NOTE(review): offsets is never written to, so every bitmap below is built
  // from zeros only — confirm whether per-document offsets were meant to be
  // recorded here.
  int[] offsets = new int[size];
  int bitMapSize = 0;
  File outputFile = new File("output.mv.fwd");
  FixedBitMultiValueWriter fixedBitSkipListSCMVWriter = new FixedBitMultiValueWriter(outputFile, totalDocs, totalNumValues, maxBitsNeeded);
  try {
    for (int i = 0; i < totalDocs; i++) {
      fixedBitSkipListSCMVWriter.setIntArray(i, data[i]);
      boolean chunkFull = i % size == size - 1;
      boolean lastDoc = i == totalDocs - 1;
      if (chunkFull || lastDoc) {
        // NOTE(review): for a trailing partial chunk the copy length is i % size,
        // one less than the number of documents in that chunk — kept as-is.
        int[] chunkOffsets = chunkFull ? offsets : Arrays.copyOf(offsets, i % size);
        MutableRoaringBitmap rr1 = MutableRoaringBitmap.bitmapOf(chunkOffsets);
        ByteArrayOutputStream bos = new ByteArrayOutputStream();
        DataOutputStream dos = new DataOutputStream(bos);
        rr1.serialize(dos);
        dos.close();
        bitMapSize += bos.size();
      }
    }
  } finally {
    // Close the writer even if a document fails to be written.
    fixedBitSkipListSCMVWriter.close();
  }

  System.out.println("Output file size:" + outputFile.length());
  System.out.println("totalNumberOfDoc\t\t\t:" + totalDocs);
  System.out.println("totalNumberOfValues\t\t\t:" + totalNumValues);
  System.out.println("chunk size\t\t\t\t:" + size);
  System.out.println("Num chunks\t\t\t\t:" + totalDocs / size);
  int numChunks = totalDocs / size + 1;
  // Size arithmetic is done in long: values * bits overflows int for large inputs.
  long totalBits = (long) totalNumValues * maxBitsNeeded;
  long dataSizeinBytes = (totalBits + 7) / 8;
  long chunkHeaderBytes = (long) numChunks * 4;
  long customBitsetBytes = ((long) totalNumValues + 7) / 8;
  System.out.println("Raw data size with fixed bit encoding\t:" + dataSizeinBytes);
  System.out.println("\nPer encoding size");
  System.out.println();
  System.out.println("size (offset + length)\t\t\t:" + (((long) totalDocs * (4 + 4)) + dataSizeinBytes));
  System.out.println();
  System.out.println("size (offset only)\t\t\t:" + (((long) totalDocs * (4)) + dataSizeinBytes));
  System.out.println();
  System.out.println("bitMapSize\t\t\t\t:" + bitMapSize);
  System.out.println("size (with bitmap)\t\t\t:" + (bitMapSize + chunkHeaderBytes + dataSizeinBytes));
  System.out.println();
  System.out.println("Custom Bitset\t\t\t\t:" + customBitsetBytes);
  System.out.println("size (with custom bitset)\t\t\t:" + (customBitsetBytes + chunkHeaderBytes + dataSizeinBytes));
}
use of java.io.DataOutputStream in project pinot by linkedin.
the class SpeedTest method init.
/**
 * Prepares the data sets used by the speed test: writes {@code SIZE} random
 * non-negative ints to {@code FILE_NAME}, mirrors them into
 * {@code heapStorage} and {@code directMemory}, memory-maps the file into
 * {@code mmappedByteBuffer}, and fills {@code readIndices} with a random,
 * ascending selection of {@code SIZE * readPercentage} positions.
 *
 * @throws Exception if the temp file cannot be written or mapped
 */
static void init() throws Exception {
  // write a temp file
  FileOutputStream fout = new FileOutputStream(FILE_NAME);
  DataOutputStream out = new DataOutputStream(fout);
  try {
    heapStorage = new int[SIZE];
    directMemory = ByteBuffer.allocateDirect(SIZE * 4);
    for (int i = 0; i < SIZE; i++) {
      int data = (int) (Math.random() * Integer.MAX_VALUE);
      out.writeInt(data);
      heapStorage[i] = data;
      directMemory.putInt(data);
    }
  } finally {
    // Release the file handle even when allocation or a write fails.
    out.close();
  }

  RandomAccessFile raf = new RandomAccessFile(FILE_NAME, "rw");
  try {
    mmappedByteBuffer = MmapUtils.mmapFile(raf, MapMode.READ_WRITE, 0L, raf.length(), new File(FILE_NAME), " Speed test mmappedByteBuffer");
  } finally {
    // A file mapping stays valid after its channel is closed (FileChannel.map
    // contract), so the handle need not outlive this method.
    // NOTE(review): assumes MmapUtils.mmapFile does not keep a reference to raf — confirm.
    raf.close();
  }
  mmappedByteBuffer.load();

  int toSelect = (int) (SIZE * readPercentage);
  readIndices = new int[toSelect];
  int ind = 0;
  // Walk the positions in order, taking position i with probability
  // (picks still needed) / (positions still left). Once those two counts are
  // equal the probability is 1, so the remaining picks are always made.
  for (int i = 0; i < SIZE && toSelect > 0; i++) {
    double probOfSelection = toSelect * 1.0 / (SIZE - i);
    double random = Math.random();
    if (random < probOfSelection) {
      readIndices[ind] = i;
      toSelect = toSelect - 1;
      ind = ind + 1;
    }
  }
}
use of java.io.DataOutputStream in project pinot by linkedin.
the class TopKPhaseMapOutputKey method toBytes.
/**
 * Serializes this key as two length-prefixed byte strings: the dimension
 * name followed by the dimension value. Each string is encoded with the
 * no-argument {@code getBytes()} and preceded by its byte count written as
 * a 4-byte int.
 *
 * @return the serialized key
 * @throws IOException if writing to the in-memory stream fails
 */
public byte[] toBytes() throws IOException {
  ByteArrayOutputStream buffer = new ByteArrayOutputStream();
  DataOutputStream out = new DataOutputStream(buffer);

  // dimension name: length, then payload
  byte[] nameBytes = dimensionName.getBytes();
  out.writeInt(nameBytes.length);
  out.write(nameBytes);

  // dimension value: length, then payload
  byte[] valueBytes = dimensionValue.getBytes();
  out.writeInt(valueBytes.length);
  out.write(valueBytes);

  out.close();
  buffer.close();
  return buffer.toByteArray();
}
use of java.io.DataOutputStream in project pinot by linkedin.
the class TopKPhaseMapOutputValue method toBytes.
/**
 * Serializes the metric values: first the number of metrics as a 4-byte
 * int, then each value in order, written in the width given by the metric
 * type at the same index (short, int, long, float or double).
 *
 * @return the serialized metric values
 * @throws IOException if writing to the in-memory stream fails
 */
public byte[] toBytes() throws IOException {
  ByteArrayOutputStream buffer = new ByteArrayOutputStream();
  DataOutputStream out = new DataOutputStream(buffer);

  // metric count, then one entry per metric
  out.writeInt(metricValues.length);
  for (int idx = 0; idx < metricValues.length; idx++) {
    Number value = metricValues[idx];
    MetricType type = metricTypes.get(idx);
    switch (type) {
      case INT:
        out.writeInt(value.intValue());
        break;
      case SHORT:
        out.writeShort(value.intValue());
        break;
      case LONG:
        out.writeLong(value.longValue());
        break;
      case FLOAT:
        out.writeFloat(value.floatValue());
        break;
      case DOUBLE:
        out.writeDouble(value.doubleValue());
        break;
      default:
        // Any other metric type contributes no bytes.
        break;
    }
  }

  out.close();
  buffer.close();
  return buffer.toByteArray();
}
Aggregations