Use of com.linkedin.pinot.core.segment.memory.PinotDataBuffer in project pinot by LinkedIn.
From the class FixedBitRowColDataWriterReaderTest, method testSingleColUnsigned:
@Test
public void testSingleColUnsigned() throws Exception {
  int maxBits = 1;
  while (maxBits < 32) {
    LOGGER.debug("START test maxBits:" + maxBits);
    final String fileName = getClass().getName() + "_single_col_fixed_bit_" + maxBits + ".dat";
    final File file = new File(fileName);
    file.delete();
    final int rows = 100;
    final int cols = 1;
    final int[] columnSizesInBits = new int[] { maxBits };
    final FixedBitSingleValueMultiColWriter writer =
        new FixedBitSingleValueMultiColWriter(file, rows, cols, columnSizesInBits, new boolean[] { true });
    final int[] data = new int[rows];
    final Random r = new Random();
    writer.open();
    final int maxValue = (int) Math.pow(2, maxBits);
    for (int i = 0; i < rows; i++) {
      data[i] = r.nextInt(maxValue) * ((Math.random() > .5) ? 1 : -1);
      writer.setInt(i, 0, data[i]);
    }
    writer.close();
    // Test heap mode
    PinotDataBuffer heapBuffer =
        PinotDataBuffer.fromFile(file, ReadMode.heap, FileChannel.MapMode.READ_ONLY, "testing");
    FixedBitSingleValueMultiColReader heapReader =
        new FixedBitSingleValueMultiColReader(heapBuffer, rows, cols, columnSizesInBits, new boolean[] { true });
    for (int i = 0; i < rows; i++) {
      Assert.assertEquals(heapReader.getInt(i, 0), data[i]);
    }
    heapReader.close();
    heapBuffer.close();
    // Test mmap mode
    PinotDataBuffer mmapBuffer =
        PinotDataBuffer.fromFile(file, ReadMode.mmap, FileChannel.MapMode.READ_ONLY, "mmap-testing");
    FixedBitSingleValueMultiColReader mmapReader =
        new FixedBitSingleValueMultiColReader(mmapBuffer, rows, cols, columnSizesInBits, new boolean[] { true });
    for (int i = 0; i < rows; i++) {
      Assert.assertEquals(mmapReader.getInt(i, 0), data[i]);
    }
    mmapReader.close();
    mmapBuffer.close();
    maxBits = maxBits + 1;
    file.delete();
  }
}
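Every example on this page follows the same buffer lifecycle: write an index file with a writer, load or map it with PinotDataBuffer.fromFile, hand the buffer to a reader, and close the reader and the buffer when done. A minimal sketch of that lifecycle (the file name and description string are hypothetical, not from the project):

File indexFile = new File("column.fwd.dat"); // hypothetical index file
// ReadMode.heap loads the file into on-heap memory; ReadMode.mmap memory-maps it.
PinotDataBuffer buffer =
    PinotDataBuffer.fromFile(indexFile, ReadMode.mmap, FileChannel.MapMode.READ_ONLY, "lifecycle-example");
try {
  // hand the buffer to a reader, e.g. FixedBitSingleValueMultiColReader, and read values
} finally {
  buffer.close(); // releases the mapping (or the heap copy)
}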
Use of com.linkedin.pinot.core.segment.memory.PinotDataBuffer in project pinot by LinkedIn.
From the class FixedBitSingleValueTest, method testV2:
@Test
public void testV2() throws Exception {
  int ROWS = 1000;
  for (int numBits = 1; numBits < 32; numBits++) {
    File file = new File(this.getClass().getName() + "_" + numBits + ".test");
    FixedBitSingleValueWriter writer = new FixedBitSingleValueWriter(file, ROWS, numBits);
    int[] data = new int[ROWS];
    Random random = new Random();
    int max = (int) Math.pow(2, numBits);
    for (int i = 0; i < ROWS; i++) {
      data[i] = random.nextInt(max);
      writer.setInt(i, data[i]);
    }
    writer.close();
    PinotDataBuffer heapBuffer =
        PinotDataBuffer.fromFile(file, ReadMode.heap, FileChannel.MapMode.READ_ONLY, "testing");
    FixedBitSingleValueReader reader = new FixedBitSingleValueReader(heapBuffer, ROWS, numBits);
    int[] read = new int[ROWS];
    for (int i = 0; i < ROWS; i++) {
      read[i] = reader.getInt(i);
      //Assert.assertEquals(reader.getInt(i), data[i],
      //    "Failed for bit:" + numBits + " Expected " + data[i] + " but found " + reader.getInt(i) + " at " + i);
    }
    LOGGER.trace(Arrays.toString(data));
    LOGGER.trace(Arrays.toString(read));
    reader.close();
    heapBuffer.close();
    file.delete();
  }
}
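As written, the loop only records what was read and logs both arrays at TRACE level; the per-element assertion is commented out, so the test never actually compares the two. If a round-trip check is wanted, one hedged option is a whole-array comparison at the end of each loop iteration (a sketch, using the same TestNG Assert and java.util.Arrays already imported by these tests):

// Compare the written and read arrays in one shot instead of per element:
Assert.assertTrue(Arrays.equals(data, read),
    "Round-trip mismatch for numBits=" + numBits);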
Use of com.linkedin.pinot.core.segment.memory.PinotDataBuffer in project pinot by LinkedIn.
From the class VarByteChunkSingleValueReaderWriteTest, method test:
/**
 * This test writes {@link #NUM_STRINGS} strings using {@link VarByteChunkSingleValueWriter}. It then reads
 * the strings back using {@link VarByteChunkSingleValueReader} and asserts that what was written is the same
 * as what was read.
 *
 * The number of docs and docs per chunk are chosen so that both complete and partial chunks are generated.
 *
 * @throws Exception
 */
@Test
public void test() throws Exception {
  String[] expected = new String[NUM_STRINGS];
  Random random = new Random();
  File outFile = new File(TEST_FILE);
  FileUtils.deleteQuietly(outFile);
  int maxStringLengthInBytes = 0;
  for (int i = 0; i < NUM_STRINGS; i++) {
    expected[i] = RandomStringUtils.random(random.nextInt(MAX_STRING_LENGTH));
    maxStringLengthInBytes = Math.max(maxStringLengthInBytes, expected[i].getBytes(UTF_8).length);
  }
  ChunkCompressor compressor = ChunkCompressorFactory.getCompressor("snappy");
  VarByteChunkSingleValueWriter writer =
      new VarByteChunkSingleValueWriter(outFile, compressor, NUM_STRINGS, NUM_DOCS_PER_CHUNK, maxStringLengthInBytes);
  for (int i = 0; i < NUM_STRINGS; i++) {
    writer.setString(i, expected[i]);
  }
  writer.close();
  PinotDataBuffer pinotDataBuffer =
      PinotDataBuffer.fromFile(outFile, ReadMode.mmap, FileChannel.MapMode.READ_ONLY, getClass().getName());
  ChunkDecompressor decompressor = ChunkCompressorFactory.getDecompressor("snappy");
  VarByteChunkSingleValueReader reader = new VarByteChunkSingleValueReader(pinotDataBuffer, decompressor);
  ChunkReaderContext context = reader.createContext();
  for (int i = 0; i < NUM_STRINGS; i++) {
    String actual = reader.getString(i, context);
    Assert.assertEquals(actual, expected[i]);
  }
  reader.close();
  FileUtils.deleteQuietly(outFile);
}
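One subtlety: the writer is sized by maxStringLengthInBytes, which the test computes from the UTF-8 byte length rather than String.length(), since RandomStringUtils.random can emit multi-byte code points. A small illustration of why the two differ (values assume UTF-8; the variable names are for illustration only):

// Length budget must be in bytes, not chars: non-ASCII code points occupy
// more bytes in UTF-8 than their char count suggests.
String s = "\u00e9\u00e9";               // two 'e-acute' chars
int chars = s.length();                  // 2
int bytes = s.getBytes(UTF_8).length;    // 4 in UTF-8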
Use of com.linkedin.pinot.core.segment.memory.PinotDataBuffer in project pinot by LinkedIn.
From the class FixedByteSingleColumnMultiValueReaderWriter, method close:
@Override
public void close() {
  for (PinotDataBuffer dataBuffer : dataBuffers) {
    dataBuffer.close();
  }
  dataBuffers.clear();
  headerBuffer.close();
  headerBuffer = null;
}
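As written, close() is not idempotent: a second call iterates the now-empty list harmlessly but then calls close() on the nulled headerBuffer and throws a NullPointerException. A null-guarded variant, if double-close needs to be tolerated (a sketch, not the project's actual code):

@Override
public void close() {
  for (PinotDataBuffer dataBuffer : dataBuffers) {
    dataBuffer.close();
  }
  dataBuffers.clear();
  if (headerBuffer != null) { // tolerate a second close() call
    headerBuffer.close();
    headerBuffer = null;
  }
}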
Use of com.linkedin.pinot.core.segment.memory.PinotDataBuffer in project pinot by LinkedIn.
From the class SegmentFormatConverterV1ToV2, method convert:
@Override
public void convert(File indexSegmentDir) throws Exception {
  SegmentMetadataImpl segmentMetadataImpl = new SegmentMetadataImpl(indexSegmentDir);
  SegmentDirectory segmentDirectory =
      SegmentDirectory.createFromLocalFS(indexSegmentDir, segmentMetadataImpl, ReadMode.mmap);
  Set<String> columns = segmentMetadataImpl.getAllColumns();
  SegmentDirectory.Writer segmentWriter = segmentDirectory.createWriter();
  for (String column : columns) {
    ColumnMetadata columnMetadata = segmentMetadataImpl.getColumnMetadataFor(column);
    if (columnMetadata.isSorted()) {
      // no need to change a sorted forward index
      continue;
    }
    PinotDataBuffer fwdIndexBuffer = segmentWriter.getIndexFor(column, ColumnIndexType.FORWARD_INDEX);
    if (columnMetadata.isSingleValue() && !columnMetadata.isSorted()) {
      // since we use a dictionary to encode values, we won't have any negative values in the
      // forward index
      boolean signed = false;
      SingleColumnSingleValueReader v1Reader =
          new com.linkedin.pinot.core.io.reader.impl.v1.FixedBitSingleValueReader(fwdIndexBuffer,
              segmentMetadataImpl.getTotalDocs(), columnMetadata.getBitsPerElement(), signed);
      File convertedFwdIndexFile =
          new File(indexSegmentDir, column + V1Constants.Indexes.UN_SORTED_SV_FWD_IDX_FILE_EXTENTION + ".tmp");
      SingleColumnSingleValueWriter v2Writer =
          new com.linkedin.pinot.core.io.writer.impl.v2.FixedBitSingleValueWriter(convertedFwdIndexFile,
              segmentMetadataImpl.getTotalDocs(), columnMetadata.getBitsPerElement());
      for (int row = 0; row < segmentMetadataImpl.getTotalDocs(); row++) {
        int value = v1Reader.getInt(row);
        v2Writer.setInt(row, value);
      }
      v1Reader.close();
      v2Writer.close();
      File fwdIndexFileCopy =
          new File(indexSegmentDir, column + V1Constants.Indexes.UN_SORTED_SV_FWD_IDX_FILE_EXTENTION + ".orig");
      segmentWriter.removeIndex(column, ColumnIndexType.FORWARD_INDEX);
      // FIXME
      PinotDataBuffer newIndexBuffer =
          segmentWriter.newIndexFor(column, ColumnIndexType.FORWARD_INDEX, (int) convertedFwdIndexFile.length());
      newIndexBuffer.readFrom(convertedFwdIndexFile);
      convertedFwdIndexFile.delete();
    }
    if (!columnMetadata.isSingleValue()) {
      // since we use a dictionary to encode values, we won't have any negative values in the
      // forward index
      boolean signed = false;
      SingleColumnMultiValueReader v1Reader =
          new com.linkedin.pinot.core.io.reader.impl.v1.FixedBitMultiValueReader(fwdIndexBuffer,
              segmentMetadataImpl.getTotalDocs(), columnMetadata.getTotalNumberOfEntries(),
              columnMetadata.getBitsPerElement(), signed);
      File convertedFwdIndexFile =
          new File(indexSegmentDir, column + V1Constants.Indexes.UN_SORTED_MV_FWD_IDX_FILE_EXTENTION + ".tmp");
      SingleColumnMultiValueWriter v2Writer =
          new com.linkedin.pinot.core.io.writer.impl.v2.FixedBitMultiValueWriter(convertedFwdIndexFile,
              segmentMetadataImpl.getTotalDocs(), columnMetadata.getTotalNumberOfEntries(),
              columnMetadata.getBitsPerElement());
      int[] values = new int[columnMetadata.getMaxNumberOfMultiValues()];
      for (int row = 0; row < segmentMetadataImpl.getTotalDocs(); row++) {
        int length = v1Reader.getIntArray(row, values);
        int[] copy = new int[length];
        System.arraycopy(values, 0, copy, 0, length);
        v2Writer.setIntArray(row, copy);
      }
      v1Reader.close();
      v2Writer.close();
      segmentWriter.removeIndex(column, ColumnIndexType.FORWARD_INDEX);
      PinotDataBuffer newIndexBuffer =
          segmentWriter.newIndexFor(column, ColumnIndexType.FORWARD_INDEX, (int) convertedFwdIndexFile.length());
      newIndexBuffer.readFrom(convertedFwdIndexFile);
      convertedFwdIndexFile.delete();
    }
  }
  File metadataFile = new File(indexSegmentDir, V1Constants.MetadataKeys.METADATA_FILE_NAME);
  File metadataFileCopy = new File(indexSegmentDir, V1Constants.MetadataKeys.METADATA_FILE_NAME + ".orig");
  BufferedInputStream bis = new BufferedInputStream(new FileInputStream(metadataFile));
  BufferedOutputStream bos = new BufferedOutputStream(new FileOutputStream(metadataFileCopy));
  IOUtils.copy(bis, bos);
  bis.close();
  bos.close();
  final PropertiesConfiguration properties = new PropertiesConfiguration(metadataFileCopy);
  // update the segment version
  properties.setProperty(V1Constants.MetadataKeys.Segment.SEGMENT_VERSION, SegmentVersion.v2.toString());
  metadataFile.delete();
  properties.save(metadataFile);
}
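The metadata backup at the end leaves both streams open if IOUtils.copy throws. A try-with-resources variant of the same copy closes them on any exit path (a sketch using the same Commons IO call):

// Back up the metadata file before rewriting the segment version:
try (InputStream in = new BufferedInputStream(new FileInputStream(metadataFile));
     OutputStream out = new BufferedOutputStream(new FileOutputStream(metadataFileCopy))) {
  IOUtils.copy(in, out); // streams are closed even if the copy fails
}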