Use of com.linkedin.pinot.core.io.writer.SingleColumnMultiValueWriter in project pinot by linkedin.
Class FixedBitMultiValueTest, method testSingleColMultiValue.
/**
 * Round-trips randomly generated multi-value rows through the given writer and reader
 * implementations for every {@code maxBits} from 1 to 31.
 *
 * <p>For each bit width, 10 documents holding between 1 and 100 random values each are written
 * to a scratch file in the working directory. The file is then read back once with
 * {@code ReadMode.heap} and once with {@code ReadMode.mmap}, and every value read must equal the
 * value written. The writer, readers, buffers and the scratch file are released even when an
 * assertion fails.
 *
 * @param writerClazz writer implementation; must expose a public {@code (File, int, int, int)}
 *     constructor, invoked here as (file, numDocs, totalNumValues, maxBits)
 * @param readerClazz reader implementation; must expose a public
 *     {@code (PinotDataBuffer, int, int, int, boolean)} constructor, invoked here as
 *     (buffer, numDocs, totalNumValues, maxBits, false)
 * @throws Exception if the constructors cannot be resolved or invoked, or if file I/O fails
 */
public void testSingleColMultiValue(Class<? extends SingleColumnMultiValueWriter> writerClazz, Class<? extends SingleColumnMultiValueReader> readerClazz) throws Exception {
  LOGGER.info("Testing for writerClazz:{} readerClass:{}", writerClazz.getName(), readerClazz.getName());
  Constructor<? extends SingleColumnMultiValueWriter> writerClazzConstructor =
      writerClazz.getConstructor(File.class, int.class, int.class, int.class);
  Constructor<? extends SingleColumnMultiValueReader> readerClazzConstructor =
      readerClazz.getConstructor(PinotDataBuffer.class, int.class, int.class, int.class, boolean.class);

  final int numDocs = 10;
  final int maxNumValues = 100;
  // Log the seed so that a failing run can be replayed with the same data.
  final long seed = System.nanoTime();
  LOGGER.info("Random seed: {}", seed);
  final Random r = new Random(seed);
  final File f = new File(getClass().getName() + "_test_single_col_mv_fixed_bit.dat");

  for (int maxBits = 1; maxBits < 32; maxBits++) {
    // Remove any leftover from an earlier, aborted run.
    f.delete();
    try {
      // (int) Math.pow(2, 31) saturates to Integer.MAX_VALUE, so the bound stays positive for
      // maxBits == 31; "1 << 31" would be negative and rejected by Random.nextInt.
      final int maxValue = (int) Math.pow(2, maxBits);
      final int[][] data = new int[numDocs][];
      int totalNumValues = 0;
      for (int i = 0; i < data.length; i++) {
        final int numValues = r.nextInt(maxNumValues) + 1;
        data[i] = new int[numValues];
        for (int j = 0; j < numValues; j++) {
          data[i][j] = r.nextInt(maxValue);
        }
        totalNumValues += numValues;
      }

      SingleColumnMultiValueWriter writer = writerClazzConstructor.newInstance(f, numDocs, totalNumValues, maxBits);
      try {
        for (int i = 0; i < data.length; i++) {
          writer.setIntArray(i, data[i]);
        }
      } finally {
        writer.close();
      }

      assertFileMatches(readerClazzConstructor, f, ReadMode.heap, data, totalNumValues, maxBits, maxNumValues);
      assertFileMatches(readerClazzConstructor, f, ReadMode.mmap, data, totalNumValues, maxBits, maxNumValues);
    } finally {
      f.delete();
    }
  }
  LOGGER.info("DONE: Testing for writerClazz:{} readerClass:{}", writerClazz.getName(), readerClazz.getName());
}

/**
 * Loads {@code file} with the given read mode, reads every document through a freshly
 * constructed reader and asserts that it matches {@code expected}. The reader is closed before
 * the buffer, and both are closed even when an assertion fails.
 *
 * @param readerConstructor the {@code (PinotDataBuffer, int, int, int, boolean)} reader constructor
 * @param file file previously produced by the writer under test
 * @param readMode how the file is loaded into a {@code PinotDataBuffer}
 * @param expected the rows that were written, indexed by document id
 * @param totalNumValues total number of values across all rows
 * @param maxBits the bit width the file was written with
 * @param maxNumValues upper bound on the number of values in a single row
 * @throws Exception if the buffer or reader cannot be created or closed
 */
private void assertFileMatches(Constructor<? extends SingleColumnMultiValueReader> readerConstructor, File file,
    ReadMode readMode, int[][] expected, int totalNumValues, int maxBits, int maxNumValues) throws Exception {
  PinotDataBuffer buffer = PinotDataBuffer.fromFile(file, readMode, FileChannel.MapMode.READ_ONLY, "testing");
  try {
    SingleColumnMultiValueReader<? extends ReaderContext> reader =
        readerConstructor.newInstance(buffer, expected.length, totalNumValues, maxBits, false);
    try {
      // Sized for the longest possible row; only the first numValues entries are meaningful.
      final int[] readValues = new int[maxNumValues];
      for (int i = 0; i < expected.length; i++) {
        final int numValues = reader.getIntArray(i, readValues);
        Assert.assertEquals(numValues, expected[i].length);
        for (int j = 0; j < numValues; j++) {
          Assert.assertEquals(readValues[j], expected[i][j]);
        }
      }
    } finally {
      reader.close();
    }
  } finally {
    buffer.close();
  }
}
Use of com.linkedin.pinot.core.io.writer.SingleColumnMultiValueWriter in project pinot by linkedin.
Class FixedBitMultiValueTest, method testSingleColMultiValueWithContext.
/**
 * Round-trips randomly generated multi-value rows through the given writer and reader
 * implementations for every {@code maxBits} from 1 to 31, reading the heap-loaded copy through
 * a {@code ReaderContext}.
 *
 * <p>For each bit width, 10 documents holding between 1 and 100 random values each are written
 * to a scratch file in the working directory. The file is read back with {@code ReadMode.heap}
 * using the context-taking {@code getIntArray} overload, and then with {@code ReadMode.mmap}
 * using the overload without a context. Every value read must equal the value written. The
 * writer, readers, buffers and the scratch file are released even when an assertion fails.
 *
 * @param writerClazz writer implementation; must expose a public {@code (File, int, int, int)}
 *     constructor, invoked here as (file, numDocs, totalNumValues, maxBits)
 * @param readerClazz reader implementation; must expose a public
 *     {@code (PinotDataBuffer, int, int, int, boolean)} constructor, invoked here as
 *     (buffer, numDocs, totalNumValues, maxBits, false)
 * @throws Exception if the constructors cannot be resolved or invoked, or if file I/O fails
 */
public void testSingleColMultiValueWithContext(Class<? extends SingleColumnMultiValueWriter> writerClazz, Class<? extends SingleColumnMultiValueReader<? extends ReaderContext>> readerClazz) throws Exception {
  LOGGER.info("Testing for writerClazz:{} readerClass:{}", writerClazz.getName(), readerClazz.getName());
  Constructor<? extends SingleColumnMultiValueWriter> writerClazzConstructor =
      writerClazz.getConstructor(File.class, int.class, int.class, int.class);
  Constructor<? extends SingleColumnMultiValueReader<? extends ReaderContext>> readerClazzConstructor =
      readerClazz.getConstructor(PinotDataBuffer.class, int.class, int.class, int.class, boolean.class);

  final int numDocs = 10;
  final int maxNumValues = 100;
  // Log the seed so that a failing run can be replayed with the same data.
  final long seed = System.nanoTime();
  LOGGER.info("Random seed: {}", seed);
  final Random r = new Random(seed);
  final File f = new File(getClass().getName() + "_test_single_col_mv_fixed_bit.dat");

  for (int maxBits = 1; maxBits < 32; maxBits++) {
    // Remove any leftover from an earlier, aborted run.
    f.delete();
    try {
      // (int) Math.pow(2, 31) saturates to Integer.MAX_VALUE, so the bound stays positive for
      // maxBits == 31; "1 << 31" would be negative and rejected by Random.nextInt.
      final int maxValue = (int) Math.pow(2, maxBits);
      final int[][] data = new int[numDocs][];
      int totalNumValues = 0;
      for (int i = 0; i < data.length; i++) {
        final int numValues = r.nextInt(maxNumValues) + 1;
        data[i] = new int[numValues];
        for (int j = 0; j < numValues; j++) {
          data[i][j] = r.nextInt(maxValue);
        }
        totalNumValues += numValues;
      }

      SingleColumnMultiValueWriter writer = writerClazzConstructor.newInstance(f, numDocs, totalNumValues, maxBits);
      try {
        for (int i = 0; i < data.length; i++) {
          writer.setIntArray(i, data[i]);
        }
      } finally {
        writer.close();
      }

      // Sized for the longest possible row; only the first numValues entries are meaningful.
      final int[] readValues = new int[maxNumValues];

      // Test heap mode, reading through a reader context.
      PinotDataBuffer heapBuffer = PinotDataBuffer.fromFile(f, ReadMode.heap, FileChannel.MapMode.READ_ONLY, "testing");
      try {
        // Deliberately a raw type: presumably the context-taking overload is typed on the
        // reader's type parameter, which a wildcard-typed reference could not be called with.
        SingleColumnMultiValueReader heapReader =
            readerClazzConstructor.newInstance(heapBuffer, numDocs, totalNumValues, maxBits, false);
        try {
          ReaderContext context = heapReader.createContext();
          for (int i = 0; i < data.length; i++) {
            final int numValues = heapReader.getIntArray(i, readValues, context);
            Assert.assertEquals(numValues, data[i].length);
            for (int j = 0; j < numValues; j++) {
              Assert.assertEquals(readValues[j], data[i][j]);
            }
          }
        } finally {
          heapReader.close();
        }
      } finally {
        heapBuffer.close();
      }

      // Test mmap mode, reading without a context.
      PinotDataBuffer mmapBuffer = PinotDataBuffer.fromFile(f, ReadMode.mmap, FileChannel.MapMode.READ_ONLY, "testing");
      try {
        SingleColumnMultiValueReader<? extends ReaderContext> mmapReader =
            readerClazzConstructor.newInstance(mmapBuffer, numDocs, totalNumValues, maxBits, false);
        try {
          for (int i = 0; i < data.length; i++) {
            final int numValues = mmapReader.getIntArray(i, readValues);
            Assert.assertEquals(numValues, data[i].length);
            for (int j = 0; j < numValues; j++) {
              Assert.assertEquals(readValues[j], data[i][j]);
            }
          }
        } finally {
          mmapReader.close();
        }
      } finally {
        mmapBuffer.close();
      }
    } finally {
      f.delete();
    }
  }
  LOGGER.info("DONE: Testing for writerClazz:{} readerClass:{}", writerClazz.getName(), readerClazz.getName());
}
Use of com.linkedin.pinot.core.io.writer.SingleColumnMultiValueWriter in project pinot by linkedin.
Class SegmentFormatConverterV1ToV2, method convert.
/**
 * Rewrites the forward index of every unsorted column of the segment in
 * {@code indexSegmentDir} with the v2 fixed-bit writers, then sets the segment version in the
 * metadata file to {@code SegmentVersion.v2}.
 *
 * <p>Sorted columns are skipped. For each remaining column the v1 forward index is read row by
 * row and written to a {@code .tmp} file next to the segment; the existing forward index is
 * then removed from the segment writer and replaced with the contents of that file. The
 * {@code .tmp} file is deleted whether or not the conversion of that column succeeds. Before
 * the metadata file is rewritten, its original contents are copied to a sibling file with the
 * {@code .orig} suffix, which is left in place.
 *
 * @param indexSegmentDir directory containing the segment to convert
 * @throws IllegalStateException if a converted forward index is larger than
 *     {@code Integer.MAX_VALUE} bytes and therefore cannot be sized with an {@code int}
 * @throws Exception if reading, writing, or updating the segment or its metadata fails
 */
@Override
public void convert(File indexSegmentDir) throws Exception {
  SegmentMetadataImpl segmentMetadataImpl = new SegmentMetadataImpl(indexSegmentDir);
  SegmentDirectory segmentDirectory = SegmentDirectory.createFromLocalFS(indexSegmentDir, segmentMetadataImpl, ReadMode.mmap);
  Set<String> columns = segmentMetadataImpl.getAllColumns();
  // NOTE(review): neither segmentWriter nor segmentDirectory is closed or saved in this method;
  // confirm whether the segment directory API needs an explicit save/close to persist changes.
  SegmentDirectory.Writer segmentWriter = segmentDirectory.createWriter();
  final int totalDocs = segmentMetadataImpl.getTotalDocs();

  for (String column : columns) {
    ColumnMetadata columnMetadata = segmentMetadataImpl.getColumnMetadataFor(column);
    if (columnMetadata.isSorted()) {
      // no need to change sorted forward index
      continue;
    }
    PinotDataBuffer fwdIndexBuffer = segmentWriter.getIndexFor(column, ColumnIndexType.FORWARD_INDEX);
    final boolean singleValue = columnMetadata.isSingleValue();
    File convertedFwdIndexFile = singleValue
        ? new File(indexSegmentDir, column + V1Constants.Indexes.UN_SORTED_SV_FWD_IDX_FILE_EXTENTION + ".tmp")
        : new File(indexSegmentDir, column + V1Constants.Indexes.UN_SORTED_MV_FWD_IDX_FILE_EXTENTION + ".tmp");
    try {
      if (singleValue) {
        writeSingleValueForwardIndexV2(fwdIndexBuffer, convertedFwdIndexFile, totalDocs, columnMetadata);
      } else {
        writeMultiValueForwardIndexV2(fwdIndexBuffer, convertedFwdIndexFile, totalDocs, columnMetadata);
      }
      replaceForwardIndex(segmentWriter, column, convertedFwdIndexFile);
    } finally {
      // Do not leave the scratch file behind, even if the conversion failed part-way.
      convertedFwdIndexFile.delete();
    }
  }

  File metadataFile = new File(indexSegmentDir, V1Constants.MetadataKeys.METADATA_FILE_NAME);
  File metadataFileCopy = new File(indexSegmentDir, V1Constants.MetadataKeys.METADATA_FILE_NAME + ".orig");
  // bis and bos are not declared in this method; presumably they are fields of the enclosing
  // class. Both streams are closed even if opening the copy or copying fails.
  bis = new BufferedInputStream(new FileInputStream(metadataFile));
  try {
    bos = new BufferedOutputStream(new FileOutputStream(metadataFileCopy));
    try {
      IOUtils.copy(bis, bos);
    } finally {
      bos.close();
    }
  } finally {
    bis.close();
  }
  final PropertiesConfiguration properties = new PropertiesConfiguration(metadataFileCopy);
  // update the segment version
  properties.setProperty(V1Constants.MetadataKeys.Segment.SEGMENT_VERSION, SegmentVersion.v2.toString());
  metadataFile.delete();
  properties.save(metadataFile);
}

/**
 * Copies a single-value forward index row by row from the v1 reader over {@code fwdIndexBuffer}
 * into a v2 fixed-bit file at {@code convertedFwdIndexFile}. Reader and writer are closed even
 * if the copy fails.
 */
private void writeSingleValueForwardIndexV2(PinotDataBuffer fwdIndexBuffer, File convertedFwdIndexFile, int totalDocs,
    ColumnMetadata columnMetadata) throws Exception {
  // since we use dictionary to encode values, we wont have any negative values in forward
  // index
  final boolean signed = false;
  SingleColumnSingleValueReader v1Reader = new com.linkedin.pinot.core.io.reader.impl.v1.FixedBitSingleValueReader(fwdIndexBuffer, totalDocs, columnMetadata.getBitsPerElement(), signed);
  SingleColumnSingleValueWriter v2Writer = null;
  try {
    v2Writer = new com.linkedin.pinot.core.io.writer.impl.v2.FixedBitSingleValueWriter(convertedFwdIndexFile, totalDocs, columnMetadata.getBitsPerElement());
    for (int row = 0; row < totalDocs; row++) {
      v2Writer.setInt(row, v1Reader.getInt(row));
    }
  } finally {
    try {
      v1Reader.close();
    } finally {
      if (v2Writer != null) {
        v2Writer.close();
      }
    }
  }
}

/**
 * Copies a multi-value forward index row by row from the v1 reader over {@code fwdIndexBuffer}
 * into a v2 fixed-bit file at {@code convertedFwdIndexFile}. Reader and writer are closed even
 * if the copy fails.
 */
private void writeMultiValueForwardIndexV2(PinotDataBuffer fwdIndexBuffer, File convertedFwdIndexFile, int totalDocs,
    ColumnMetadata columnMetadata) throws Exception {
  // since we use dictionary to encode values, we wont have any negative values in forward
  // index
  final boolean signed = false;
  SingleColumnMultiValueReader v1Reader = new com.linkedin.pinot.core.io.reader.impl.v1.FixedBitMultiValueReader(fwdIndexBuffer, totalDocs, columnMetadata.getTotalNumberOfEntries(), columnMetadata.getBitsPerElement(), signed);
  SingleColumnMultiValueWriter v2Writer = null;
  try {
    v2Writer = new com.linkedin.pinot.core.io.writer.impl.v2.FixedBitMultiValueWriter(convertedFwdIndexFile, totalDocs, columnMetadata.getTotalNumberOfEntries(), columnMetadata.getBitsPerElement());
    // Reused read buffer sized by the column's maximum number of values per row.
    int[] values = new int[columnMetadata.getMaxNumberOfMultiValues()];
    for (int row = 0; row < totalDocs; row++) {
      int length = v1Reader.getIntArray(row, values);
      // The writer is handed an array holding exactly this row's values.
      int[] copy = new int[length];
      System.arraycopy(values, 0, copy, 0, length);
      v2Writer.setIntArray(row, copy);
    }
  } finally {
    try {
      v1Reader.close();
    } finally {
      if (v2Writer != null) {
        v2Writer.close();
      }
    }
  }
}

/**
 * Removes the column's current forward index from {@code segmentWriter} and replaces it with
 * the contents of {@code convertedFwdIndexFile}. The file size is validated before the old
 * index is removed, so an oversized file does not cost the segment its existing index.
 */
private void replaceForwardIndex(SegmentDirectory.Writer segmentWriter, String column, File convertedFwdIndexFile)
    throws Exception {
  final long sizeBytes = convertedFwdIndexFile.length();
  if (sizeBytes > Integer.MAX_VALUE) {
    // A plain (int) cast would silently truncate the size and allocate a buffer that is too small.
    throw new IllegalStateException("Converted forward index for column " + column + " is " + sizeBytes
        + " bytes, which does not fit in an int-sized index buffer");
  }
  segmentWriter.removeIndex(column, ColumnIndexType.FORWARD_INDEX);
  PinotDataBuffer newIndexBuffer = segmentWriter.newIndexFor(column, ColumnIndexType.FORWARD_INDEX, (int) sizeBytes);
  newIndexBuffer.readFrom(convertedFwdIndexFile);
}
Aggregations