Use of com.linkedin.pinot.core.segment.store.SegmentDirectory in project pinot by linkedin.
The class SegmentV1V2ToV3FormatConverter, method copyIndexData.
/**
 * Copies the index data of a v2 segment into a v3 segment directory.
 *
 * <p>Both directories are opened in {@code ReadMode.mmap}. For every column reported by
 * {@code v2Metadata} the dictionary (when the column has one) and the forward index are copied
 * first; existing inverted indexes are copied in a second pass, followed by the star tree.
 *
 * @param v2Directory directory of the source segment
 * @param v2Metadata metadata of the source segment; supplies the column list
 * @param v3Directory directory of the destination segment
 * @throws Exception if opening either segment or copying any index fails
 */
private void copyIndexData(File v2Directory, SegmentMetadataImpl v2Metadata, File v3Directory) throws Exception {
  final SegmentMetadataImpl targetMetadata = new SegmentMetadataImpl(v3Directory);

  try (SegmentDirectory sourceSegment = SegmentDirectory.createFromLocalFS(v2Directory, v2Metadata, ReadMode.mmap);
      SegmentDirectory targetSegment = SegmentDirectory.createFromLocalFS(v3Directory, targetMetadata, ReadMode.mmap)) {
    final Set<String> columnNames = v2Metadata.getAllColumns();

    try (SegmentDirectory.Reader source = sourceSegment.createReader();
        SegmentDirectory.Writer target = targetSegment.createWriter()) {
      // First pass: dictionary (if any) and forward index of every column.
      for (String columnName : columnNames) {
        LOGGER.debug("Converting segment: {} , column: {}", v2Directory, columnName);
        final boolean dictionaryPresent = v2Metadata.hasDictionary(columnName);
        if (dictionaryPresent) {
          copyDictionary(source, target, columnName);
        }
        copyForwardIndex(source, target, columnName);
      }

      // Second pass: inverted indexes are intentionally stored at the end of the single file.
      for (String columnName : columnNames) {
        copyExistingInvertedIndex(source, target, columnName);
      }

      copyStarTree(source, target);
      target.saveAndClose();
    }
  }
}
Use of com.linkedin.pinot.core.segment.store.SegmentDirectory in project pinot by linkedin.
The class BitmapPerformanceBenchmark, method iterationSpeed.
/**
 * Times a full iteration over the inverted-index bitmap of one dictionary value.
 *
 * <p>The bitmap inverted index is read from {@code <indexSegmentDir>/<column>.bitmap.inv}; the
 * dictionary is obtained through a {@link SegmentDirectory} reader over the same directory. The
 * bitmap for the hard-coded value {@code "na.us"} is iterated and the number of entries and the
 * elapsed milliseconds are printed to standard output.
 *
 * <p>Every resource opened here (mapped buffer, segment directory, reader) is released before the
 * method returns, including when an exception is thrown.
 *
 * @param indexSegmentDir path of the segment directory
 * @param column name of the column whose inverted index is iterated
 * @throws Exception if the metadata, index file or dictionary cannot be loaded
 */
public static void iterationSpeed(String indexSegmentDir, String column) throws Exception {
  File indexSegment = new File(indexSegmentDir);
  SegmentMetadataImpl segmentMetadata = new SegmentMetadataImpl(indexSegment);
  File bitMapIndexFile = new File(indexSegmentDir, column + ".bitmap.inv");
  ColumnMetadata columnMetadata = segmentMetadata.getColumnMetadataFor(column);
  int cardinality = columnMetadata.getCardinality();

  PinotDataBuffer bitMapDataBuffer =
      PinotDataBuffer.fromFile(bitMapIndexFile, ReadMode.mmap, FileChannel.MapMode.READ_ONLY, "testing");
  try {
    BitmapInvertedIndexReader bitmapInvertedIndex = new BitmapInvertedIndexReader(bitMapDataBuffer, cardinality);

    // The directory and reader stay open while the dictionary obtained from them is in use.
    try (SegmentDirectory segmentDirectory =
            SegmentDirectory.createFromLocalFS(indexSegment, segmentMetadata, ReadMode.mmap);
        SegmentDirectory.Reader segmentReader = segmentDirectory.createReader()) {
      ColumnIndexContainer container = ColumnIndexContainer.init(segmentReader, columnMetadata, null);
      ImmutableDictionaryReader dictionary = container.getDictionary();

      // NOTE(review): the looked-up value is hard-coded; behavior when the column does not
      // contain "na.us" depends on what indexOf returns for a missing value - confirm.
      int dictId = dictionary.indexOf("na.us");
      ImmutableRoaringBitmap immutable = bitmapInvertedIndex.getImmutable(dictId);
      Iterator<Integer> iterator = immutable.iterator();

      int count = 0;
      long start = System.currentTimeMillis();
      while (iterator.hasNext()) {
        iterator.next();
        count = count + 1;
      }
      long end = System.currentTimeMillis();
      System.out.println(" matched: " + count + " Time to iterate:" + (end - start));
    }
  } finally {
    // Release the mapped index file even when loading or iteration fails.
    bitMapDataBuffer.close();
  }
}
Use of com.linkedin.pinot.core.segment.store.SegmentDirectory in project pinot by linkedin.
The class BitmapPerformanceBenchmark, method benchmarkIntersetionAndUnion.
/**
 * Benchmarks intersection and union of inverted-index bitmaps picked at random.
 *
 * <p>Every {@code *.bitmap.inv} file in the segment directory is loaded together with the
 * dictionary of its column. Then, for 1 to 3 dimensions per run, 100 runs each, random dictionary
 * ids are drawn, the matching bitmaps are collected, their cardinalities and sizes are printed and
 * {@code and(...)} / {@code or(...)} are invoked on them.
 *
 * <p>The mapped index buffers, the segment directory and its reader are kept open for the whole
 * benchmark and released afterwards, because the inverted-index readers and dictionaries built
 * from them are queried during the runs.
 *
 * @param indexSegmentDir path of the segment directory
 * @throws IOException if the directory cannot be listed
 * @throws Exception if metadata, index files or dictionaries cannot be loaded
 */
public static void benchmarkIntersetionAndUnion(String indexSegmentDir) throws ConfigurationException, IOException, Exception {
  final String invertedIndexSuffix = ".bitmap.inv";

  File indexDir = new File(indexSegmentDir);
  File[] listFiles = indexDir.listFiles();
  if (listFiles == null) {
    // listFiles() returns null when the path is not a directory or cannot be read.
    throw new IOException("Cannot list files of segment directory: " + indexSegmentDir);
  }

  SegmentMetadataImpl segmentMetadata = new SegmentMetadataImpl(indexDir);
  Map<String, BitmapInvertedIndexReader> bitMapIndexMap = new HashMap<String, BitmapInvertedIndexReader>();
  Map<String, Integer> cardinalityMap = new HashMap<String, Integer>();
  Map<String, ImmutableDictionaryReader> dictionaryMap = new HashMap<String, ImmutableDictionaryReader>();
  // Buffers backing the inverted-index readers; closed only after the benchmark has finished.
  List<PinotDataBuffer> openBuffers = new ArrayList<PinotDataBuffer>();

  // One directory/reader pair serves all columns and is closed when the benchmark ends.
  try (SegmentDirectory segmentDirectory =
          SegmentDirectory.createFromLocalFS(indexDir, segmentMetadata, ReadMode.mmap);
      SegmentDirectory.Reader segmentReader = segmentDirectory.createReader()) {
    for (File file : listFiles) {
      String fileName = file.getName();
      if (!fileName.endsWith(invertedIndexSuffix)) {
        continue;
      }
      // Strip the suffix literally; a regex replace would treat '.' as a wildcard and could
      // also remove matching text from the middle of the column name.
      String column = fileName.substring(0, fileName.length() - invertedIndexSuffix.length());
      ColumnMetadata columnMetadata = segmentMetadata.getColumnMetadataFor(column);
      int cardinality = columnMetadata.getCardinality();
      cardinalityMap.put(column, cardinality);
      System.out.println(column + "\t\t\t" + cardinality + " \t" + columnMetadata.getDataType());

      PinotDataBuffer bitmapDataBuffer =
          PinotDataBuffer.fromFile(file, ReadMode.mmap, FileChannel.MapMode.READ_ONLY, "testing");
      openBuffers.add(bitmapDataBuffer);
      BitmapInvertedIndexReader bitmapInvertedIndex = new BitmapInvertedIndexReader(bitmapDataBuffer, cardinality);

      ColumnIndexContainer container = ColumnIndexContainer.init(segmentReader, columnMetadata, null);
      ImmutableDictionaryReader dictionary = container.getDictionary();
      if (columnMetadata.getDataType() == DataType.INT) {
        System.out.println("BitmapPerformanceBenchmark.main()");
        assert dictionary instanceof IntDictionary;
      }
      dictionaryMap.put(column, dictionary);
      // System.out.println(column + ":\t" + MemoryUtil.deepMemoryUsageOf(bitmapInvertedIndex));
      bitMapIndexMap.put(column, bitmapInvertedIndex);
    }

    // Work on a private copy so the schema's own list is not reordered, and keep only the
    // dimensions for which an inverted index was actually loaded.
    List<String> dimensionNamesList = new ArrayList<String>(segmentMetadata.getSchema().getDimensionNames());
    dimensionNamesList.retainAll(bitMapIndexMap.keySet());

    final int numTests = 100;
    final int maxValuesPerDimension = 1;
    // Never ask for more dimensions than are available.
    final int maxDimensionsInWhereClause = Math.min(3, dimensionNamesList.size());
    Random random = new Random();

    for (int numDimensions = 1; numDimensions <= maxDimensionsInWhereClause; numDimensions++) {
      for (int numValuesPerDimension = 1; numValuesPerDimension <= maxValuesPerDimension; numValuesPerDimension++) {
        for (int runCount = 0; runCount < numTests; runCount++) {
          Collections.shuffle(dimensionNamesList);
          List<ImmutableRoaringBitmap> bitMaps = new ArrayList<ImmutableRoaringBitmap>();
          List<String> columnNameValuePairs = new ArrayList<String>();
          for (int i = 0; i < numDimensions; i++) {
            String columnName = dimensionNamesList.get(i);
            InvertedIndexReader bitmapInvertedIndex = bitMapIndexMap.get(columnName);
            for (int j = 0; j < numValuesPerDimension; j++) {
              int dictId = random.nextInt(cardinalityMap.get(columnName));
              String dictValue = dictionaryMap.get(columnName).getStringValue(dictId);
              columnNameValuePairs.add(columnName + ":" + dictValue);
              ImmutableRoaringBitmap immutable = bitmapInvertedIndex.getImmutable(dictId);
              bitMaps.add(immutable);
            }
          }

          System.out.println("START**********************************");
          int[] cardinality = new int[bitMaps.size()];
          int[] sizes = new int[bitMaps.size()];
          for (int i = 0; i < bitMaps.size(); i++) {
            ImmutableRoaringBitmap immutableRoaringBitmap = bitMaps.get(i);
            cardinality[i] = immutableRoaringBitmap.getCardinality();
            sizes[i] = immutableRoaringBitmap.getSizeInBytes();
          }
          System.out.println("\t#bitmaps:" + bitMaps.size());
          System.out.println("\tinput values:" + columnNameValuePairs);
          System.out.println("\tinput cardinality:" + Arrays.toString(cardinality));
          System.out.println("\tinput sizes:" + Arrays.toString(sizes));
          and(bitMaps);
          or(bitMaps);
          System.out.println("END**********************************");
        }
      }
    }
  } finally {
    // Release the mapped index files only now that no reader will touch them again.
    for (PinotDataBuffer buffer : openBuffers) {
      buffer.close();
    }
  }
}
Use of com.linkedin.pinot.core.segment.store.SegmentDirectory in project pinot by linkedin.
The class LoadersTest, method testPadding.
/**
 * Verifies string-dictionary padding for three packaged segments: the old format, the new format
 * padded with the legacy character, and the new format padded with the default character.
 *
 * <p>For each segment the archive is unpacked into {@code INDEX_DIR}, the padding character
 * recorded in the metadata of column {@code "name"} is checked, and a {@link StringDictionary}
 * built from the column's dictionary buffer is checked for raw values, unpadded values and
 * lookups of partially padded keys. The segment directory and reader of each section are closed
 * once that section's assertions have run.
 */
@Test
public void testPadding() throws Exception {
  // Old Format
  TarGzCompressionUtils.unTar(new File(TestUtils.getFileFromResourceUrl(Loaders.class.getClassLoader().getResource(PADDING_OLD))), INDEX_DIR);
  File segmentDirectory = new File(INDEX_DIR, "paddingOld");
  SegmentMetadataImpl originalMetadata = new SegmentMetadataImpl(segmentDirectory);
  Assert.assertEquals(originalMetadata.getColumnMetadataFor("name").getPaddingCharacter(), V1Constants.Str.LEGACY_STRING_PAD_CHAR);
  try (SegmentDirectory segmentDir = SegmentDirectory.createFromLocalFS(segmentDirectory, originalMetadata, ReadMode.heap);
      SegmentDirectory.Reader reader = segmentDir.createReader()) {
    ColumnMetadata columnMetadataFor = originalMetadata.getColumnMetadataFor("name");
    PinotDataBuffer dictionaryBuffer = reader.getIndexFor("name", ColumnIndexType.DICTIONARY);
    StringDictionary dict = new StringDictionary(dictionaryBuffer, columnMetadataFor);
    Assert.assertEquals(dict.getStringValue(0), "lynda 2.0");
    Assert.assertEquals(dict.getStringValue(1), "lynda%%%%");
    Assert.assertEquals(dict.get(0), "lynda 2.0");
    Assert.assertEquals(dict.get(1), "lynda");
    Assert.assertEquals(dict.indexOf("lynda%"), 1);
    Assert.assertEquals(dict.indexOf("lynda%%"), 1);
  }

  // New Format Padding character %
  TarGzCompressionUtils.unTar(new File(TestUtils.getFileFromResourceUrl(Loaders.class.getClassLoader().getResource(PADDING_PERCENT))), INDEX_DIR);
  segmentDirectory = new File(INDEX_DIR, "paddingPercent");
  originalMetadata = new SegmentMetadataImpl(segmentDirectory);
  Assert.assertEquals(originalMetadata.getColumnMetadataFor("name").getPaddingCharacter(), V1Constants.Str.LEGACY_STRING_PAD_CHAR);
  try (SegmentDirectory segmentDir = SegmentDirectory.createFromLocalFS(segmentDirectory, originalMetadata, ReadMode.heap);
      SegmentDirectory.Reader reader = segmentDir.createReader()) {
    ColumnMetadata columnMetadataFor = originalMetadata.getColumnMetadataFor("name");
    PinotDataBuffer dictionaryBuffer = reader.getIndexFor("name", ColumnIndexType.DICTIONARY);
    StringDictionary dict = new StringDictionary(dictionaryBuffer, columnMetadataFor);
    Assert.assertEquals(dict.getStringValue(0), "lynda 2.0");
    Assert.assertEquals(dict.getStringValue(1), "lynda%%%%");
    Assert.assertEquals(dict.get(0), "lynda 2.0");
    Assert.assertEquals(dict.get(1), "lynda");
    Assert.assertEquals(dict.indexOf("lynda%"), 1);
    Assert.assertEquals(dict.indexOf("lynda%%"), 1);
  }

  // New Format Padding character Null
  TarGzCompressionUtils.unTar(new File(TestUtils.getFileFromResourceUrl(Loaders.class.getClassLoader().getResource(PADDING_NULL))), INDEX_DIR);
  segmentDirectory = new File(INDEX_DIR, "paddingNull");
  originalMetadata = new SegmentMetadataImpl(segmentDirectory);
  Assert.assertEquals(originalMetadata.getColumnMetadataFor("name").getPaddingCharacter(), V1Constants.Str.DEFAULT_STRING_PAD_CHAR);
  try (SegmentDirectory segmentDir = SegmentDirectory.createFromLocalFS(segmentDirectory, originalMetadata, ReadMode.heap);
      SegmentDirectory.Reader reader = segmentDir.createReader()) {
    ColumnMetadata columnMetadataFor = originalMetadata.getColumnMetadataFor("name");
    PinotDataBuffer dictionaryBuffer = reader.getIndexFor("name", ColumnIndexType.DICTIONARY);
    StringDictionary dict = new StringDictionary(dictionaryBuffer, columnMetadataFor);
    Assert.assertEquals(dict.getStringValue(0), "lynda\0\0\0\0");
    Assert.assertEquals(dict.getStringValue(1), "lynda 2.0");
    Assert.assertEquals(dict.get(0), "lynda");
    Assert.assertEquals(dict.get(1), "lynda 2.0");
    Assert.assertEquals(dict.indexOf("lynda\0"), 0);
    Assert.assertEquals(dict.indexOf("lynda\0\0"), 0);
  }
}
Use of com.linkedin.pinot.core.segment.store.SegmentDirectory in project pinot by linkedin.
The class SegmentPreProcessorTest, method checkInvertedIndexCreation.
/**
 * Asserts which inverted indexes exist before and after running {@link SegmentPreProcessor}.
 *
 * <p>Before processing: with {@code reCreate} set, COLUMN1, COLUMN13 and COLUMN7 are all expected
 * to have an inverted index; otherwise only COLUMN7 is. After processing, all three are expected
 * to have one. The non-existent column must never report an inverted index.
 *
 * @param segmentDirectoryFile directory of the segment under test
 * @param segmentMetadata metadata used to open the segment directory
 * @param reCreate whether the inverted indexes are expected to be present before processing
 * @throws Exception if the segment cannot be opened or processing fails
 */
private void checkInvertedIndexCreation(File segmentDirectoryFile, SegmentMetadataImpl segmentMetadata, boolean reCreate) throws Exception {
  // State of the segment before the pre-processor runs.
  try (SegmentDirectory directoryBefore = SegmentDirectory.createFromLocalFS(segmentDirectoryFile, segmentMetadata, ReadMode.mmap);
      SegmentDirectory.Reader readerBefore = directoryBefore.createReader()) {
    if (!reCreate) {
      Assert.assertFalse(readerBefore.hasIndexFor(COLUMN1_NAME, ColumnIndexType.INVERTED_INDEX));
      Assert.assertTrue(readerBefore.hasIndexFor(COLUMN7_NAME, ColumnIndexType.INVERTED_INDEX));
      Assert.assertFalse(readerBefore.hasIndexFor(COLUMN13_NAME, ColumnIndexType.INVERTED_INDEX));
      Assert.assertFalse(readerBefore.hasIndexFor(NO_SUCH_COLUMN_NAME, ColumnIndexType.INVERTED_INDEX));
    } else {
      Assert.assertTrue(readerBefore.hasIndexFor(COLUMN1_NAME, ColumnIndexType.INVERTED_INDEX));
      Assert.assertTrue(readerBefore.hasIndexFor(COLUMN13_NAME, ColumnIndexType.INVERTED_INDEX));
      Assert.assertTrue(readerBefore.hasIndexFor(COLUMN7_NAME, ColumnIndexType.INVERTED_INDEX));
      Assert.assertFalse(readerBefore.hasIndexFor(NO_SUCH_COLUMN_NAME, ColumnIndexType.INVERTED_INDEX));
    }
  }

  // Run the pre-processor on the segment directory.
  SegmentPreProcessor preProcessor = new SegmentPreProcessor(segmentDirectoryFile, _indexLoadingConfigMetadata, null);
  preProcessor.process();

  // State of the segment after the pre-processor has run.
  try (SegmentDirectory directoryAfter = SegmentDirectory.createFromLocalFS(segmentDirectoryFile, segmentMetadata, ReadMode.mmap);
      SegmentDirectory.Reader readerAfter = directoryAfter.createReader()) {
    Assert.assertTrue(readerAfter.hasIndexFor(COLUMN1_NAME, ColumnIndexType.INVERTED_INDEX));
    Assert.assertTrue(readerAfter.hasIndexFor(COLUMN13_NAME, ColumnIndexType.INVERTED_INDEX));
    Assert.assertTrue(readerAfter.hasIndexFor(COLUMN7_NAME, ColumnIndexType.INVERTED_INDEX));
    Assert.assertFalse(readerAfter.hasIndexFor(NO_SUCH_COLUMN_NAME, ColumnIndexType.INVERTED_INDEX));
  }
}
Aggregations