Use of com.linkedin.pinot.core.segment.index.IndexSegmentImpl in project pinot by LinkedIn.
Class ChunkIndexCreationDriverImplTest, method test4.
/**
 * Disabled test: loads the first segment under INDEX_DIR in mmap mode, builds an equality
 * predicate on "column1" from a randomly chosen dictionary entry, and iterates over every
 * doc id the resulting data source yields until EOF is reached.
 */
@Test(enabled = false)
public void test4() throws Exception {
  final IndexSegmentImpl loadedSegment =
      (IndexSegmentImpl) Loaders.IndexSegment.load(INDEX_DIR.listFiles()[0], ReadMode.mmap);
  final ImmutableDictionaryReader dictionary = loadedSegment.getDictionaryFor("column1");

  // Use one randomly selected dictionary value as the right-hand side of the predicate.
  final List<String> predicateValues = new ArrayList<String>();
  predicateValues.add(dictionary.get(new Random().nextInt(dictionary.length())).toString());
  final Predicate equalsPredicate = new EqPredicate("column1", predicateValues);

  // Data source -> block -> doc id set -> iterator.
  final BlockDocIdIterator docIdIterator =
      loadedSegment.getDataSource("column1", equalsPredicate).nextBlock().getBlockDocIdSet().iterator();

  // Collect the matching doc ids as a comma-terminated list.
  final StringBuilder matchedDocIds = new StringBuilder();
  for (int docId = docIdIterator.next(); docId != Constants.EOF; docId = docIdIterator.next()) {
    matchedDocIds.append(docId).append(",");
  }
  // Debug aid: print matchedDocIds.toString() to inspect the matches.
}
Use of com.linkedin.pinot.core.segment.index.IndexSegmentImpl in project pinot by LinkedIn.
Class BitmapInvertedIndexTest, method testBitMapInvertedIndex.
/**
 * Loads the segment with inverted indexes enabled for {@code invertedIndexColumns} and checks
 * the loaded inverted index against every record of the Avro input file.
 *
 * For each record (whose position in the file is used as the doc id) and each column in the
 * segment metadata, the doc id must be present in the bitmap of the record value's dictionary
 * id and absent from the bitmap of every other dictionary id.
 *
 * @param readMode read mode used to load the segment
 * @throws Exception if the segment or the Avro file cannot be loaded or read
 */
void testBitMapInvertedIndex(ReadMode readMode) throws Exception {
  IndexLoadingConfigMetadata indexLoadingConfig = new IndexLoadingConfigMetadata(new PropertiesConfiguration());
  indexLoadingConfig.initLoadingInvertedIndexColumnSet(invertedIndexColumns);
  final IndexSegmentImpl segment =
      (IndexSegmentImpl) ColumnarSegmentLoader.load(segmentDirectory, readMode, indexLoadingConfig);

  // compare the loaded inverted index with the record in avro file
  final File avroFile = new File(getClass().getClassLoader().getResource(AVRO_DATA).getFile());
  final DataFileStream<GenericRecord> reader =
      new DataFileStream<GenericRecord>(new FileInputStream(avroFile), new GenericDatumReader<GenericRecord>());
  try {
    int docId = 0;
    while (reader.hasNext()) {
      final GenericRecord rec = reader.next();
      for (final String column : ((SegmentMetadataImpl) segment.getSegmentMetadata()).getColumnMetadataMap().keySet()) {
        Object entry = rec.get(column);
        // Avro hands strings back as Utf8; convert to String before the dictionary lookup.
        if (entry instanceof Utf8) {
          entry = ((Utf8) entry).toString();
        }
        final int dicId = segment.getDictionaryFor(column).indexOf(entry);
        // make sure that docId for dicId exist in the inverted index
        Assert.assertTrue(segment.getInvertedIndexFor(column).getImmutable(dicId).contains(docId));
        final int size = segment.getDictionaryFor(column).length();
        for (int i = 0; i < size; ++i) {
          // remove this for-loop for quick test
          if (i == dicId) {
            continue;
          }
          // make sure that docId for dicId does not exist in the inverted index
          Assert.assertFalse(segment.getInvertedIndexFor(column).getImmutable(i).contains(docId));
        }
      }
      ++docId;
    }
  } finally {
    // Release the Avro stream (and the underlying file handle) even when an assertion fails.
    reader.close();
  }
}
Use of com.linkedin.pinot.core.segment.index.IndexSegmentImpl in project pinot by LinkedIn.
Class DictionariesTest, method test2.
/**
 * Loads the same segment in heap and mmap read modes and checks, column by column, that both
 * dictionaries return the same index for every known unique value (visited in shuffled order).
 * For every column except "pageKey" the value must also be found (non-negative index) in both
 * dictionaries.
 */
@Test
public void test2() throws Exception {
  final IndexSegmentImpl heapSegment = (IndexSegmentImpl) ColumnarSegmentLoader.load(segmentDirectory, ReadMode.heap);
  final IndexSegmentImpl mmapSegment = (IndexSegmentImpl) ColumnarSegmentLoader.load(segmentDirectory, ReadMode.mmap);

  final SegmentMetadataImpl segmentMetadata = (SegmentMetadataImpl) mmapSegment.getSegmentMetadata();
  final Map<String, ColumnMetadata> columnMetadata = segmentMetadata.getColumnMetadataMap();

  for (final String column : columnMetadata.keySet()) {
    final ImmutableDictionaryReader heapDictionary = heapSegment.getDictionaryFor(column);
    final ImmutableDictionaryReader mmapDictionary = mmapSegment.getDictionaryFor(column);

    // Visit the recorded unique values of this column in random order.
    final Set<Object> expectedValues = uniqueEntries.get(column);
    final List<Object> shuffledValues = Arrays.asList(expectedValues.toArray());
    Collections.shuffle(shuffledValues);

    for (final Object value : shuffledValues) {
      // Both read modes must agree on the dictionary index.
      Assert.assertEquals(mmapDictionary.indexOf(value), heapDictionary.indexOf(value));

      // "pageKey" is exempt from the presence check.
      if (column.equals("pageKey")) {
        continue;
      }
      Assert.assertFalse(heapDictionary.indexOf(value) < 0);
      Assert.assertFalse(mmapDictionary.indexOf(value) < 0);
    }
  }
}
Use of com.linkedin.pinot.core.segment.index.IndexSegmentImpl in project pinot by LinkedIn.
Class DictionaryDumper, method main.
/**
 * Prints, for every entry found directly under the given directory, the dictionary values of
 * one column for a list of dictionary ids.
 *
 * Expected arguments:
 *   args[0] - directory whose entries are each loaded as an index segment
 *   args[1] - name of the column whose dictionary is read
 *   args[2] - comma-separated dictionary ids to look up
 *
 * Exits with status 1 when the argument count is wrong or the directory cannot be listed.
 *
 * @throws Exception if a segment cannot be loaded
 * @throws NumberFormatException if a dictionary id is not a valid integer
 */
public static void main(String[] args) throws Exception {
  if (args.length != 3) {
    LOGGER.error("Usage: DictionaryDumper <segmentDirectory> <dimensionName> <comma-separated dictionaryIds>");
    System.exit(1);
  }

  // File.listFiles() returns null when the path is not a directory or cannot be read.
  File[] segmentDirs = new File(args[0]).listFiles();
  if (segmentDirs == null) {
    LOGGER.error("Cannot list segment directory: " + args[0]);
    System.exit(1);
  }

  // Parse the ids once up front: the list is the same for every segment, and a malformed id
  // is reported before any segment is loaded.
  String[] strIds = args[2].split(",");
  int[] dictionaryIds = new int[strIds.length];
  for (int i = 0; i < strIds.length; i++) {
    dictionaryIds[i] = Integer.parseInt(strIds[i].trim());
  }

  for (File indexSegmentDir : segmentDirs) {
    System.out.println("Loading " + indexSegmentDir.getName());
    IndexSegmentImpl indexSegmentImpl = (IndexSegmentImpl) Loaders.IndexSegment.load(indexSegmentDir, ReadMode.heap);
    ImmutableDictionaryReader colDictionary = indexSegmentImpl.getDictionaryFor(args[1]);
    for (int id : dictionaryIds) {
      String s = colDictionary.getStringValue(id);
      System.out.println(String.format("%d -> %s", id, s));
    }
  }
}
Use of com.linkedin.pinot.core.segment.index.IndexSegmentImpl in project pinot by LinkedIn.
Class StringDictionaryPerfTest, method perfTestLookups.
/**
 * Measures the performance of string dictionary lookups by calling
 * {@code indexOf} on the dictionary of {@code COLUMN_NAME} for the provided number of
 * randomly chosen input strings, then prints the elapsed time in milliseconds.
 *
 * The index directory is deleted once the measurement is done.
 *
 * @param numLookups Number of lookups to perform
 * @throws Exception if the segment cannot be loaded
 */
public void perfTestLookups(int numLookups) throws Exception {
  IndexSegmentImpl segment = (IndexSegmentImpl) Loaders.IndexSegment.load(_indexDir, ReadMode.heap);
  ImmutableDictionaryReader dictionary = segment.getDictionaryFor(COLUMN_NAME);
  Random random = new Random(System.nanoTime());

  long start = System.currentTimeMillis();
  for (int i = 0; i < numLookups; i++) {
    // NOTE(review): this yields indices in [1, _dictLength]; if _inputStrings holds exactly
    // _dictLength elements, the top value is out of bounds and slot 0 is never used - confirm
    // against where _inputStrings is allocated.
    int index = 1 + random.nextInt(_dictLength);
    dictionary.indexOf(_inputStrings[index]);
  }
  // Stop the clock before cleanup so directory deletion is not counted as lookup time.
  long elapsedMillis = System.currentTimeMillis() - start;

  FileUtils.deleteQuietly(_indexDir);
  // Report the number of lookups actually performed rather than a fixed constant.
  System.out.println("Total time for " + numLookups + " lookups: " + elapsedMillis);
}
Aggregations