Search in sources :

Example 1 with IndexSegmentImpl

use of com.linkedin.pinot.core.segment.index.IndexSegmentImpl in project pinot by linkedin.

the class ChunkIndexCreationDriverImplTest method test4.

@Test(enabled = false)
public void test4() throws Exception {
    final String columnName = "column1";

    // Load the first entry found under the index directory in mmap mode.
    final IndexSegmentImpl segment =
        (IndexSegmentImpl) Loaders.IndexSegment.load(INDEX_DIR.listFiles()[0], ReadMode.mmap);
    final ImmutableDictionaryReader dictionary = segment.getDictionaryFor(columnName);

    // Use one randomly chosen dictionary entry as the value of an equality predicate.
    final int randomDictId = new Random().nextInt(dictionary.length());
    final List<String> predicateValues = new ArrayList<String>();
    predicateValues.add(dictionary.get(randomDictId).toString());
    final Predicate predicate = new EqPredicate(columnName, predicateValues);

    // Walk every doc id produced by the filtered data source until the EOF marker is returned.
    final DataSource dataSource = segment.getDataSource(columnName, predicate);
    final BlockDocIdIterator docIds = dataSource.nextBlock().getBlockDocIdSet().iterator();

    // The ids are only accumulated as a comma-separated string; nothing is asserted or printed.
    final StringBuilder matched = new StringBuilder();
    for (int docId = docIds.next(); docId != Constants.EOF; docId = docIds.next()) {
        matched.append(docId).append(",");
    }
}
Also used : ImmutableDictionaryReader(com.linkedin.pinot.core.segment.index.readers.ImmutableDictionaryReader) ArrayList(java.util.ArrayList) EqPredicate(com.linkedin.pinot.core.common.predicate.EqPredicate) EqPredicate(com.linkedin.pinot.core.common.predicate.EqPredicate) Predicate(com.linkedin.pinot.core.common.Predicate) DataSource(com.linkedin.pinot.core.common.DataSource) BlockDocIdIterator(com.linkedin.pinot.core.common.BlockDocIdIterator) IndexSegmentImpl(com.linkedin.pinot.core.segment.index.IndexSegmentImpl) Random(java.util.Random) BlockDocIdSet(com.linkedin.pinot.core.common.BlockDocIdSet) Block(com.linkedin.pinot.core.common.Block) Test(org.testng.annotations.Test)

Example 2 with IndexSegmentImpl

use of com.linkedin.pinot.core.segment.index.IndexSegmentImpl in project pinot by linkedin.

the class BitmapInvertedIndexTest method testBitMapInvertedIndex.

/**
 * Loads the segment with inverted indexes enabled for {@code invertedIndexColumns} and checks,
 * record by record against the Avro input file, that each document id appears in the inverted
 * index entry of the dictionary id holding its value, and in no other entry of that column.
 *
 * @param readMode read mode used when loading the segment
 * @throws Exception if the segment or the Avro file cannot be read
 */
void testBitMapInvertedIndex(ReadMode readMode) throws Exception {
    IndexLoadingConfigMetadata indexLoadingConfig = new IndexLoadingConfigMetadata(new PropertiesConfiguration());
    indexLoadingConfig.initLoadingInvertedIndexColumnSet(invertedIndexColumns);
    final IndexSegmentImpl mmapSegment = (IndexSegmentImpl) ColumnarSegmentLoader.load(segmentDirectory, readMode, indexLoadingConfig);
    // compare the loaded inverted index with the record in avro file
    final DataFileStream<GenericRecord> reader = new DataFileStream<GenericRecord>(new FileInputStream(new File(getClass().getClassLoader().getResource(AVRO_DATA).getFile())), new GenericDatumReader<GenericRecord>());
    try {
        // Records are read in order, so the running counter is the document id of the current record.
        int docId = 0;
        while (reader.hasNext()) {
            final GenericRecord rec = reader.next();
            for (final String column : ((SegmentMetadataImpl) mmapSegment.getSegmentMetadata()).getColumnMetadataMap().keySet()) {
                Object entry = rec.get(column);
                // Avro hands strings back as Utf8; convert so the dictionary lookup sees a String.
                if (entry instanceof Utf8) {
                    entry = ((Utf8) entry).toString();
                }
                final int dicId = mmapSegment.getDictionaryFor(column).indexOf(entry);
                // make sure that docId for dicId exist in the inverted index
                Assert.assertTrue(mmapSegment.getInvertedIndexFor(column).getImmutable(dicId).contains(docId));
                final int size = mmapSegment.getDictionaryFor(column).length();
                for (int i = 0; i < size; ++i) {
                    // remove this for-loop for quick test
                    if (i == dicId) {
                        continue;
                    }
                    // make sure that docId for dicId does not exist in the inverted index
                    Assert.assertFalse(mmapSegment.getInvertedIndexFor(column).getImmutable(i).contains(docId));
                }
            }
            ++docId;
        }
    } finally {
        // Release the Avro stream (and the file handle it wraps) even when an assertion fails.
        reader.close();
    }
}
Also used : IndexLoadingConfigMetadata(com.linkedin.pinot.common.metadata.segment.IndexLoadingConfigMetadata) DataFileStream(org.apache.avro.file.DataFileStream) PropertiesConfiguration(org.apache.commons.configuration.PropertiesConfiguration) FileInputStream(java.io.FileInputStream) IndexSegmentImpl(com.linkedin.pinot.core.segment.index.IndexSegmentImpl) Utf8(org.apache.avro.util.Utf8) GenericRecord(org.apache.avro.generic.GenericRecord) File(java.io.File)

Example 3 with IndexSegmentImpl

use of com.linkedin.pinot.core.segment.index.IndexSegmentImpl in project pinot by linkedin.

the class DictionariesTest method test2.

@Test
public void test2() throws Exception {
    // Load the same segment directory once per read mode so both dictionaries can be compared.
    final IndexSegmentImpl heapSegment =
        (IndexSegmentImpl) ColumnarSegmentLoader.load(segmentDirectory, ReadMode.heap);
    final IndexSegmentImpl mmapSegment =
        (IndexSegmentImpl) ColumnarSegmentLoader.load(segmentDirectory, ReadMode.mmap);

    final SegmentMetadataImpl segmentMetadata = (SegmentMetadataImpl) mmapSegment.getSegmentMetadata();
    final Map<String, ColumnMetadata> columnMetadata = segmentMetadata.getColumnMetadataMap();

    for (final String column : columnMetadata.keySet()) {
        final ImmutableDictionaryReader heapDictionary = heapSegment.getDictionaryFor(column);
        final ImmutableDictionaryReader mmapDictionary = mmapSegment.getDictionaryFor(column);

        // Probe the dictionaries with the column's known unique values in random order.
        final List<Object> shuffledEntries = Arrays.asList(uniqueEntries.get(column).toArray());
        Collections.shuffle(shuffledEntries);

        for (final Object entry : shuffledEntries) {
            // Both read modes must resolve an entry to the same dictionary index.
            Assert.assertEquals(mmapDictionary.indexOf(entry), heapDictionary.indexOf(entry));

            // "pageKey" is exempt from the must-be-found checks below.
            if (column.equals("pageKey")) {
                continue;
            }
            Assert.assertFalse(heapDictionary.indexOf(entry) < 0);
            Assert.assertFalse(mmapDictionary.indexOf(entry) < 0);
        }
    }
}
Also used : ColumnMetadata(com.linkedin.pinot.core.segment.index.ColumnMetadata) IndexSegmentImpl(com.linkedin.pinot.core.segment.index.IndexSegmentImpl) ImmutableDictionaryReader(com.linkedin.pinot.core.segment.index.readers.ImmutableDictionaryReader) SegmentMetadataImpl(com.linkedin.pinot.core.segment.index.SegmentMetadataImpl) Test(org.testng.annotations.Test)

Example 4 with IndexSegmentImpl

use of com.linkedin.pinot.core.segment.index.IndexSegmentImpl in project pinot by linkedin.

the class DictionaryDumper method main.

/**
 * Prints the dictionary values stored under the given dictionary ids for one column, for every
 * entry found directly under the given directory (each entry is loaded as a segment in heap mode).
 *
 * @param args exactly three arguments: the directory containing the segment directories, the
 *     column name, and a comma-separated list of dictionary ids
 * @throws Exception if a segment cannot be loaded
 * @throws NumberFormatException if one of the dictionary ids is not an integer
 */
public static void main(String[] args) throws Exception {
    if (args.length != 3) {
        LOGGER.error("Usage: DictionaryDumper <segmentDirectory> <dimensionName> <comma-separated dictionaryIds>");
        System.exit(1);
    }

    // File.listFiles() returns null (rather than throwing) when the path is missing or not a directory.
    File[] segmentDirs = new File(args[0]).listFiles();
    if (segmentDirs == null) {
        LOGGER.error("Cannot list segment directories under: " + args[0]);
        System.exit(1);
        return;
    }

    // Parse the ids once up front: they are identical for every segment, and malformed input
    // is reported before any segment is loaded.
    String[] idTokens = args[2].split(",");
    int[] dictionaryIds = new int[idTokens.length];
    for (int i = 0; i < idTokens.length; i++) {
        dictionaryIds[i] = Integer.parseInt(idTokens[i].trim());
    }

    for (File indexSegmentDir : segmentDirs) {
        System.out.println("Loading " + indexSegmentDir.getName());
        IndexSegmentImpl indexSegmentImpl = (IndexSegmentImpl) Loaders.IndexSegment.load(indexSegmentDir, ReadMode.heap);
        ImmutableDictionaryReader colDictionary = indexSegmentImpl.getDictionaryFor(args[1]);
        for (int id : dictionaryIds) {
            String s = colDictionary.getStringValue(id);
            System.out.println(String.format("%d -> %s", id, s));
        }
    }
}
Also used : IndexSegmentImpl(com.linkedin.pinot.core.segment.index.IndexSegmentImpl) ImmutableDictionaryReader(com.linkedin.pinot.core.segment.index.readers.ImmutableDictionaryReader) File(java.io.File)

Example 5 with IndexSegmentImpl

use of com.linkedin.pinot.core.segment.index.IndexSegmentImpl in project pinot by linkedin.

the class StringDictionaryPerfTest method perfTestLookups.

/**
   * Measures the performance of string dictionary lookups by calling {@code indexOf} on the
   * dictionary of {@code COLUMN_NAME} for randomly selected input strings, then deletes the
   * index directory and prints the elapsed time in milliseconds.
   *
   * @param numLookups Number of lookups to perform
   * @throws Exception if the segment cannot be loaded
   */
public void perfTestLookups(int numLookups) throws Exception {
    IndexSegmentImpl segment = (IndexSegmentImpl) Loaders.IndexSegment.load(_indexDir, ReadMode.heap);
    ImmutableDictionaryReader dictionary = segment.getDictionaryFor(COLUMN_NAME);
    Random random = new Random(System.nanoTime());
    long start = System.currentTimeMillis();
    for (int i = 0; i < numLookups; i++) {
        // NOTE(review): index is drawn from [1, _dictLength], so slot 0 is never used and
        // _inputStrings must have at least _dictLength + 1 elements - confirm against the setup code.
        int index = 1 + random.nextInt(_dictLength);
        dictionary.indexOf(_inputStrings[index]);
    }
    // Stop the clock before cleanup so that deleting the index directory is not counted as lookup time.
    long elapsedMillis = System.currentTimeMillis() - start;
    FileUtils.deleteQuietly(_indexDir);
    // Report the number of lookups actually performed rather than a fixed constant.
    System.out.println("Total time for " + numLookups + " lookups: " + elapsedMillis);
}
Also used : IndexSegmentImpl(com.linkedin.pinot.core.segment.index.IndexSegmentImpl) Random(java.util.Random) ImmutableDictionaryReader(com.linkedin.pinot.core.segment.index.readers.ImmutableDictionaryReader)

Aggregations

IndexSegmentImpl (com.linkedin.pinot.core.segment.index.IndexSegmentImpl)12 Test (org.testng.annotations.Test)8 ImmutableDictionaryReader (com.linkedin.pinot.core.segment.index.readers.ImmutableDictionaryReader)6 Block (com.linkedin.pinot.core.common.Block)5 DataSource (com.linkedin.pinot.core.common.DataSource)5 ArrayList (java.util.ArrayList)4 BlockDocIdIterator (com.linkedin.pinot.core.common.BlockDocIdIterator)3 BlockDocIdSet (com.linkedin.pinot.core.common.BlockDocIdSet)3 Predicate (com.linkedin.pinot.core.common.Predicate)3 EqPredicate (com.linkedin.pinot.core.common.predicate.EqPredicate)3 SegmentMetadataImpl (com.linkedin.pinot.core.segment.index.SegmentMetadataImpl)3 File (java.io.File)3 Random (java.util.Random)3 IndexLoadingConfigMetadata (com.linkedin.pinot.common.metadata.segment.IndexLoadingConfigMetadata)2 BlockValSet (com.linkedin.pinot.core.common.BlockValSet)2 ColumnMetadata (com.linkedin.pinot.core.segment.index.ColumnMetadata)2 PropertiesConfiguration (org.apache.commons.configuration.PropertiesConfiguration)2 BrokerRequest (com.linkedin.pinot.common.request.BrokerRequest)1 BlockMultiValIterator (com.linkedin.pinot.core.common.BlockMultiValIterator)1 BlockSingleValIterator (com.linkedin.pinot.core.common.BlockSingleValIterator)1