Use of com.linkedin.pinot.core.segment.creator.impl.inv.OffHeapBitmapInvertedIndexCreator in project pinot by linkedin.
Class InvertedIndexHandler, method createInvertedIndexForColumn.
/**
 * Builds the bitmap inverted index for a single column by replaying its forward index into an
 * {@link OffHeapBitmapInvertedIndexCreator}.
 *
 * <p>A marker file named {@code <column>.inv.inprogress} in the index directory brackets the
 * build. If the marker is already present when this method starts, the previous run is treated
 * as interrupted and any leftover inverted index file is removed before rebuilding. If the
 * marker is absent and the segment already reports an inverted index for the column, nothing is
 * done.
 *
 * @param columnMetadata metadata of the column to index
 * @throws IOException if the marker file cannot be created or reading/writing an index fails
 */
private void createInvertedIndexForColumn(ColumnMetadata columnMetadata) throws IOException {
  final String columnName = columnMetadata.getColumnName();
  final File markerFile = new File(indexDir, columnName + ".inv.inprogress");
  final File indexFile =
      new File(indexDir, columnName + V1Constants.Indexes.BITMAP_INVERTED_INDEX_FILE_EXTENSION);

  if (markerFile.exists()) {
    // A surviving marker means the last attempt did not finish; discard whatever it left behind.
    // For v1 and v2 this is the actual inverted index, for v3 it is the temporary one.
    FileUtils.deleteQuietly(indexFile);
  } else {
    if (segmentWriter.hasIndexFor(columnName, ColumnIndexType.INVERTED_INDEX)) {
      // The index is already in place, so there is nothing to build.
      LOGGER.info("Found inverted index for segment: {}, column: {}", segmentName, columnName);
      return;
    }
    // Drop the marker before doing any work so an interruption can be detected next time.
    FileUtils.touch(markerFile);
  }

  LOGGER.info("Creating new inverted index for segment: {}, column: {}", segmentName, columnName);
  final int numDocs = columnMetadata.getTotalDocs();
  OffHeapBitmapInvertedIndexCreator indexCreator =
      new OffHeapBitmapInvertedIndexCreator(indexDir, columnMetadata.getCardinality(), numDocs,
          columnMetadata.getTotalNumberOfEntries(), columnMetadata.getFieldSpec());

  try (DataFileReader forwardIndex = getForwardIndexReader(columnMetadata, segmentWriter)) {
    if (!columnMetadata.isSingleValue()) {
      // Multi-value column: each document contributes a variable number of dictionary ids.
      SingleColumnMultiValueReader multiValueReader = (SingleColumnMultiValueReader) forwardIndex;
      int[] dictIdBuffer = new int[columnMetadata.getMaxNumberOfMultiValues()];
      for (int docId = 0; docId < numDocs; docId++) {
        int numValues = multiValueReader.getIntArray(docId, dictIdBuffer);
        indexCreator.add(docId, dictIdBuffer, numValues);
      }
    } else {
      // Single-value column: exactly one dictionary id per document.
      FixedBitSingleValueReader singleValueReader = (FixedBitSingleValueReader) forwardIndex;
      for (int docId = 0; docId < numDocs; docId++) {
        indexCreator.add(docId, singleValueReader.getInt(docId));
      }
    }
  }
  indexCreator.seal();

  // For v3, write the generated inverted index file into the single file and remove it.
  if (segmentVersion == SegmentVersion.v3) {
    LoaderUtils.writeIndexToV3Format(segmentWriter, columnName, indexFile, ColumnIndexType.INVERTED_INDEX);
  }

  // The build completed, so the marker is no longer needed.
  FileUtils.deleteQuietly(markerFile);
  LOGGER.info("Created inverted index for segment: {}, column: {}", segmentName, columnName);
}
Use of com.linkedin.pinot.core.segment.creator.impl.inv.OffHeapBitmapInvertedIndexCreator in project pinot by linkedin.
Class SegmentColumnarIndexCreator, method init.
/**
 * Prepares this creator for building a segment: resets counters, creates the output directory,
 * and sets up the dictionary, forward index and inverted index creators for the columns.
 *
 * <p>The output directory must not exist beforehand; it is created here and the call fails if
 * that creation does not succeed.
 *
 * @param segmentCreationSpec segment generation configuration (padding character, raw index
 *     columns, inverted index columns)
 * @param segmentIndexCreationInfo segment-level statistics (document, error and null counts)
 * @param indexCreationInfoMap per-column index creation info, keyed by column name
 * @param schema schema describing the columns of the segment
 * @param outDir directory the segment is written to
 * @throws RuntimeException if {@code outDir} already exists or cannot be created, or if a raw
 *     index is requested for a multi-valued column
 * @throws Exception if any of the underlying creators fails to initialize
 */
@Override
public void init(SegmentGeneratorConfig segmentCreationSpec, SegmentIndexCreationInfo segmentIndexCreationInfo, Map<String, ColumnIndexCreationInfo> indexCreationInfoMap, Schema schema, File outDir) throws Exception {
  docIdCounter = 0;
  config = segmentCreationSpec;
  this.indexCreationInfoMap = indexCreationInfoMap;
  dictionaryCreatorMap = new HashMap<String, SegmentDictionaryCreator>();
  forwardIndexCreatorMap = new HashMap<String, ForwardIndexCreator>();
  invertedIndexCreatorMap = new HashMap<String, InvertedIndexCreator>();
  file = outDir;

  // Check that the output directory does not exist
  if (file.exists()) {
    throw new RuntimeException("Segment output directory " + file.getAbsolutePath() + " already exists.");
  }
  // Fail fast when the directory cannot be created instead of letting an index creator hit an
  // obscure I/O error later on.
  if (!file.mkdir()) {
    throw new RuntimeException("Failed to create segment output directory " + file.getAbsolutePath());
  }

  this.schema = schema;
  this.totalDocs = segmentIndexCreationInfo.getTotalDocs();
  this.totalAggDocs = segmentIndexCreationInfo.getTotalAggDocs();
  this.totalRawDocs = segmentIndexCreationInfo.getTotalRawDocs();
  this.totalErrors = segmentIndexCreationInfo.getTotalErrors();
  this.totalNulls = segmentIndexCreationInfo.getTotalNulls();
  this.totalConversions = segmentIndexCreationInfo.getTotalConversions();
  this.totalNullCols = segmentIndexCreationInfo.getTotalNullCols();
  this.paddingCharacter = segmentCreationSpec.getPaddingCharacter();

  // Initialize dictionary creators for the columns that need one
  for (final FieldSpec spec : schema.getAllFieldSpecs()) {
    String column = spec.getName();
    final ColumnIndexCreationInfo info = indexCreationInfoMap.get(column);
    if (createDictionaryForColumn(info, config, spec)) {
      dictionaryCreatorMap.put(column, new SegmentDictionaryCreator(info.hasNulls(), info.getSortedUniqueElementsArray(), spec, file, paddingCharacter));
    }
  }

  // For each column, build its dictionary (if any) and initialize a forward index creator
  for (final Map.Entry<String, ColumnIndexCreationInfo> entry : indexCreationInfoMap.entrySet()) {
    final String column = entry.getKey();
    final ColumnIndexCreationInfo indexCreationInfo = entry.getValue();

    // build() receives the sorted flag in a one-element array and may update it; the value is
    // copied back into the creation info afterwards.
    boolean[] isSorted = new boolean[1];
    isSorted[0] = indexCreationInfo.isSorted();
    SegmentDictionaryCreator dictionaryCreator = dictionaryCreatorMap.get(column);
    if (dictionaryCreator != null) {
      dictionaryCreator.build(isSorted);
      indexCreationInfo.setSorted(isSorted[0]);
      dictionaryCache.put(column, new HashMap<Object, Object>());
    }

    int uniqueValueCount = indexCreationInfo.getDistinctValueCount();
    int maxLength = indexCreationInfo.getLegnthOfLongestEntry();
    boolean buildRawIndex = config.getRawIndexCreationColumns().contains(column);
    FieldSpec fieldSpec = schema.getFieldSpecFor(column);

    if (fieldSpec.isSingleValueField()) {
      // Raw indexes store actual values, instead of dictionary ids.
      if (buildRawIndex) {
        forwardIndexCreatorMap.put(column, getRawIndexCreatorForColumn(file, column, fieldSpec.getDataType(), totalDocs, maxLength));
      } else if (indexCreationInfo.isSorted()) {
        forwardIndexCreatorMap.put(column, new SingleValueSortedForwardIndexCreator(file, uniqueValueCount, fieldSpec));
      } else {
        forwardIndexCreatorMap.put(column, new SingleValueUnsortedForwardIndexCreator(fieldSpec, file, uniqueValueCount, totalDocs, indexCreationInfo.getTotalNumberOfEntries(), indexCreationInfo.hasNulls()));
      }
    } else {
      if (buildRawIndex) {
        // TODO: Add support for multi-valued columns.
        throw new RuntimeException("Raw index generation not supported for multi-valued columns: " + column);
      }
      forwardIndexCreatorMap.put(column, new MultiValueUnsortedForwardIndexCreator(fieldSpec, file, uniqueValueCount, totalDocs, indexCreationInfo.getTotalNumberOfEntries(), indexCreationInfo.hasNulls()));
    }
  }

  // Set up an inverted index creator for every configured column that exists in the schema
  for (String column : config.getInvertedIndexCreationColumns()) {
    if (!schema.hasColumn(column)) {
      LOGGER.warn("Skipping enabling index on column:{} since its missing in schema", column);
      continue;
    }
    ColumnIndexCreationInfo indexCreationInfo = indexCreationInfoMap.get(column);
    int uniqueValueCount = indexCreationInfo.getDistinctValueCount();
    OffHeapBitmapInvertedIndexCreator invertedIndexCreator = new OffHeapBitmapInvertedIndexCreator(file, uniqueValueCount, totalDocs, indexCreationInfo.getTotalNumberOfEntries(), schema.getFieldSpecFor(column));
    invertedIndexCreatorMap.put(column, invertedIndexCreator);
  }
}
Use of com.linkedin.pinot.core.segment.creator.impl.inv.OffHeapBitmapInvertedIndexCreator in project pinot by linkedin.
Class BitmapInvertedIndexCreatorTest, method testSingleValue.
/**
 * Builds a bitmap inverted index for a random single-value column with both the off-heap and
 * the heap creator, validates each result against the expected posting lists, and checks that
 * the two generated index files are identical.
 */
@Test
public void testSingleValue() throws IOException {
  boolean singleValue = true;
  String colName = "single_value_col";
  FieldSpec spec = new DimensionFieldSpec(colName, DataType.INT, singleValue);
  int numDocs = 20;
  int[] data = new int[numDocs];
  int cardinality = 10;
  File indexDirHeap = new File("/tmp/indexDirHeap");
  File indexDirOffHeap = new File("/tmp/indexDirOffHeap");
  try {
    // forceMkdir creates the directory (and parents) or throws, so no extra mkdirs() is needed.
    FileUtils.forceMkdir(indexDirHeap);
    FileUtils.forceMkdir(indexDirOffHeap);
    File bitmapIndexFileOffHeap = new File(indexDirOffHeap, colName + V1Constants.Indexes.BITMAP_INVERTED_INDEX_FILE_EXTENSION);
    File bitmapIndexFileHeap = new File(indexDirHeap, colName + V1Constants.Indexes.BITMAP_INVERTED_INDEX_FILE_EXTENSION);

    // GENERATE RANDOM DATA SET
    Random r = new Random();
    Map<Integer, Set<Integer>> postingListMap = new HashMap<>();
    for (int i = 0; i < cardinality; i++) {
      postingListMap.put(i, new LinkedHashSet<Integer>());
    }
    for (int i = 0; i < numDocs; i++) {
      data[i] = r.nextInt(cardinality);
      LOGGER.debug("docId:" + i + " dictId:" + data[i]);
      postingListMap.get(data[i]).add(i);
    }
    for (int i = 0; i < cardinality; i++) {
      LOGGER.debug("Posting list for " + i + " : " + postingListMap.get(i));
    }

    // GENERATE BITMAP USING OffHeapCreator and validate
    OffHeapBitmapInvertedIndexCreator offHeapCreator = new OffHeapBitmapInvertedIndexCreator(indexDirOffHeap, cardinality, numDocs, numDocs, spec);
    for (int i = 0; i < numDocs; i++) {
      offHeapCreator.add(i, data[i]);
    }
    offHeapCreator.seal();
    validate(colName, bitmapIndexFileOffHeap, cardinality, postingListMap);

    // GENERATE BITMAP USING HeapCreator and validate
    HeapBitmapInvertedIndexCreator heapCreator = new HeapBitmapInvertedIndexCreator(indexDirHeap, cardinality, numDocs, 0, spec);
    for (int i = 0; i < numDocs; i++) {
      heapCreator.add(i, data[i]);
    }
    heapCreator.seal();
    validate(colName, bitmapIndexFileHeap, cardinality, postingListMap);

    // Assert that the heap and off-heap creators produced files of the same size and content.
    Assert.assertEquals(bitmapIndexFileOffHeap.length(), bitmapIndexFileHeap.length());
    Assert.assertTrue(FileUtils.contentEquals(bitmapIndexFileOffHeap, bitmapIndexFileHeap));
  } finally {
    // Clean up even when an assertion fails so stale files do not leak into later runs.
    FileUtils.deleteQuietly(indexDirHeap);
    FileUtils.deleteQuietly(indexDirOffHeap);
  }
}
Use of com.linkedin.pinot.core.segment.creator.impl.inv.OffHeapBitmapInvertedIndexCreator in project pinot by linkedin.
Class BitmapInvertedIndexCreatorTest, method testMultiValue.
/**
 * Builds a bitmap inverted index for a random multi-value column with both the off-heap and the
 * heap creator, validates each result against the expected posting lists, and checks that the
 * two generated index files are identical.
 */
@Test
public void testMultiValue() throws IOException {
  boolean singleValue = false;
  String colName = "multi_value_col";
  FieldSpec spec = new DimensionFieldSpec(colName, DataType.INT, singleValue);
  int numDocs = 20;
  int[][] data = new int[numDocs][];
  int maxLength = 10;
  int cardinality = 10;
  File indexDirHeap = new File("/tmp/indexDirHeap");
  File indexDirOffHeap = new File("/tmp/indexDirOffHeap");
  try {
    // forceMkdir creates the directory (and parents) or throws, so no extra mkdirs() is needed.
    FileUtils.forceMkdir(indexDirHeap);
    FileUtils.forceMkdir(indexDirOffHeap);
    File bitmapIndexFileOffHeap = new File(indexDirOffHeap, colName + V1Constants.Indexes.BITMAP_INVERTED_INDEX_FILE_EXTENSION);
    File bitmapIndexFileHeap = new File(indexDirHeap, colName + V1Constants.Indexes.BITMAP_INVERTED_INDEX_FILE_EXTENSION);

    // GENERATE RANDOM MULTI VALUE DATA SET
    Random r = new Random();
    Map<Integer, Set<Integer>> postingListMap = new HashMap<>();
    for (int i = 0; i < cardinality; i++) {
      postingListMap.put(i, new LinkedHashSet<Integer>());
    }
    int totalNumberOfEntries = 0;
    for (int docId = 0; docId < numDocs; docId++) {
      int length = r.nextInt(maxLength);
      data[docId] = new int[length];
      totalNumberOfEntries += length;
      for (int j = 0; j < length; j++) {
        data[docId][j] = r.nextInt(cardinality);
        postingListMap.get(data[docId][j]).add(docId);
      }
      // Concatenating an int[] prints its identity hash, so render the contents explicitly.
      LOGGER.debug("docId:" + docId + " dictId:" + java.util.Arrays.toString(data[docId]));
    }
    for (int i = 0; i < cardinality; i++) {
      LOGGER.debug("Posting list for " + i + " : " + postingListMap.get(i));
    }

    // GENERATE BITMAP USING OffHeapCreator and validate
    OffHeapBitmapInvertedIndexCreator offHeapCreator = new OffHeapBitmapInvertedIndexCreator(indexDirOffHeap, cardinality, numDocs, totalNumberOfEntries, spec);
    for (int i = 0; i < numDocs; i++) {
      offHeapCreator.add(i, data[i]);
    }
    offHeapCreator.seal();
    validate(colName, bitmapIndexFileOffHeap, cardinality, postingListMap);

    // GENERATE BITMAP USING HeapCreator and validate
    HeapBitmapInvertedIndexCreator heapCreator = new HeapBitmapInvertedIndexCreator(indexDirHeap, cardinality, numDocs, totalNumberOfEntries, spec);
    for (int i = 0; i < numDocs; i++) {
      heapCreator.add(i, data[i]);
    }
    heapCreator.seal();
    validate(colName, bitmapIndexFileHeap, cardinality, postingListMap);

    // Assert that the heap and off-heap creators produced files of the same size and content.
    Assert.assertEquals(bitmapIndexFileOffHeap.length(), bitmapIndexFileHeap.length());
    Assert.assertTrue(FileUtils.contentEquals(bitmapIndexFileOffHeap, bitmapIndexFileHeap));
  } finally {
    // Clean up even when an assertion fails so stale files do not leak into later runs.
    FileUtils.deleteQuietly(indexDirHeap);
    FileUtils.deleteQuietly(indexDirOffHeap);
  }
}
Aggregations