Use of com.linkedin.pinot.core.segment.creator.SingleValueRawIndexCreator in project pinot by linkedin.
The method convertOneColumn of the class DictionaryToRawIndexConverter.
/**
 * Helper method to perform conversion for the specific column.
 *
 * <p>Reads every dictionary id of the column's forward index, looks up the corresponding
 * value in the dictionary, and writes that value through a raw index creator. The column is
 * skipped (an error is logged and the method returns without writing anything) when it has
 * no dictionary or when it is multi-valued.
 *
 * @param segment Input segment to convert
 * @param column Column to convert
 * @param newSegment Directory where raw index to be written
 * @throws IOException if creating, writing, or closing the raw index fails
 */
private void convertOneColumn(IndexSegment segment, String column, File newSegment) throws IOException {
  DataSource dataSource = segment.getDataSource(column);
  Dictionary dictionary = dataSource.getDictionary();
  if (dictionary == null) {
    LOGGER.error("Column '{}' does not have dictionary, cannot convert to raw index.", column);
    return;
  }

  DataSourceMetadata dataSourceMetadata = dataSource.getDataSourceMetadata();
  if (!dataSourceMetadata.isSingleValue()) {
    LOGGER.error("Cannot convert multi-valued columns '{}'", column);
    return;
  }

  int totalDocs = segment.getSegmentMetadata().getTotalDocs();
  BlockSingleValIterator bvIter = (BlockSingleValIterator) dataSource.getNextBlock().getBlockValueSet().iterator();
  FieldSpec.DataType dataType = dataSourceMetadata.getDataType();

  // The longest-entry length is only computed for STRING columns; -1 is passed for all other types.
  int lengthOfLongestEntry =
      (dataType == FieldSpec.DataType.STRING) ? getLengthOfLongestEntry(bvIter, dictionary) : -1;

  SingleValueRawIndexCreator rawIndexCreator = SegmentColumnarIndexCreator.getRawIndexCreatorForColumn(newSegment,
      column, dataType, totalDocs, lengthOfLongestEntry);
  try {
    int docId = 0;
    // The iterator may already have been consumed while computing the longest entry; rewind it.
    bvIter.reset();
    while (bvIter.hasNext()) {
      int dictId = bvIter.nextIntVal();
      Object value = dictionary.get(dictId);
      rawIndexCreator.index(docId++, value);
      if (docId % 1000000 == 0) {
        LOGGER.info("Converted {} records.", docId);
      }
    }
  } finally {
    // Always release the creator, even when indexing fails part-way, so it does not leak.
    rawIndexCreator.close();
  }

  // Only reached when the raw index was fully written and closed without an exception.
  deleteForwardIndex(newSegment.getParentFile(), column, dataSourceMetadata.isSorted());
}
Aggregations