Search in sources :

Example 1 with SingleValueRawIndexCreator

Use of com.linkedin.pinot.core.segment.creator.SingleValueRawIndexCreator in the pinot project by linkedin.

From the class DictionaryToRawIndexConverter, method convertOneColumn:

/**
 * Helper method to perform conversion for the specific column.
 *
 * <p>Reads the dictionary id of every document through the column's single-value block iterator,
 * looks the value up in the column's dictionary, and hands it to a raw index creator obtained for
 * {@code newSegment}. Once the creator has been closed, {@code deleteForwardIndex} is invoked for
 * the column on the parent directory of {@code newSegment}.
 *
 * <p>A column that has no dictionary, or that is multi-valued, is not converted: an error is logged
 * and the method returns without writing anything.
 *
 * <p>The raw index creator is closed even when the conversion fails part-way, so it is not leaked
 * on error. The existing forward index is only deleted after a successful conversion.
 *
 * @param segment Input segment to convert
 * @param column Column to convert
 * @param newSegment Directory where raw index to be written
 * @throws IOException if an I/O error occurs during the conversion
 */
private void convertOneColumn(IndexSegment segment, String column, File newSegment) throws IOException {
    DataSource dataSource = segment.getDataSource(column);
    Dictionary dictionary = dataSource.getDictionary();
    if (dictionary == null) {
        LOGGER.error("Column '{}' does not have dictionary, cannot convert to raw index.", column);
        return;
    }
    DataSourceMetadata dataSourceMetadata = dataSource.getDataSourceMetadata();
    if (!dataSourceMetadata.isSingleValue()) {
        LOGGER.error("Cannot convert multi-valued columns '{}'", column);
        return;
    }
    int totalDocs = segment.getSegmentMetadata().getTotalDocs();
    BlockSingleValIterator bvIter = (BlockSingleValIterator) dataSource.getNextBlock().getBlockValueSet().iterator();
    FieldSpec.DataType dataType = dataSourceMetadata.getDataType();
    // The longest-entry length is only computed for STRING columns; -1 is passed for every other type.
    int lengthOfLongestEntry = (dataType == FieldSpec.DataType.STRING) ? getLengthOfLongestEntry(bvIter, dictionary) : -1;
    SingleValueRawIndexCreator rawIndexCreator = SegmentColumnarIndexCreator.getRawIndexCreatorForColumn(newSegment, column, dataType, totalDocs, lengthOfLongestEntry);
    try {
        int docId = 0;
        // The iterator was handed to getLengthOfLongestEntry above for STRING columns, so rewind it
        // before the actual conversion pass.
        bvIter.reset();
        while (bvIter.hasNext()) {
            int dictId = bvIter.nextIntVal();
            Object value = dictionary.get(dictId);
            rawIndexCreator.index(docId++, value);
            if (docId % 1000000 == 0) {
                LOGGER.info("Converted {} records.", docId);
            }
        }
    } finally {
        // Always release the creator, including when the loop above throws.
        rawIndexCreator.close();
    }
    // Reached only when the raw index was fully written and closed without an exception.
    deleteForwardIndex(newSegment.getParentFile(), column, dataSourceMetadata.isSorted());
}
Also used : Dictionary(com.linkedin.pinot.core.segment.index.readers.Dictionary) SingleValueRawIndexCreator(com.linkedin.pinot.core.segment.creator.SingleValueRawIndexCreator) DataSourceMetadata(com.linkedin.pinot.core.common.DataSourceMetadata) BlockSingleValIterator(com.linkedin.pinot.core.common.BlockSingleValIterator) FieldSpec(com.linkedin.pinot.common.data.FieldSpec) DataSource(com.linkedin.pinot.core.common.DataSource)

Aggregations

FieldSpec (com.linkedin.pinot.common.data.FieldSpec)1 BlockSingleValIterator (com.linkedin.pinot.core.common.BlockSingleValIterator)1 DataSource (com.linkedin.pinot.core.common.DataSource)1 DataSourceMetadata (com.linkedin.pinot.core.common.DataSourceMetadata)1 SingleValueRawIndexCreator (com.linkedin.pinot.core.segment.creator.SingleValueRawIndexCreator)1 Dictionary (com.linkedin.pinot.core.segment.index.readers.Dictionary)1