Search in sources :

Example 1 with HllConfig

Use of com.linkedin.pinot.core.startree.hll.HllConfig in the pinot project by LinkedIn.

From the class SegmentIndexCreationDriverImpl, method populateDefaultDerivedColumnValues.

/**
 * Fills in the derived HLL fields of a row.
 *
 * <p>Does nothing unless HLL index creation is enabled. Otherwise, for every
 * (derived field, origin field) pair in the configured {@link HllConfig}, the derived field of
 * the row is set to the string produced by {@code HllUtil.singleValueHllAsString} from the
 * configured log2m and the row's value for the origin field.
 *
 * @param row the row to update in place
 * @throws IOException declared by the signature
 */
private void populateDefaultDerivedColumnValues(GenericRow row) throws IOException {
    if (!createHllIndex) {
        return;
    }
    HllConfig hllSettings = config.getHllConfig();
    for (Entry<String, String> derivedToOrigin : hllSettings.getDerivedHllFieldToOriginMap().entrySet()) {
        String derivedColumn = derivedToOrigin.getKey();
        String originColumn = derivedToOrigin.getValue();
        String hllValue = HllUtil.singleValueHllAsString(hllSettings.getHllLog2m(), row.getValue(originColumn));
        row.putField(derivedColumn, hllValue);
    }
}
Also used : HllConfig(com.linkedin.pinot.core.startree.hll.HllConfig)

Example 2 with HllConfig

Use of com.linkedin.pinot.core.startree.hll.HllConfig in the pinot project by LinkedIn.

From the class ColumnMetadataTest, method testHllIndexRelatedMetadata.

/**
 * Builds a segment from {@code data/test_data-sv.avro} with an {@link HllConfig} of log2m 9 that
 * derives an HLL field from {@code column7} using the suffix {@code _hllSuffix}, loads it back,
 * and checks the HLL-related segment and column metadata. The helper's temporary directory is
 * cleaned up whether or not the assertions pass.
 */
@Test
public void testHllIndexRelatedMetadata() throws Exception {
    SegmentWithHllIndexCreateHelper segmentHelper = null;
    try {
        // Build the segment with one derived HLL column.
        segmentHelper = new SegmentWithHllIndexCreateHelper("testHllIndexRelatedMetadata", getClass().getClassLoader().getResource("data/test_data-sv.avro"), "daysSinceEpoch", TimeUnit.DAYS, "starTreeSegment");
        HashSet<String> hllOriginColumns = new HashSet<String>(Arrays.asList("column7"));
        HllConfig hllConfig = new HllConfig(9, hllOriginColumns, "_hllSuffix");
        segmentHelper.build(true, hllConfig);

        // Load the segment and fetch its metadata.
        IndexSegment loadedSegment = Loaders.IndexSegment.load(segmentHelper.getSegmentDirectory(), ReadMode.mmap);
        SegmentMetadataImpl segmentMetadata = (SegmentMetadataImpl) loadedSegment.getSegmentMetadata();

        // Segment-level HLL information.
        Assert.assertEquals(segmentMetadata.getHllLog2m(), 9);
        StarTreeMetadata starTreeInfo = segmentMetadata.getStarTreeMetadata();
        Assert.assertNotNull(starTreeInfo);

        // Column-level HLL information for the derived column.
        ColumnMetadata derivedColumn = segmentMetadata.getColumnMetadataFor("column7_hllSuffix");
        Assert.assertEquals(derivedColumn.getDerivedMetricType(), MetricFieldSpec.DerivedMetricType.HLL);
        Assert.assertEquals(derivedColumn.getOriginColumnName(), "column7");
    } finally {
        if (segmentHelper != null) {
            segmentHelper.cleanTempDir();
        }
    }
}
Also used : StarTreeMetadata(com.linkedin.pinot.common.segment.StarTreeMetadata) IndexSegment(com.linkedin.pinot.core.indexsegment.IndexSegment) HllConfig(com.linkedin.pinot.core.startree.hll.HllConfig) SegmentWithHllIndexCreateHelper(com.linkedin.pinot.core.startree.hll.SegmentWithHllIndexCreateHelper) HashSet(java.util.HashSet) Test(org.testng.annotations.Test)

Example 3 with HllConfig

Use of com.linkedin.pinot.core.startree.hll.HllConfig in the pinot project by LinkedIn.

From the class SegmentIndexCreationDriverImpl, method addDerivedFieldsInSchema.

/**
 * Registers one HLL-derived metric field in the data schema for every derived field name in
 * the configured {@link HllConfig}. Does nothing unless HLL index creation is enabled.
 *
 * <p>Each added field is a {@code STRING} metric of the configured HLL field size, marked with
 * the {@code HLL} derived metric type.
 *
 * @throws IllegalArgumentException if a derived field name is already among the schema's
 *     column names
 */
private void addDerivedFieldsInSchema() {
    if (!createHllIndex) {
        return;
    }
    Collection<String> existingColumns = dataSchema.getColumnNames();
    HllConfig hllSettings = config.getHllConfig();
    for (String derivedColumn : hllSettings.getDerivedHllFieldToOriginMap().keySet()) {
        if (existingColumns.contains(derivedColumn)) {
            throw new IllegalArgumentException("Cannot add derived field: " + derivedColumn + " since it already exists in schema.");
        }
        MetricFieldSpec hllFieldSpec = new MetricFieldSpec(derivedColumn, FieldSpec.DataType.STRING, hllSettings.getHllFieldSize(), MetricFieldSpec.DerivedMetricType.HLL);
        dataSchema.addField(hllFieldSpec);
    }
}
Also used : HllConfig(com.linkedin.pinot.core.startree.hll.HllConfig) MetricFieldSpec(com.linkedin.pinot.common.data.MetricFieldSpec)

Example 4 with HllConfig

Use of com.linkedin.pinot.core.startree.hll.HllConfig in the pinot project by LinkedIn.

From the class SegmentIndexCreationDriverImpl, method init.

/**
 * Prepares this driver for segment creation.
 *
 * <p>Stores the configuration, obtains the record reader and its schema from the data source,
 * decides whether derived HLL fields must be generated (and adds them to the schema), creates
 * the field extractor, gathers segment statistics when star tree is disabled, and sets up the
 * index creator and the temporary working directories under the configured output directory.
 *
 * @param config segment generation configuration
 * @param dataSource source of the record reader and, when star tree is disabled, of the
 *     segment statistics
 * @throws IllegalArgumentException if derived HLL fields are requested while star tree is
 *     disabled
 * @throws java.io.IOException if the output directory does not exist and cannot be created
 * @throws Exception declared by the signature for failures in the called collaborators
 */
public void init(SegmentGeneratorConfig config, SegmentCreationDataSource dataSource) throws Exception {
    this.config = config;
    this.createStarTree = config.isEnableStarTreeIndex();
    recordReader = dataSource.getRecordReader();
    dataSchema = recordReader.getSchema();
    HllConfig hllConfig = config.getHllConfig();
    if (hllConfig != null) {
        // generate HLL fields
        if (hllConfig.getColumnsToDeriveHllFields() != null && !hllConfig.getColumnsToDeriveHllFields().isEmpty()) {
            if (!createStarTree) {
                throw new IllegalArgumentException("Derived HLL fields generation will not work if StarTree is not enabled.");
            }
            createHllIndex = true;
        }
        // else columnsToDeriveHllFields is null or empty...don't do anything in this case
        // segment seal() will write the log2m value to the metadata
    }
    addDerivedFieldsInSchema();
    extractor = FieldExtractorFactory.getPlainFieldExtractor(dataSchema);
    // Initialize stats collection
    if (!createStarTree) {
        // For star tree, the stats are gathered in buildStarTree()
        segmentStats = dataSource.gatherStats(extractor);
        totalDocs = segmentStats.getTotalDocCount();
        totalRawDocs = segmentStats.getRawDocCount();
        totalAggDocs = segmentStats.getAggregatedDocCount();
    }
    // Initialize index creation
    segmentIndexCreationInfo = new SegmentIndexCreationInfo();
    indexCreationInfoMap = new HashMap<>();
    indexCreator = new SegmentColumnarIndexCreator();
    // Ensure that the output directory exists. mkdirs() reports failure only through its return
    // value, so check it; the extra isDirectory() test tolerates the directory having been
    // created by someone else between the exists() check and the mkdirs() call.
    final File indexDir = new File(config.getOutDir());
    if (!indexDir.exists() && !indexDir.mkdirs() && !indexDir.isDirectory()) {
        throw new java.io.IOException("Failed to create output directory: " + indexDir);
    }
    // Create temporary directories used in segment creation
    tempIndexDir = new File(indexDir, com.linkedin.pinot.common.utils.FileUtils.getRandomFileName());
    starTreeTempDir = new File(indexDir, com.linkedin.pinot.common.utils.FileUtils.getRandomFileName());
    LOGGER.debug("tempIndexDir:{}", tempIndexDir);
    LOGGER.debug("starTreeTempDir:{}", starTreeTempDir);
}
Also used : SegmentIndexCreationInfo(com.linkedin.pinot.core.segment.creator.SegmentIndexCreationInfo) HllConfig(com.linkedin.pinot.core.startree.hll.HllConfig) File(java.io.File)

Example 5 with HllConfig

Use of com.linkedin.pinot.core.startree.hll.HllConfig in the pinot project by LinkedIn.

From the class SegmentColumnarIndexCreator, method writeMetadata.

/**
 * Writes the segment metadata properties file.
 *
 * <p>Properties are set in this order: general segment information and document counters,
 * star tree settings (when a star tree spec is configured), the HLL log2m (when an HLL config
 * is present), time range information, all custom properties from the configuration, and
 * finally the per-column metadata for every column in the index creation info map. Later
 * values for the same key replace earlier ones.
 *
 * @throws ConfigurationException if the properties file cannot be opened or saved
 */
void writeMetadata() throws ConfigurationException {
    File metadataFile = new File(file, V1Constants.MetadataKeys.METADATA_FILE_NAME);
    PropertiesConfiguration segmentProps = new PropertiesConfiguration(metadataFile);

    // General segment information and counters.
    segmentProps.setProperty(SEGMENT_CREATOR_VERSION, config.getCreatorVersion());
    String paddingCharacter = Character.toString(config.getPaddingCharacter());
    segmentProps.setProperty(SEGMENT_PADDING_CHARACTER, StringEscapeUtils.escapeJava(paddingCharacter));
    segmentProps.setProperty(SEGMENT_NAME, segmentName);
    segmentProps.setProperty(TABLE_NAME, config.getTableName());
    segmentProps.setProperty(DIMENSIONS, config.getDimensions());
    segmentProps.setProperty(METRICS, config.getMetrics());
    segmentProps.setProperty(TIME_COLUMN_NAME, config.getTimeColumnName());
    segmentProps.setProperty(TIME_INTERVAL, "not_there");
    segmentProps.setProperty(SEGMENT_TOTAL_RAW_DOCS, String.valueOf(totalRawDocs));
    segmentProps.setProperty(SEGMENT_TOTAL_AGGREGATE_DOCS, String.valueOf(totalAggDocs));
    segmentProps.setProperty(SEGMENT_TOTAL_DOCS, String.valueOf(totalDocs));
    segmentProps.setProperty(STAR_TREE_ENABLED, String.valueOf(config.isEnableStarTreeIndex()));
    segmentProps.setProperty(SEGMENT_TOTAL_ERRORS, String.valueOf(totalErrors));
    segmentProps.setProperty(SEGMENT_TOTAL_NULLS, String.valueOf(totalNulls));
    segmentProps.setProperty(SEGMENT_TOTAL_CONVERSIONS, String.valueOf(totalConversions));
    segmentProps.setProperty(SEGMENT_TOTAL_NULL_COLS, String.valueOf(totalNullCols));

    // Star tree settings, only when a spec is configured.
    StarTreeIndexSpec starTreeSpec = config.getStarTreeIndexSpec();
    if (starTreeSpec != null) {
        segmentProps.setProperty(STAR_TREE_SPLIT_ORDER, starTreeSpec.getDimensionsSplitOrder());
        segmentProps.setProperty(STAR_TREE_MAX_LEAF_RECORDS, starTreeSpec.getMaxLeafRecords());
        segmentProps.setProperty(STAR_TREE_SKIP_STAR_NODE_CREATION_FOR_DIMENSIONS, starTreeSpec.getSkipStarNodeCreationForDimensions());
        segmentProps.setProperty(STAR_TREE_SKIP_MATERIALIZATION_CARDINALITY, starTreeSpec.getskipMaterializationCardinalityThreshold());
        segmentProps.setProperty(STAR_TREE_SKIP_MATERIALIZATION_FOR_DIMENSIONS, starTreeSpec.getskipMaterializationForDimensions());
    }

    // HLL settings; the derived-to-origin map stays null when no HLL config is present.
    Map<String, String> derivedHllFieldToOriginMap = null;
    HllConfig hllSettings = config.getHllConfig();
    if (hllSettings != null) {
        segmentProps.setProperty(SEGMENT_HLL_LOG2M, hllSettings.getHllLog2m());
        derivedHllFieldToOriginMap = hllSettings.getDerivedHllFieldToOriginMap();
    }

    // Time range taken from the time column's creation info, when that column was indexed.
    String timeColumn = config.getTimeColumnName();
    ColumnIndexCreationInfo timeColumnInfo = indexCreationInfoMap.get(timeColumn);
    if (timeColumnInfo != null) {
        segmentProps.setProperty(SEGMENT_START_TIME, timeColumnInfo.getMin());
        segmentProps.setProperty(SEGMENT_END_TIME, timeColumnInfo.getMax());
        segmentProps.setProperty(TIME_UNIT, config.getSegmentTimeUnit());
    }

    // Values from the configuration replace the ones above when the matching custom property is set.
    if (config.containsCustomProperty(SEGMENT_START_TIME)) {
        segmentProps.setProperty(SEGMENT_START_TIME, config.getStartTime());
    }
    if (config.containsCustomProperty(SEGMENT_END_TIME)) {
        segmentProps.setProperty(SEGMENT_END_TIME, config.getEndTime());
    }
    if (config.containsCustomProperty(TIME_UNIT)) {
        segmentProps.setProperty(TIME_UNIT, config.getSegmentTimeUnit());
    }

    // Copy every custom property verbatim.
    for (Map.Entry<String, String> customProperty : config.getCustomProperties().entrySet()) {
        segmentProps.setProperty(customProperty.getKey(), customProperty.getValue());
    }

    // Per-column metadata.
    for (Map.Entry<String, ColumnIndexCreationInfo> columnEntry : indexCreationInfoMap.entrySet()) {
        String columnName = columnEntry.getKey();
        ColumnIndexCreationInfo creationInfo = columnEntry.getValue();

        // Dictionary element size is 0 for columns without a dictionary creator.
        SegmentDictionaryCreator dictCreator = dictionaryCreatorMap.get(columnName);
        int dictionaryElementSize = 0;
        if (dictCreator != null) {
            dictionaryElementSize = dictCreator.getStringColumnMaxLength();
        }

        // TODO: after fixing the server-side dependency on HAS_INVERTED_INDEX and deployed, set
        // HAS_INVERTED_INDEX properly. The flag in segment metadata is picked up in ColumnMetadata
        // and used during the query plan phase: if it is false, inverted indexes are not used in
        // queries even if they are created via table configs on segment load. So it is set to true
        // for now, until the server updates the value inside ColumnMetadata and exports to the
        // query planner that the available inverted index is current and can be used.
        boolean hasInvertedIndex = true;

        // Origin column of a derived HLL column; null for every other column.
        String hllOriginColumn = (derivedHllFieldToOriginMap == null) ? null : derivedHllFieldToOriginMap.get(columnName);

        boolean hasDictionary = dictionaryCreatorMap.containsKey(columnName);
        addColumnMetadataInfo(segmentProps, columnName, creationInfo, totalDocs, totalRawDocs, totalAggDocs, schema.getFieldSpecFor(columnName), hasDictionary, dictionaryElementSize, hasInvertedIndex, hllOriginColumn);
    }

    segmentProps.save();
}
Also used : HllConfig(com.linkedin.pinot.core.startree.hll.HllConfig) PropertiesConfiguration(org.apache.commons.configuration.PropertiesConfiguration) StarTreeIndexSpec(com.linkedin.pinot.common.data.StarTreeIndexSpec) ColumnIndexCreationInfo(com.linkedin.pinot.core.segment.creator.ColumnIndexCreationInfo) File(java.io.File) HashMap(java.util.HashMap) Map(java.util.Map)

Aggregations

HllConfig (com.linkedin.pinot.core.startree.hll.HllConfig): 5, File (java.io.File): 2, MetricFieldSpec (com.linkedin.pinot.common.data.MetricFieldSpec): 1, StarTreeIndexSpec (com.linkedin.pinot.common.data.StarTreeIndexSpec): 1, StarTreeMetadata (com.linkedin.pinot.common.segment.StarTreeMetadata): 1, IndexSegment (com.linkedin.pinot.core.indexsegment.IndexSegment): 1, ColumnIndexCreationInfo (com.linkedin.pinot.core.segment.creator.ColumnIndexCreationInfo): 1, SegmentIndexCreationInfo (com.linkedin.pinot.core.segment.creator.SegmentIndexCreationInfo): 1, SegmentWithHllIndexCreateHelper (com.linkedin.pinot.core.startree.hll.SegmentWithHllIndexCreateHelper): 1, HashMap (java.util.HashMap): 1, HashSet (java.util.HashSet): 1, Map (java.util.Map): 1, PropertiesConfiguration (org.apache.commons.configuration.PropertiesConfiguration): 1, Test (org.testng.annotations.Test): 1