Example usage of com.linkedin.pinot.core.startree.hll.HllConfig in the pinot project (LinkedIn):
class SegmentIndexCreationDriverImpl, method populateDefaultDerivedColumnValues.
/**
 * Fills in the derived HLL column values for a single input row.
 *
 * For every (derivedField -> originField) pair configured in {@link HllConfig},
 * the origin field's value is serialized as an HLL string and stored under the
 * derived field name. No-op when the HLL index is not enabled.
 *
 * @param row the row being built; mutated in place
 * @throws IOException if HLL serialization fails
 */
private void populateDefaultDerivedColumnValues(GenericRow row) throws IOException {
  // Derived HLL columns exist only when the HLL index was requested.
  if (!createHllIndex) {
    return;
  }
  HllConfig hllConfig = config.getHllConfig();
  // log2m is constant per segment; read it once outside the loop.
  int log2m = hllConfig.getHllLog2m();
  for (Entry<String, String> derivedToOrigin : hllConfig.getDerivedHllFieldToOriginMap().entrySet()) {
    String derivedField = derivedToOrigin.getKey();
    Object originValue = row.getValue(derivedToOrigin.getValue());
    row.putField(derivedField, HllUtil.singleValueHllAsString(log2m, originValue));
  }
}
Example usage of com.linkedin.pinot.core.startree.hll.HllConfig in the pinot project (LinkedIn):
class ColumnMetadataTest, method testHllIndexRelatedMetadata.
/**
 * Builds a star-tree segment with a derived HLL column over "column7"
 * (log2m = 9, suffix "_hllSuffix") and verifies that the HLL-related
 * information survives the metadata round trip: segment-level log2m,
 * star-tree metadata presence, and the derived column's metric type and
 * origin-column link.
 */
@Test
public void testHllIndexRelatedMetadata() throws Exception {
  SegmentWithHllIndexCreateHelper segmentHelper = null;
  try {
    // Build a segment from the test Avro data with an HLL column derived from "column7".
    segmentHelper = new SegmentWithHllIndexCreateHelper(
        "testHllIndexRelatedMetadata",
        getClass().getClassLoader().getResource("data/test_data-sv.avro"),
        "daysSinceEpoch", TimeUnit.DAYS, "starTreeSegment");
    HllConfig hllConfig = new HllConfig(9, new HashSet<String>(Arrays.asList("column7")), "_hllSuffix");
    segmentHelper.build(true, hllConfig);

    // Reload the segment and inspect its metadata.
    IndexSegment segment = Loaders.IndexSegment.load(segmentHelper.getSegmentDirectory(), ReadMode.mmap);
    SegmentMetadataImpl metadata = (SegmentMetadataImpl) segment.getSegmentMetadata();
    Assert.assertEquals(metadata.getHllLog2m(), 9);

    // Star-tree metadata must be present for an HLL-derived segment.
    StarTreeMetadata starTreeMetadata = metadata.getStarTreeMetadata();
    Assert.assertNotNull(starTreeMetadata);

    // The derived column carries the HLL metric type and points back to its origin column.
    ColumnMetadata column = metadata.getColumnMetadataFor("column7_hllSuffix");
    Assert.assertEquals(column.getDerivedMetricType(), MetricFieldSpec.DerivedMetricType.HLL);
    Assert.assertEquals(column.getOriginColumnName(), "column7");
  } finally {
    if (segmentHelper != null) {
      segmentHelper.cleanTempDir();
    }
  }
}
Example usage of com.linkedin.pinot.core.startree.hll.HllConfig in the pinot project (LinkedIn):
class SegmentIndexCreationDriverImpl, method addDerivedFieldsInSchema.
/**
 * Registers the derived HLL fields (configured in {@link HllConfig}) as STRING
 * metric columns in the data schema. No-op when the HLL index is not enabled.
 *
 * @throws IllegalArgumentException if a derived field name collides with an
 *         existing schema column
 */
private void addDerivedFieldsInSchema() {
  if (!createHllIndex) {
    return;
  }
  HllConfig hllConfig = config.getHllConfig();
  Collection<String> existingColumns = dataSchema.getColumnNames();
  for (String derivedField : hllConfig.getDerivedHllFieldToOriginMap().keySet()) {
    // A derived field must not shadow a column already present in the schema.
    if (existingColumns.contains(derivedField)) {
      throw new IllegalArgumentException(
          "Cannot add derived field: " + derivedField + " since it already exists in schema.");
    }
    // HLL values are stored serialized, hence the STRING data type with a fixed field size.
    dataSchema.addField(new MetricFieldSpec(derivedField, FieldSpec.DataType.STRING,
        hllConfig.getHllFieldSize(), MetricFieldSpec.DerivedMetricType.HLL));
  }
}
Example usage of com.linkedin.pinot.core.startree.hll.HllConfig in the pinot project (LinkedIn):
class SegmentIndexCreationDriverImpl, method init.
/**
 * Initializes the segment creation driver: wires up the record reader and schema,
 * enables derived-HLL-field generation when configured, gathers stats (unless a
 * star tree will gather them later), and prepares the output and temp directories.
 *
 * @param config     segment generation configuration (kept as a field)
 * @param dataSource source of records and (optionally) pre-gathered stats
 * @throws IllegalArgumentException if HLL derivation is requested without star tree
 * @throws IllegalStateException    if the output directory cannot be created
 * @throws Exception                propagated from the data source / record reader
 */
public void init(SegmentGeneratorConfig config, SegmentCreationDataSource dataSource) throws Exception {
  this.config = config;
  this.createStarTree = config.isEnableStarTreeIndex();
  recordReader = dataSource.getRecordReader();
  dataSchema = recordReader.getSchema();
  if (config.getHllConfig() != null) {
    HllConfig hllConfig = config.getHllConfig();
    // generate HLL fields
    if (hllConfig.getColumnsToDeriveHllFields() != null && !hllConfig.getColumnsToDeriveHllFields().isEmpty()) {
      if (!createStarTree) {
        throw new IllegalArgumentException("Derived HLL fields generation will not work if StarTree is not enabled.");
      } else {
        createHllIndex = true;
      }
    }
    // else columnsToDeriveHllFields is null...don't do anything in this case
    // segment seal() will write the log2m value to the metadata
  }
  addDerivedFieldsInSchema();
  extractor = FieldExtractorFactory.getPlainFieldExtractor(dataSchema);
  // Initialize stats collection
  if (!createStarTree) {
    // For star tree, the stats are gathered in buildStarTree()
    segmentStats = dataSource.gatherStats(extractor);
    totalDocs = segmentStats.getTotalDocCount();
    totalRawDocs = segmentStats.getRawDocCount();
    totalAggDocs = segmentStats.getAggregatedDocCount();
  }
  // Initialize index creation
  segmentIndexCreationInfo = new SegmentIndexCreationInfo();
  indexCreationInfoMap = new HashMap<>();
  // Check if has star tree
  indexCreator = new SegmentColumnarIndexCreator();
  // Ensure that the output directory exists. Fail fast if it cannot be created,
  // instead of letting segment creation fail later with an obscure error.
  final File indexDir = new File(config.getOutDir());
  if (!indexDir.exists() && !indexDir.mkdirs() && !indexDir.isDirectory()) {
    throw new IllegalStateException("Failed to create output directory: " + indexDir.getAbsolutePath());
  }
  // Create a temporary directory used in segment creation
  tempIndexDir = new File(indexDir, com.linkedin.pinot.common.utils.FileUtils.getRandomFileName());
  starTreeTempDir = new File(indexDir, com.linkedin.pinot.common.utils.FileUtils.getRandomFileName());
  LOGGER.debug("tempIndexDir:{}", tempIndexDir);
  LOGGER.debug("starTreeTempDir:{}", starTreeTempDir);
}
Example usage of com.linkedin.pinot.core.startree.hll.HllConfig in the pinot project (LinkedIn):
class SegmentColumnarIndexCreator, method writeMetadata.
/**
 * Persists segment-level and per-column metadata into the segment's
 * metadata.properties file.
 *
 * Write order matters for the time-range keys: values derived from the
 * time-column stats are written first and may then be overwritten by
 * custom properties from the config.
 *
 * @throws ConfigurationException if the properties file cannot be created or saved
 */
void writeMetadata() throws ConfigurationException {
PropertiesConfiguration properties = new PropertiesConfiguration(new File(file, V1Constants.MetadataKeys.METADATA_FILE_NAME));
// Segment-level properties.
properties.setProperty(SEGMENT_CREATOR_VERSION, config.getCreatorVersion());
// Escaped so a non-printable padding character survives the properties round trip.
properties.setProperty(SEGMENT_PADDING_CHARACTER, StringEscapeUtils.escapeJava(Character.toString(config.getPaddingCharacter())));
properties.setProperty(SEGMENT_NAME, segmentName);
properties.setProperty(TABLE_NAME, config.getTableName());
properties.setProperty(DIMENSIONS, config.getDimensions());
properties.setProperty(METRICS, config.getMetrics());
properties.setProperty(TIME_COLUMN_NAME, config.getTimeColumnName());
// Placeholder; the actual time range is written below via SEGMENT_START_TIME / SEGMENT_END_TIME.
properties.setProperty(TIME_INTERVAL, "not_there");
// Document counts: raw (unaggregated) + aggregated docs make up the total.
properties.setProperty(SEGMENT_TOTAL_RAW_DOCS, String.valueOf(totalRawDocs));
properties.setProperty(SEGMENT_TOTAL_AGGREGATE_DOCS, String.valueOf(totalAggDocs));
properties.setProperty(SEGMENT_TOTAL_DOCS, String.valueOf(totalDocs));
properties.setProperty(STAR_TREE_ENABLED, String.valueOf(config.isEnableStarTreeIndex()));
// Ingestion quality counters gathered during record extraction.
properties.setProperty(SEGMENT_TOTAL_ERRORS, String.valueOf(totalErrors));
properties.setProperty(SEGMENT_TOTAL_NULLS, String.valueOf(totalNulls));
properties.setProperty(SEGMENT_TOTAL_CONVERSIONS, String.valueOf(totalConversions));
properties.setProperty(SEGMENT_TOTAL_NULL_COLS, String.valueOf(totalNullCols));
// Star-tree settings, written only when a star-tree index is configured.
StarTreeIndexSpec starTreeIndexSpec = config.getStarTreeIndexSpec();
if (starTreeIndexSpec != null) {
properties.setProperty(STAR_TREE_SPLIT_ORDER, starTreeIndexSpec.getDimensionsSplitOrder());
properties.setProperty(STAR_TREE_MAX_LEAF_RECORDS, starTreeIndexSpec.getMaxLeafRecords());
properties.setProperty(STAR_TREE_SKIP_STAR_NODE_CREATION_FOR_DIMENSIONS, starTreeIndexSpec.getSkipStarNodeCreationForDimensions());
properties.setProperty(STAR_TREE_SKIP_MATERIALIZATION_CARDINALITY, starTreeIndexSpec.getskipMaterializationCardinalityThreshold());
properties.setProperty(STAR_TREE_SKIP_MATERIALIZATION_FOR_DIMENSIONS, starTreeIndexSpec.getskipMaterializationForDimensions());
}
// HLL settings: record log2m and keep the derived-field -> origin-column map
// so each derived column's metadata can reference its origin column below.
HllConfig hllConfig = config.getHllConfig();
Map<String, String> derivedHllFieldToOriginMap = null;
if (hllConfig != null) {
properties.setProperty(SEGMENT_HLL_LOG2M, hllConfig.getHllLog2m());
derivedHllFieldToOriginMap = hllConfig.getDerivedHllFieldToOriginMap();
}
// Time range from the gathered time-column stats (absent if no stats for the time column).
String timeColumn = config.getTimeColumnName();
if (indexCreationInfoMap.get(timeColumn) != null) {
properties.setProperty(SEGMENT_START_TIME, indexCreationInfoMap.get(timeColumn).getMin());
properties.setProperty(SEGMENT_END_TIME, indexCreationInfoMap.get(timeColumn).getMax());
properties.setProperty(TIME_UNIT, config.getSegmentTimeUnit());
}
// Custom properties from the config override the stats-derived time values above.
if (config.containsCustomProperty(SEGMENT_START_TIME)) {
properties.setProperty(SEGMENT_START_TIME, config.getStartTime());
}
if (config.containsCustomProperty(SEGMENT_END_TIME)) {
properties.setProperty(SEGMENT_END_TIME, config.getEndTime());
}
if (config.containsCustomProperty(TIME_UNIT)) {
properties.setProperty(TIME_UNIT, config.getSegmentTimeUnit());
}
// Any remaining custom properties are copied through verbatim.
for (Map.Entry<String, String> entry : config.getCustomProperties().entrySet()) {
properties.setProperty(entry.getKey(), entry.getValue());
}
// Per-column metadata.
for (Map.Entry<String, ColumnIndexCreationInfo> entry : indexCreationInfoMap.entrySet()) {
String column = entry.getKey();
ColumnIndexCreationInfo columnIndexCreationInfo = entry.getValue();
SegmentDictionaryCreator dictionaryCreator = dictionaryCreatorMap.get(column);
// Columns without a dictionary get element size 0.
int dictionaryElementSize = (dictionaryCreator != null) ? dictionaryCreator.getStringColumnMaxLength() : 0;
// TODO: after fixing the server-side dependency on HAS_INVERTED_INDEX and deployed, set HAS_INVERTED_INDEX properly
// The hasInvertedIndex flag in segment metadata is picked up in ColumnMetadata, and will be used during the query
// plan phase. If it is set to false, then inverted indexes are not used in queries even if they are created via table
// configs on segment load. So, we set it to true here for now, until we fix the server to update the value inside
// ColumnMetadata, export information to the query planner that the inverted index available is current and can be used.
//
// boolean hasInvertedIndex = invertedIndexCreatorMap.containsKey();
boolean hasInvertedIndex = true;
// For a derived HLL column, record which origin column it was computed from (null otherwise).
String hllOriginColumn = null;
if (derivedHllFieldToOriginMap != null) {
hllOriginColumn = derivedHllFieldToOriginMap.get(column);
}
addColumnMetadataInfo(properties, column, columnIndexCreationInfo, totalDocs, totalRawDocs, totalAggDocs, schema.getFieldSpecFor(column), dictionaryCreatorMap.containsKey(column), dictionaryElementSize, hasInvertedIndex, hllOriginColumn);
}
properties.save();
}
Aggregations