Search in sources :

Example 36 with DimensionFieldSpec

use of com.linkedin.pinot.common.data.DimensionFieldSpec in project pinot by linkedin.

the class DataFetcherTest method setup.

/**
 * Builds the test fixture: generates {@code NUM_ROWS} rows of random dimension and metric values,
 * writes them into an index segment under {@code INDEX_DIR_PATH}, loads that segment on heap and
 * creates the {@code DataFetcher} under test from the segment's per-column data sources.
 *
 * <p>The generated values are also kept in the {@code _dimensionValues} / {@code _*MetricValues}
 * arrays so they can be compared against what is read back from the segment.
 *
 * @throws Exception if the segment cannot be built or loaded.
 */
@BeforeClass
private void setup() throws Exception {
    GenericRow[] segmentData = new GenericRow[NUM_ROWS];
    // Generate random dimension and metric values.
    for (int i = 0; i < NUM_ROWS; i++) {
        double randomDouble = _random.nextDouble();
        String randomDoubleString = String.valueOf(randomDouble);
        _dimensionValues[i] = randomDoubleString;
        // Assuming _random is a java.util.Random, nextDouble() lies in [0.0, 1.0), so casting it
        // directly to int/long always yields 0. Scale to the full positive range first so the
        // int and long metric columns actually contain varied values.
        _intMetricValues[i] = (int) (randomDouble * Integer.MAX_VALUE);
        _longMetricValues[i] = (long) (randomDouble * Long.MAX_VALUE);
        _floatMetricValues[i] = (float) randomDouble;
        _doubleMetricValues[i] = randomDouble;
        HashMap<String, Object> map = new HashMap<>();
        map.put(DIMENSION_NAME, _dimensionValues[i]);
        map.put(INT_METRIC_NAME, _intMetricValues[i]);
        map.put(LONG_METRIC_NAME, _longMetricValues[i]);
        map.put(FLOAT_METRIC_NAME, _floatMetricValues[i]);
        map.put(DOUBLE_METRIC_NAME, _doubleMetricValues[i]);
        // The NO_DICT_* columns carry the same values as their dictionary-encoded counterparts.
        map.put(NO_DICT_INT_METRIC_NAME, _intMetricValues[i]);
        map.put(NO_DICT_LONG_METRIC_NAME, _longMetricValues[i]);
        map.put(NO_DICT_FLOAT_METRIC_NAME, _floatMetricValues[i]);
        map.put(NO_DICT_DOUBLE_METRIC_NAME, _doubleMetricValues[i]);
        GenericRow genericRow = new GenericRow();
        genericRow.init(map);
        segmentData[i] = genericRow;
    }
    // Create an index segment with the random dimension and metric values.
    final Schema schema = new Schema();
    schema.addField(new DimensionFieldSpec(DIMENSION_NAME, FieldSpec.DataType.STRING, true));
    schema.addField(new MetricFieldSpec(INT_METRIC_NAME, FieldSpec.DataType.INT));
    schema.addField(new MetricFieldSpec(LONG_METRIC_NAME, FieldSpec.DataType.LONG));
    schema.addField(new MetricFieldSpec(FLOAT_METRIC_NAME, FieldSpec.DataType.FLOAT));
    schema.addField(new MetricFieldSpec(DOUBLE_METRIC_NAME, FieldSpec.DataType.DOUBLE));
    schema.addField(new MetricFieldSpec(NO_DICT_INT_METRIC_NAME, FieldSpec.DataType.INT));
    schema.addField(new MetricFieldSpec(NO_DICT_LONG_METRIC_NAME, FieldSpec.DataType.LONG));
    schema.addField(new MetricFieldSpec(NO_DICT_FLOAT_METRIC_NAME, FieldSpec.DataType.FLOAT));
    schema.addField(new MetricFieldSpec(NO_DICT_DOUBLE_METRIC_NAME, FieldSpec.DataType.DOUBLE));
    SegmentGeneratorConfig config = new SegmentGeneratorConfig(schema);
    // Remove any output left behind by a previous run before writing the new segment.
    FileUtils.deleteQuietly(new File(INDEX_DIR_PATH));
    config.setOutDir(INDEX_DIR_PATH);
    config.setSegmentName(SEGMENT_NAME);
    config.setRawIndexCreationColumns(Arrays.asList(NO_DICT_INT_METRIC_NAME, NO_DICT_LONG_METRIC_NAME, NO_DICT_FLOAT_METRIC_NAME, NO_DICT_DOUBLE_METRIC_NAME));
    SegmentIndexCreationDriverImpl driver = new SegmentIndexCreationDriverImpl();
    driver.init(config, new TestDataRecordReader(schema, segmentData));
    driver.build();
    IndexSegment indexSegment = Loaders.IndexSegment.load(new File(INDEX_DIR_PATH, SEGMENT_NAME), ReadMode.heap);
    Map<String, BaseOperator> dataSourceMap = new HashMap<>();
    for (String column : indexSegment.getColumnNames()) {
        dataSourceMap.put(column, indexSegment.getDataSource(column));
    }
    // Get a data fetcher for the index segment.
    _dataFetcher = new DataFetcher(dataSourceMap);
}
Also used : BaseOperator(com.linkedin.pinot.core.operator.BaseOperator) HashMap(java.util.HashMap) IndexSegment(com.linkedin.pinot.core.indexsegment.IndexSegment) Schema(com.linkedin.pinot.common.data.Schema) MetricFieldSpec(com.linkedin.pinot.common.data.MetricFieldSpec) SegmentIndexCreationDriverImpl(com.linkedin.pinot.core.segment.creator.impl.SegmentIndexCreationDriverImpl) GenericRow(com.linkedin.pinot.core.data.GenericRow) SegmentGeneratorConfig(com.linkedin.pinot.core.indexsegment.generator.SegmentGeneratorConfig) File(java.io.File) DimensionFieldSpec(com.linkedin.pinot.common.data.DimensionFieldSpec) TestDataRecordReader(com.linkedin.pinot.util.TestDataRecordReader) BeforeClass(org.testng.annotations.BeforeClass)

Example 37 with DimensionFieldSpec

use of com.linkedin.pinot.common.data.DimensionFieldSpec in project pinot by linkedin.

the class PinotSegmentRecordReaderTest method createPinotSchema.

/**
 * Creates the schema named "schema" used by this test: two string dimensions
 * ({@code D_SV_1}, {@code D_MV_1}), an int metric ({@code M1}), a float metric ({@code M2})
 * and a time field with LONG values at HOURS granularity ({@code TIME}).
 *
 * @return the populated schema.
 */
private Schema createPinotSchema() {
    // Field specs in the order they are registered with the schema.
    // NOTE(review): the boolean passed to DimensionFieldSpec is presumably the single-value
    // flag (true for D_SV_1, false for D_MV_1) - confirm against the constructor.
    FieldSpec[] fieldSpecs = {
        new DimensionFieldSpec(D_SV_1, DataType.STRING, true),
        new DimensionFieldSpec(D_MV_1, DataType.STRING, false),
        new MetricFieldSpec(M1, DataType.INT),
        new MetricFieldSpec(M2, DataType.FLOAT),
        new TimeFieldSpec(new TimeGranularitySpec(DataType.LONG, TimeUnit.HOURS, TIME))
    };

    Schema pinotSchema = new Schema();
    pinotSchema.setSchemaName("schema");
    for (FieldSpec fieldSpec : fieldSpecs) {
        pinotSchema.addField(fieldSpec);
    }
    return pinotSchema;
}
Also used : TimeGranularitySpec(com.linkedin.pinot.common.data.TimeGranularitySpec) Schema(com.linkedin.pinot.common.data.Schema) TimeFieldSpec(com.linkedin.pinot.common.data.TimeFieldSpec) MetricFieldSpec(com.linkedin.pinot.common.data.MetricFieldSpec) DimensionFieldSpec(com.linkedin.pinot.common.data.DimensionFieldSpec) FieldSpec(com.linkedin.pinot.common.data.FieldSpec)

Example 38 with DimensionFieldSpec

use of com.linkedin.pinot.common.data.DimensionFieldSpec in project pinot by linkedin.

the class SegmentInfoProvider method readOneSegment.

/**
   * Read the metadata of the given segmentFile and collect:
   * - Unique metric columns
   * - Unique single-value dimension columns (the time column, if present, is included here)
   * - Unique values for each single-value dimension columns
   *
   * If segmentFile is a regular file it is treated as a tar.gz archive and extracted into a
   * temporary directory, which is always removed before this method returns, whether it
   * completes normally or throws.
   *
   * @param segmentFile segment file (archive) or segment directory.
   * @param uniqueMetrics unique metric columns buffer.
   * @param uniqueSingleValueDimensions unique single-value dimension columns buffer.
   * @param singleValueDimensionValuesMap single-value dimension columns to unique values map buffer.
   * @throws Exception if the temporary directory cannot be created, the archive extracts to
   *         nothing, or the segment cannot be extracted, loaded or read.
   */
private void readOneSegment(File segmentFile, Set<String> uniqueMetrics, Set<String> uniqueSingleValueDimensions, Map<String, Set<Object>> singleValueDimensionValuesMap) throws Exception {
    File tmpDir = null;
    try {
        // Get segment directory from segment file (decompress if necessary).
        File segmentDir;
        if (segmentFile.isFile()) {
            // createTempFile is used only to reserve a unique name; the file is replaced by a directory.
            tmpDir = File.createTempFile(SEGMENT_INFO_PROVIDER, null, new File(TMP_DIR));
            FileUtils.deleteQuietly(tmpDir);
            if (!tmpDir.mkdir()) {
                throw new IllegalStateException("Failed to create temporary directory: " + tmpDir);
            }
            TarGzCompressionUtils.unTar(segmentFile, tmpDir);
            // listFiles() returns null on I/O error and an empty array if nothing was extracted.
            File[] extractedFiles = tmpDir.listFiles();
            if (extractedFiles == null || extractedFiles.length == 0) {
                throw new IllegalStateException("No segment directory found after extracting: " + segmentFile);
            }
            segmentDir = extractedFiles[0];
        } else {
            segmentDir = segmentFile;
        }
        IndexSegment indexSegment = Loaders.IndexSegment.load(segmentDir, ReadMode.heap);
        Schema schema = indexSegment.getSegmentMetadata().getSchema();
        // Add time column if exists.
        String timeColumn = schema.getTimeColumnName();
        if (timeColumn != null) {
            uniqueSingleValueDimensions.add(timeColumn);
            loadValuesForSingleValueDimension(indexSegment, singleValueDimensionValuesMap, timeColumn);
        }
        // Add all metric columns.
        uniqueMetrics.addAll(schema.getMetricNames());
        // Add all single-value dimension columns.
        for (DimensionFieldSpec fieldSpec : schema.getDimensionFieldSpecs()) {
            if (!fieldSpec.isSingleValueField()) {
                continue;
            }
            String column = fieldSpec.getName();
            uniqueSingleValueDimensions.add(column);
            loadValuesForSingleValueDimension(indexSegment, singleValueDimensionValuesMap, column);
        }
    } finally {
        // Clean up the extracted copy even when loading or reading the segment fails.
        if (tmpDir != null) {
            FileUtils.deleteQuietly(tmpDir);
        }
    }
}
Also used : IndexSegment(com.linkedin.pinot.core.indexsegment.IndexSegment) Schema(com.linkedin.pinot.common.data.Schema) File(java.io.File) DimensionFieldSpec(com.linkedin.pinot.common.data.DimensionFieldSpec)

Aggregations

DimensionFieldSpec (com.linkedin.pinot.common.data.DimensionFieldSpec)38 FieldSpec (com.linkedin.pinot.common.data.FieldSpec)27 Schema (com.linkedin.pinot.common.data.Schema)18 Test (org.testng.annotations.Test)17 MetricFieldSpec (com.linkedin.pinot.common.data.MetricFieldSpec)16 File (java.io.File)16 TimeFieldSpec (com.linkedin.pinot.common.data.TimeFieldSpec)13 HashMap (java.util.HashMap)9 GenericRow (com.linkedin.pinot.core.data.GenericRow)7 Random (java.util.Random)7 TimeGranularitySpec (com.linkedin.pinot.common.data.TimeGranularitySpec)6 AbstractColumnStatisticsCollector (com.linkedin.pinot.core.segment.creator.AbstractColumnStatisticsCollector)6 SegmentDictionaryCreator (com.linkedin.pinot.core.segment.creator.impl.SegmentDictionaryCreator)6 SegmentGeneratorConfig (com.linkedin.pinot.core.indexsegment.generator.SegmentGeneratorConfig)5 SegmentIndexCreationDriverImpl (com.linkedin.pinot.core.segment.creator.impl.SegmentIndexCreationDriverImpl)5 ArrayList (java.util.ArrayList)4 FieldType (com.linkedin.pinot.common.data.FieldSpec.FieldType)3 IndexSegment (com.linkedin.pinot.core.indexsegment.IndexSegment)3 DataType (com.linkedin.pinot.common.data.FieldSpec.DataType)2 RecordReader (com.linkedin.pinot.core.data.readers.RecordReader)2