Search in sources :

Example 16 with DimensionFieldSpec

use of com.linkedin.pinot.common.data.DimensionFieldSpec in project pinot by linkedin.

the class DefaultGroupKeyGeneratorTest method setup.

/**
 * Builds the fixture used by this test class: generates random rows, writes them into an index
 * segment under {@code INDEX_DIR_PATH}, loads that segment back, and wraps a fixed-size set of
 * doc ids in a {@link TransformBlock} stored in {@code _transformBlock}.
 *
 * @throws Exception if segment creation or loading fails
 */
@BeforeClass
private void setup() throws Exception {
    GenericRow[] segmentData = new GenericRow[NUM_ROWS];
    // Running value shared by all columns; it is only ever increased below.
    // NOTE(review): "strictly increasing" holds if _random.nextInt(bound) is non-negative
    // (true for java.util.Random) — confirm the type of _random.
    int value = _random.nextInt(MAX_STEP_LENGTH);
    // Generate random values for the segment.
    for (int i = 0; i < UNIQUE_ROWS; i++) {
        Map<String, Object> map = new HashMap<>();
        // One int per single-value column, advancing the running value after each assignment.
        for (String singleValueColumn : SINGLE_VALUE_COLUMNS) {
            map.put(singleValueColumn, value);
            value += 1 + _random.nextInt(MAX_STEP_LENGTH);
        }
        // Between 1 and MAX_NUM_MULTI_VALUES ints per multi-value column, drawn from the same
        // running value.
        for (String multiValueColumn : MULTI_VALUE_COLUMNS) {
            int numMultiValues = 1 + _random.nextInt(MAX_NUM_MULTI_VALUES);
            Integer[] values = new Integer[numMultiValues];
            for (int k = 0; k < numMultiValues; k++) {
                values[k] = value;
                value += 1 + _random.nextInt(MAX_STEP_LENGTH);
            }
            map.put(multiValueColumn, values);
        }
        GenericRow genericRow = new GenericRow();
        genericRow.init(map);
        segmentData[i] = genericRow;
    }
    // Fill the rest of the array by repeating the first UNIQUE_ROWS rows (same row references).
    // NOTE(review): each copy writes UNIQUE_ROWS entries starting at i, so this assumes NUM_ROWS
    // is a multiple of UNIQUE_ROWS; otherwise the last copy would run past the array — confirm.
    for (int i = UNIQUE_ROWS; i < NUM_ROWS; i += UNIQUE_ROWS) {
        System.arraycopy(segmentData, 0, segmentData, i, UNIQUE_ROWS);
    }
    // Create an index segment with the random values.
    Schema schema = new Schema();
    // Third constructor argument: true for the single-value columns, false for the multi-value ones.
    for (String singleValueColumn : SINGLE_VALUE_COLUMNS) {
        DimensionFieldSpec dimensionFieldSpec = new DimensionFieldSpec(singleValueColumn, FieldSpec.DataType.INT, true);
        schema.addField(dimensionFieldSpec);
    }
    for (String multiValueColumn : MULTI_VALUE_COLUMNS) {
        DimensionFieldSpec dimensionFieldSpec = new DimensionFieldSpec(multiValueColumn, FieldSpec.DataType.INT, false);
        schema.addField(dimensionFieldSpec);
    }
    SegmentGeneratorConfig config = new SegmentGeneratorConfig(schema);
    // Remove any output left over from a previous run before pointing the generator at it.
    FileUtils.deleteQuietly(new File(INDEX_DIR_PATH));
    config.setOutDir(INDEX_DIR_PATH);
    config.setSegmentName(SEGMENT_NAME);
    SegmentIndexCreationDriverImpl driver = new SegmentIndexCreationDriverImpl();
    driver.init(config, new TestDataRecordReader(schema, segmentData));
    driver.build();
    IndexSegment indexSegment = Loaders.IndexSegment.load(new File(INDEX_DIR_PATH, SEGMENT_NAME), ReadMode.heap);
    // Get a data fetcher for the index segment.
    // Both maps are keyed by column name: one holds the column's data source, the other the
    // block obtained from that data source.
    Map<String, BaseOperator> dataSourceMap = new HashMap<>();
    Map<String, Block> blockMap = new HashMap<>();
    for (String column : indexSegment.getColumnNames()) {
        DataSource dataSource = indexSegment.getDataSource(column);
        dataSourceMap.put(column, dataSource);
        blockMap.put(column, dataSource.getNextBlock());
    }
    // Generate a random test doc id set.
    // num1 is in [0, 50) and num2 in [num1 + 1, num1 + 50]; each loop step writes the pair
    // (num1 + 50 * i, num2 + 50 * i) for even i, so the 20 ids written are strictly increasing
    // and at most 999.
    // NOTE(review): assumes _testDocIdSet has at least 20 slots and the segment holds at least
    // 1000 docs — confirm against the field declaration and NUM_ROWS.
    int num1 = _random.nextInt(50);
    int num2 = num1 + 1 + _random.nextInt(50);
    for (int i = 0; i < 20; i += 2) {
        _testDocIdSet[i] = num1 + 50 * i;
        _testDocIdSet[i + 1] = num2 + 50 * i;
    }
    DataFetcher dataFetcher = new DataFetcher(dataSourceMap);
    DocIdSetBlock docIdSetBlock = new DocIdSetBlock(_testDocIdSet, _testDocIdSet.length);
    ProjectionBlock projectionBlock = new ProjectionBlock(blockMap, new DataBlockCache(dataFetcher), docIdSetBlock);
    // No transform expressions are registered: the transform block is given an empty map.
    _transformBlock = new TransformBlock(projectionBlock, new HashMap<String, BlockValSet>());
}
Also used : BaseOperator(com.linkedin.pinot.core.operator.BaseOperator) HashMap(java.util.HashMap) Schema(com.linkedin.pinot.common.data.Schema) GenericRow(com.linkedin.pinot.core.data.GenericRow) ProjectionBlock(com.linkedin.pinot.core.operator.blocks.ProjectionBlock) TransformBlock(com.linkedin.pinot.core.operator.blocks.TransformBlock) SegmentGeneratorConfig(com.linkedin.pinot.core.indexsegment.generator.SegmentGeneratorConfig) DimensionFieldSpec(com.linkedin.pinot.common.data.DimensionFieldSpec) IndexSegment(com.linkedin.pinot.core.indexsegment.IndexSegment) DocIdSetBlock(com.linkedin.pinot.core.operator.blocks.DocIdSetBlock) SegmentIndexCreationDriverImpl(com.linkedin.pinot.core.segment.creator.impl.SegmentIndexCreationDriverImpl) DataSource(com.linkedin.pinot.core.common.DataSource) Block(com.linkedin.pinot.core.common.Block) DocIdSetBlock(com.linkedin.pinot.core.operator.blocks.DocIdSetBlock) TransformBlock(com.linkedin.pinot.core.operator.blocks.TransformBlock) ProjectionBlock(com.linkedin.pinot.core.operator.blocks.ProjectionBlock) DataFetcher(com.linkedin.pinot.core.common.DataFetcher) DataBlockCache(com.linkedin.pinot.core.common.DataBlockCache) File(java.io.File) TestDataRecordReader(com.linkedin.pinot.util.TestDataRecordReader) BeforeClass(org.testng.annotations.BeforeClass)

Example 17 with DimensionFieldSpec

use of com.linkedin.pinot.common.data.DimensionFieldSpec in project pinot by linkedin.

the class TestOffheapStarTreeBuilder method testRandom.

/**
 * Smoke test for {@link OffHeapStarTreeBuilder}: appends 100 rows of random data over six
 * dimensions ("d1".."d6"), six metrics ("n1".."n6") and a "daysSinceEpoch" time column, then
 * builds the tree. The test passes if no exception is thrown.
 *
 * <p>The output directory is removed in a {@code finally} block so that a failure during
 * {@code init}, {@code append} or {@code build} does not leave the hard-coded directory behind
 * for later runs.
 *
 * @throws Exception if building the star tree or cleaning up the output directory fails
 */
@Test
public void testRandom() throws Exception {
    int numRows = 100;
    int numDimensions = 6;
    int numMetrics = 6;
    StarTreeBuilderConfig builderConfig = new StarTreeBuilderConfig();
    Schema schema = new Schema();
    builderConfig.dimensionsSplitOrder = new ArrayList<>();
    for (int i = 0; i < numDimensions; i++) {
        String dimName = "d" + (i + 1);
        // NOTE(review): dimensions are declared INT here but populated with String values
        // ("d1-v3") below — confirm this mismatch is intended.
        DimensionFieldSpec dimensionFieldSpec = new DimensionFieldSpec(dimName, DataType.INT, true);
        schema.addField(dimensionFieldSpec);
        builderConfig.dimensionsSplitOrder.add(dimName);
    }
    schema.setTimeFieldSpec(new TimeFieldSpec("daysSinceEpoch", DataType.INT, TimeUnit.DAYS));
    for (int i = 0; i < numMetrics; i++) {
        String metricName = "n" + (i + 1);
        MetricFieldSpec metricFieldSpec = new MetricFieldSpec(metricName, DataType.INT);
        schema.addField(metricFieldSpec);
    }
    builderConfig.maxLeafRecords = 10;
    builderConfig.schema = schema;
    builderConfig.outDir = new File("/tmp/startree");
    try {
        OffHeapStarTreeBuilder builder = new OffHeapStarTreeBuilder();
        builder.init(builderConfig);
        Random r = new Random();
        // The same map instance is refilled for every row; every key is overwritten each time.
        HashMap<String, Object> map = new HashMap<>();
        for (int row = 0; row < numRows; row++) {
            for (int i = 0; i < numDimensions; i++) {
                String dimName = schema.getDimensionFieldSpecs().get(i).getName();
                // Later dimensions draw from a smaller range (bound shrinks from 8 down to 3).
                map.put(dimName, dimName + "-v" + r.nextInt((numDimensions - i + 2)));
            }
            //time
            map.put("daysSinceEpoch", r.nextInt(1000));
            for (int i = 0; i < numMetrics; i++) {
                String metName = schema.getMetricFieldSpecs().get(i).getName();
                map.put(metName, r.nextInt((numDimensions - i + 2)));
            }
            GenericRow genericRow = new GenericRow();
            genericRow.init(map);
            builder.append(genericRow);
        }
        builder.build();
    } finally {
        // Always clean up, even when the build fails part-way through.
        FileUtils.deleteDirectory(builderConfig.outDir);
    }
}
Also used : HashMap(java.util.HashMap) Schema(com.linkedin.pinot.common.data.Schema) TimeFieldSpec(com.linkedin.pinot.common.data.TimeFieldSpec) MetricFieldSpec(com.linkedin.pinot.common.data.MetricFieldSpec) GenericRow(com.linkedin.pinot.core.data.GenericRow) Random(java.util.Random) File(java.io.File) DimensionFieldSpec(com.linkedin.pinot.common.data.DimensionFieldSpec) Test(org.testng.annotations.Test)

Example 18 with DimensionFieldSpec

use of com.linkedin.pinot.common.data.DimensionFieldSpec in project pinot by linkedin.

the class TestOffheapStarTreeBuilder method testSimpleCore.

/**
 * Builds a star tree over {@code numDimensions!} deterministic rows and verifies that every
 * dimension excluded from materialization carries the value "ALL" in the documents that follow
 * the raw documents.
 *
 * <p>The last {@code numSkipMaterializationDimensions} dimensions are registered as
 * skip-materialization dimensions; the others form the split order. The check throws an
 * {@link AssertionError} explicitly rather than using the {@code assert} keyword, so it is
 * enforced even when the JVM runs without {@code -ea}. The output directory is removed in a
 * {@code finally} block so a failing run does not leave it behind.
 *
 * @param numDimensions number of STRING dimensions "d1".."dN" to create
 * @param numMetrics number of INT metrics "m1".."mN" to create
 * @param numSkipMaterializationDimensions how many trailing dimensions skip materialization
 * @throws Exception if building the star tree or cleaning up the output directory fails
 */
private void testSimpleCore(int numDimensions, int numMetrics, int numSkipMaterializationDimensions) throws Exception {
    int numRows = (int) MathUtils.factorial(numDimensions);
    StarTreeBuilderConfig builderConfig = new StarTreeBuilderConfig();
    Schema schema = new Schema();
    builderConfig.dimensionsSplitOrder = new ArrayList<>();
    builderConfig.setSkipMaterializationForDimensions(new HashSet<String>());
    Set<String> skipMaterializationForDimensions = builderConfig.getSkipMaterializationForDimensions();
    for (int i = 0; i < numDimensions; i++) {
        String dimName = "d" + (i + 1);
        DimensionFieldSpec dimensionFieldSpec = new DimensionFieldSpec(dimName, DataType.STRING, true);
        schema.addField(dimensionFieldSpec);
        if (i < (numDimensions - numSkipMaterializationDimensions)) {
            builderConfig.dimensionsSplitOrder.add(dimName);
        } else {
            builderConfig.getSkipMaterializationForDimensions().add(dimName);
        }
    }
    schema.setTimeFieldSpec(new TimeFieldSpec("daysSinceEpoch", DataType.INT, TimeUnit.DAYS));
    for (int i = 0; i < numMetrics; i++) {
        String metricName = "m" + (i + 1);
        MetricFieldSpec metricFieldSpec = new MetricFieldSpec(metricName, DataType.INT);
        schema.addField(metricFieldSpec);
    }
    builderConfig.maxLeafRecords = 10;
    builderConfig.schema = schema;
    builderConfig.outDir = new File("/tmp/startree");
    try {
        OffHeapStarTreeBuilder builder = new OffHeapStarTreeBuilder();
        builder.init(builderConfig);
        // The same map instance is refilled for every row; every key is overwritten each time.
        HashMap<String, Object> map = new HashMap<>();
        for (int row = 0; row < numRows; row++) {
            for (int i = 0; i < numDimensions; i++) {
                String dimName = schema.getDimensionFieldSpecs().get(i).getName();
                // Dimension i cycles through (numDimensions - i) distinct values.
                map.put(dimName, dimName + "-v" + row % (numDimensions - i));
            }
            //time
            map.put("daysSinceEpoch", 1);
            for (int i = 0; i < numMetrics; i++) {
                String metName = schema.getMetricFieldSpecs().get(i).getName();
                map.put(metName, 1);
            }
            GenericRow genericRow = new GenericRow();
            genericRow.init(map);
            builder.append(genericRow);
        }
        builder.build();
        int totalDocs = builder.getTotalRawDocumentCount() + builder.getTotalAggregateDocumentCount();
        // Walk every document once to make sure full iteration completes without error.
        Iterator<GenericRow> iterator = builder.iterator(0, totalDocs);
        while (iterator.hasNext()) {
            iterator.next();
        }
        // Documents after the raw ones must hold "ALL" for every skipped dimension.
        iterator = builder.iterator(builder.getTotalRawDocumentCount(), totalDocs);
        while (iterator.hasNext()) {
            GenericRow row = iterator.next();
            for (String skipDimension : skipMaterializationForDimensions) {
                String rowValue = (String) row.getValue(skipDimension);
                if (!"ALL".equals(rowValue)) {
                    throw new AssertionError("Expected value \"ALL\" for skipped dimension " + skipDimension
                        + " but found: " + rowValue);
                }
            }
        }
    } finally {
        // Always clean up, even when the build or the verification fails.
        FileUtils.deleteDirectory(builderConfig.outDir);
    }
}
Also used : HashMap(java.util.HashMap) Schema(com.linkedin.pinot.common.data.Schema) TimeFieldSpec(com.linkedin.pinot.common.data.TimeFieldSpec) MetricFieldSpec(com.linkedin.pinot.common.data.MetricFieldSpec) GenericRow(com.linkedin.pinot.core.data.GenericRow) File(java.io.File) DimensionFieldSpec(com.linkedin.pinot.common.data.DimensionFieldSpec)

Example 19 with DimensionFieldSpec

use of com.linkedin.pinot.common.data.DimensionFieldSpec in project pinot by linkedin.

the class MultiValueDictionaryTest method testMultiValueIndexing.

/**
 * Round-trip test for multi-value indexing: writes {@code NROWS} rows of random longs through a
 * {@link LongMutableDictionary} and a {@link FixedByteSingleColumnMultiValueReaderWriter}, then
 * replays the same random sequence and checks that the stored values read back identically.
 *
 * <p>The number of values per row is drawn with {@code random.nextInt(MAX_N_VALUES)}. The
 * earlier {@code Math.abs(random.nextInt()) % MAX_N_VALUES} form yields a negative count when
 * {@code nextInt()} returns {@code Integer.MIN_VALUE}, which fails with a
 * {@code NegativeArraySizeException}. The write and read phases use the same expression, so
 * they stay in lock-step for any seed.
 *
 * @param seed seed for the random generator; reported in assertion messages to reproduce failures
 * @throws Exception if indexing or reading fails
 */
public void testMultiValueIndexing(final long seed) throws Exception {
    // NOTE(review): this field spec is not referenced anywhere below; it is kept as-is. The name
    // says "Int" while the declared data type is LONG.
    final FieldSpec mvIntFs = new DimensionFieldSpec(COL_NAME, FieldSpec.DataType.LONG, false);
    final LongMutableDictionary dict = new LongMutableDictionary(COL_NAME);
    final FixedByteSingleColumnMultiValueReaderWriter indexer = new FixedByteSingleColumnMultiValueReaderWriter(NROWS, Integer.SIZE / 8, MAX_N_VALUES, 2);
    // Insert rows into the indexer and dictionary
    Random random = new Random(seed);
    for (int row = 0; row < NROWS; row++) {
        // Uniform in [0, MAX_N_VALUES); never negative.
        int nValues = random.nextInt(MAX_N_VALUES);
        Long[] val = new Long[nValues];
        for (int i = 0; i < nValues; i++) {
            val[i] = random.nextLong();
        }
        dict.index(val);
        int[] dictIds = new int[nValues];
        for (int i = 0; i < nValues; i++) {
            dictIds[i] = dict.indexOf(val[i]);
        }
        indexer.setIntArray(row, dictIds);
    }
    // Read back rows and make sure that the values are good.
    // Re-seeding replays exactly the draws made during the write phase.
    random = new Random(seed);
    final int[] dictIds = new int[MAX_N_VALUES];
    for (int row = 0; row < NROWS; row++) {
        int nValues = indexer.getIntArray(row, dictIds);
        Assert.assertEquals(nValues, random.nextInt(MAX_N_VALUES), "Mismatching number of values, random seed is: " + seed);
        for (int i = 0; i < nValues; i++) {
            Long val = dict.getLongValue(dictIds[i]);
            Assert.assertEquals(val.longValue(), random.nextLong(), "Value mismatch at row " + row + ", random seed is: " + seed);
        }
    }
}
Also used : FixedByteSingleColumnMultiValueReaderWriter(com.linkedin.pinot.core.io.readerwriter.impl.FixedByteSingleColumnMultiValueReaderWriter) Random(java.util.Random) DimensionFieldSpec(com.linkedin.pinot.common.data.DimensionFieldSpec) FieldSpec(com.linkedin.pinot.common.data.FieldSpec) DimensionFieldSpec(com.linkedin.pinot.common.data.DimensionFieldSpec)

Example 20 with DimensionFieldSpec

use of com.linkedin.pinot.common.data.DimensionFieldSpec in project pinot by linkedin.

the class SegmentWithHllIndexCreateHelper method printSchema.

/**
 * Logs a summary of the given schema at INFO level: the schema name, then every dimension
 * column and every metric column prefixed with its zero-based position (multi-value columns are
 * suffixed with " Multi-Value."), and finally the time column name.
 *
 * @param schema the schema to describe
 */
private static void printSchema(Schema schema) {
    LOGGER.info("schemaName: {}", schema.getSchemaName());

    LOGGER.info("Dimension columnNames: ");
    int dimensionIndex = 0;
    for (DimensionFieldSpec dimensionSpec : schema.getDimensionFieldSpecs()) {
        String line = dimensionIndex + " " + dimensionSpec.getName();
        LOGGER.info(dimensionSpec.isSingleValueField() ? line : line + " Multi-Value.");
        dimensionIndex++;
    }

    LOGGER.info("Metric columnNames: ");
    int metricIndex = 0;
    for (MetricFieldSpec metricSpec : schema.getMetricFieldSpecs()) {
        String line = metricIndex + " " + metricSpec.getName();
        LOGGER.info(metricSpec.isSingleValueField() ? line : line + " Multi-Value.");
        metricIndex++;
    }

    LOGGER.info("Time column: {}", schema.getTimeColumnName());
}
Also used : MetricFieldSpec(com.linkedin.pinot.common.data.MetricFieldSpec) DimensionFieldSpec(com.linkedin.pinot.common.data.DimensionFieldSpec)

Aggregations

DimensionFieldSpec (com.linkedin.pinot.common.data.DimensionFieldSpec) 38, FieldSpec (com.linkedin.pinot.common.data.FieldSpec) 27, Schema (com.linkedin.pinot.common.data.Schema) 18, Test (org.testng.annotations.Test) 17, MetricFieldSpec (com.linkedin.pinot.common.data.MetricFieldSpec) 16, File (java.io.File) 16, TimeFieldSpec (com.linkedin.pinot.common.data.TimeFieldSpec) 13, HashMap (java.util.HashMap) 9, GenericRow (com.linkedin.pinot.core.data.GenericRow) 7, Random (java.util.Random) 7, TimeGranularitySpec (com.linkedin.pinot.common.data.TimeGranularitySpec) 6, AbstractColumnStatisticsCollector (com.linkedin.pinot.core.segment.creator.AbstractColumnStatisticsCollector) 6, SegmentDictionaryCreator (com.linkedin.pinot.core.segment.creator.impl.SegmentDictionaryCreator) 6, SegmentGeneratorConfig (com.linkedin.pinot.core.indexsegment.generator.SegmentGeneratorConfig) 5, SegmentIndexCreationDriverImpl (com.linkedin.pinot.core.segment.creator.impl.SegmentIndexCreationDriverImpl) 5, ArrayList (java.util.ArrayList) 4, FieldType (com.linkedin.pinot.common.data.FieldSpec.FieldType) 3, IndexSegment (com.linkedin.pinot.core.indexsegment.IndexSegment) 3, DataType (com.linkedin.pinot.common.data.FieldSpec.DataType) 2, RecordReader (com.linkedin.pinot.core.data.readers.RecordReader) 2