Search in sources :

Example 16 with FieldSpec

use of com.linkedin.pinot.common.data.FieldSpec in project pinot by linkedin.

the class RealtimeFileBasedReaderTest method testDataSourceWithoutPredicateForSingleValueDimensionColumns.

/**
 * For every single-value dimension column in the schema, checks that the offline segment and the
 * realtime segment serve the same values through their data sources.
 *
 * <p>The first block of each data source is fetched and its value iterators are walked in
 * lock-step. Each pair of dictionary ids is resolved through the dictionary exposed by the
 * corresponding block metadata, and the resolved values must be equal. On a mismatch the column
 * name and both id/value pairs are logged before the assertion error is rethrown.
 */
private void testDataSourceWithoutPredicateForSingleValueDimensionColumns() {
    for (FieldSpec spec : schema.getAllFieldSpecs()) {
        // Only single-value dimension columns are covered by this check.
        if (!spec.isSingleValueField() || spec.getFieldType() != FieldType.DIMENSION) {
            continue;
        }
        DataSource offlineDS = offlineSegment.getDataSource(spec.getName());
        DataSource realtimeDS = realtimeSegment.getDataSource(spec.getName());
        Block offlineBlock = offlineDS.nextBlock();
        Block realtimeBlock = realtimeDS.nextBlock();
        BlockMetadata offlineMetadata = offlineBlock.getMetadata();
        BlockMetadata realtimeMetadata = realtimeBlock.getMetadata();
        BlockSingleValIterator offlineValIterator = (BlockSingleValIterator) offlineBlock.getBlockValueSet().iterator();
        BlockSingleValIterator realtimeValIterator = (BlockSingleValIterator) realtimeBlock.getBlockValueSet().iterator();
        Assert.assertEquals(offlineSegment.getSegmentMetadata().getTotalDocs(), realtimeSegment.getAggregateDocumentCount());
        // Advance only while BOTH iterators have values: reading the offline iterator past its
        // end would hide a length mismatch behind whatever nextIntVal() does when exhausted.
        while (offlineValIterator.hasNext() && realtimeValIterator.hasNext()) {
            int offlineDicId = offlineValIterator.nextIntVal();
            int realtimeDicId = realtimeValIterator.nextIntVal();
            try {
                Assert.assertEquals(offlineMetadata.getDictionary().get(offlineDicId), realtimeMetadata.getDictionary().get(realtimeDicId));
            } catch (AssertionError e) {
                // Log enough context to identify the offending column and values, then fail.
                LOGGER.info("column : {}", spec.getName());
                LOGGER.info("realtimeDicId : {}, rawValue : {}", realtimeDicId, realtimeMetadata.getDictionary().get(realtimeDicId));
                LOGGER.info("offlineDicId : {}, rawValue : {}", offlineDicId, offlineMetadata.getDictionary().get(offlineDicId));
                throw e;
            }
        }
        // If one iterator ran out before the other, the two sides differ in length.
        Assert.assertEquals(offlineValIterator.hasNext(), realtimeValIterator.hasNext());
    }
}
Also used : BlockSingleValIterator(com.linkedin.pinot.core.common.BlockSingleValIterator) BlockMetadata(com.linkedin.pinot.core.common.BlockMetadata) Block(com.linkedin.pinot.core.common.Block) FieldSpec(com.linkedin.pinot.common.data.FieldSpec) DataSource(com.linkedin.pinot.core.common.DataSource)

Example 17 with FieldSpec

use of com.linkedin.pinot.common.data.FieldSpec in project pinot by linkedin.

the class NoDictionaryGroupKeyGeneratorTest method buildSegment.

/**
 * Helper method that generates random rows and builds a segment from them.
 *
 * <p>The schema gets one {@link DimensionFieldSpec} per entry of {@code COLUMN_NAMES}, typed by
 * the matching entry of {@code DATA_TYPES}. The columns listed in {@code NO_DICT_COLUMN_NAMES}
 * are registered on the generator config as raw index creation columns. {@code NUM_ROWS} rows
 * are generated: numeric columns receive random values and string columns receive
 * {@code "value_" + rowIndex}.
 *
 * <p>The record reader wrapping the generated rows is stored in {@code _recordReader} as a side
 * effect.
 *
 * @return the record reader over the generated rows that was used to build the segment.
 *
 * @throws Exception propagated from the calls made while configuring and building the segment.
 */
private TestRecordReader buildSegment() throws Exception {
    Schema schema = new Schema();
    for (int col = 0; col < COLUMN_NAMES.length; col++) {
        schema.addField(new DimensionFieldSpec(COLUMN_NAMES[col], DATA_TYPES[col], true));
    }

    SegmentGeneratorConfig generatorConfig = new SegmentGeneratorConfig(schema);
    generatorConfig.setRawIndexCreationColumns(Arrays.asList(NO_DICT_COLUMN_NAMES));
    generatorConfig.setOutDir(SEGMENT_DIR_NAME);
    generatorConfig.setSegmentName(SEGMENT_NAME);

    Random rng = new Random();
    List<GenericRow> generatedRows = new ArrayList<>(NUM_ROWS);
    for (int rowId = 0; rowId < NUM_ROWS; rowId++) {
        Map<String, Object> valuesByColumn = new HashMap<>(NUM_COLUMNS);
        for (FieldSpec fieldSpec : schema.getAllFieldSpecs()) {
            String columnName = fieldSpec.getName();
            FieldSpec.DataType type = fieldSpec.getDataType();

            // Pick the cell value for this column according to its declared data type.
            Object cellValue;
            switch (type) {
                case INT:
                    cellValue = rng.nextInt();
                    break;
                case LONG:
                    cellValue = rng.nextLong();
                    break;
                case FLOAT:
                    cellValue = rng.nextFloat();
                    break;
                case DOUBLE:
                    cellValue = rng.nextDouble();
                    break;
                case STRING:
                    cellValue = "value_" + rowId;
                    break;
                default:
                    throw new IllegalArgumentException("Illegal data type specified: " + type);
            }
            valuesByColumn.put(columnName, cellValue);
        }

        GenericRow row = new GenericRow();
        row.init(valuesByColumn);
        generatedRows.add(row);
    }

    // The reader is kept in a field as well as returned to the caller.
    _recordReader = new TestRecordReader(generatedRows, schema);
    SegmentIndexCreationDriverImpl indexDriver = new SegmentIndexCreationDriverImpl();
    indexDriver.init(generatorConfig, _recordReader);
    indexDriver.build();
    return _recordReader;
}
Also used : TestRecordReader(com.linkedin.pinot.core.data.readers.TestRecordReader) HashMap(java.util.HashMap) Schema(com.linkedin.pinot.common.data.Schema) ArrayList(java.util.ArrayList) FieldSpec(com.linkedin.pinot.common.data.FieldSpec) DimensionFieldSpec(com.linkedin.pinot.common.data.DimensionFieldSpec) SegmentIndexCreationDriverImpl(com.linkedin.pinot.core.segment.creator.impl.SegmentIndexCreationDriverImpl) GenericRow(com.linkedin.pinot.core.data.GenericRow) Random(java.util.Random) SegmentGeneratorConfig(com.linkedin.pinot.core.indexsegment.generator.SegmentGeneratorConfig) DimensionFieldSpec(com.linkedin.pinot.common.data.DimensionFieldSpec)

Example 18 with FieldSpec

use of com.linkedin.pinot.common.data.FieldSpec in project pinot by linkedin.

the class HybridClusterIntegrationTest method makeSortedColumn.

/**
 * Randomly selects a single-value dimension to be used as the sorted column, or decides that no
 * sorted column is used for this run.
 * @note: To debug a failed test, change this method to return a fixed column name (or null).
 *
 * @return the name of the chosen sorted column, or null when none is to be used for this run.
 */
protected String makeSortedColumn() {
    List<String> dimensionNames = schema.getDimensionNames();
    final int dimensionCount = dimensionNames.size();

    // Return no sorted column 20% of the time
    if (random.nextInt() % 5 == 0) {
        return null;
    }

    // At most one attempt per dimension; an attempt is spent whenever the draw lands on the
    // extra index (== dimensionCount) or on a column that is not single-valued.
    for (int attempt = 0; attempt < dimensionCount; attempt++) {
        int candidateIndex = random.nextInt(dimensionCount + 1);
        if (candidateIndex != dimensionCount) {
            String candidate = dimensionNames.get(candidateIndex);
            if (schema.getFieldSpecFor(candidate).isSingleValueField()) {
                return candidate;
            }
        }
    }
    return null;
}
Also used : FieldSpec(com.linkedin.pinot.common.data.FieldSpec)

Example 19 with FieldSpec

use of com.linkedin.pinot.common.data.FieldSpec in project pinot by linkedin.

the class DictionariesTest method testIntColumnPreIndexStatsCollector.

/**
 * Feeds boxed values of mixed numeric types (Integer, Float, Long, Double) to an
 * {@link IntColumnPreIndexStatsCollector} and checks the sorted flag after each value, then the
 * cardinality, min and max once the collector is sealed.
 *
 * <p>The flag is expected to stay {@code true} while values are non-decreasing (1, 2, 3, 4, 4)
 * and to become and remain {@code false} once a smaller value (2) arrives.
 *
 * @throws Exception propagated from the collector calls.
 */
@Test
public void testIntColumnPreIndexStatsCollector() throws Exception {
    FieldSpec spec = new DimensionFieldSpec("column1", DataType.INT, true);
    AbstractColumnStatisticsCollector statsCollector = new IntColumnPreIndexStatsCollector(spec);
    // valueOf factories replace the deprecated boxed-primitive constructors; the boxed runtime
    // types handed to collect() are unchanged.
    statsCollector.collect(Integer.valueOf(1));
    Assert.assertTrue(statsCollector.isSorted());
    statsCollector.collect(Float.valueOf(2f));
    Assert.assertTrue(statsCollector.isSorted());
    statsCollector.collect(Long.valueOf(3L));
    Assert.assertTrue(statsCollector.isSorted());
    statsCollector.collect(Double.valueOf(4d));
    Assert.assertTrue(statsCollector.isSorted());
    statsCollector.collect(Integer.valueOf(4));
    Assert.assertTrue(statsCollector.isSorted());
    // First out-of-order value: from here on the collector must report unsorted.
    statsCollector.collect(Float.valueOf(2f));
    Assert.assertFalse(statsCollector.isSorted());
    statsCollector.collect(Double.valueOf(40d));
    Assert.assertFalse(statsCollector.isSorted());
    statsCollector.collect(Double.valueOf(20d));
    Assert.assertFalse(statsCollector.isSorted());
    statsCollector.seal();
    Assert.assertEquals(statsCollector.getCardinality(), 6);
    Assert.assertEquals(((Number) statsCollector.getMinValue()).intValue(), 1);
    Assert.assertEquals(((Number) statsCollector.getMaxValue()).intValue(), 40);
    Assert.assertFalse(statsCollector.isSorted());
}
Also used : IntColumnPreIndexStatsCollector(com.linkedin.pinot.core.segment.creator.impl.stats.IntColumnPreIndexStatsCollector) AbstractColumnStatisticsCollector(com.linkedin.pinot.core.segment.creator.AbstractColumnStatisticsCollector) FieldSpec(com.linkedin.pinot.common.data.FieldSpec) DimensionFieldSpec(com.linkedin.pinot.common.data.DimensionFieldSpec) DimensionFieldSpec(com.linkedin.pinot.common.data.DimensionFieldSpec) Test(org.testng.annotations.Test)

Example 20 with FieldSpec

use of com.linkedin.pinot.common.data.FieldSpec in project pinot by linkedin.

the class DictionariesTest method testStringColumnPreIndexStatsCollectorForBoolean.

/**
 * Feeds the string forms of boolean values to a {@link StringColumnPreIndexStatsCollector}
 * created for a BOOLEAN dimension column and checks the sorted flag after each value, then the
 * cardinality, min and max once the collector is sealed.
 *
 * @throws Exception propagated from the collector calls.
 */
@Test
public void testStringColumnPreIndexStatsCollectorForBoolean() throws Exception {
    FieldSpec booleanSpec = new DimensionFieldSpec("column1", DataType.BOOLEAN, true);
    AbstractColumnStatisticsCollector collector = new StringColumnPreIndexStatsCollector(booleanSpec);

    // Non-decreasing prefix: the collector must report sorted after every value.
    String[] sortedPrefix = { "false", "false", "false", "true", "true" };
    for (String value : sortedPrefix) {
        collector.collect(value);
        Assert.assertTrue(collector.isSorted());
    }

    // The first value here breaks the order; the collector must report unsorted from then on.
    String[] unsortedSuffix = { "false", "false", "true" };
    for (String value : unsortedSuffix) {
        collector.collect(value);
        Assert.assertFalse(collector.isSorted());
    }

    collector.seal();
    Assert.assertEquals(collector.getCardinality(), 2);
    Assert.assertEquals(collector.getMinValue().toString(), "false");
    Assert.assertEquals(collector.getMaxValue().toString(), "true");
    Assert.assertFalse(collector.isSorted());
}
Also used : StringColumnPreIndexStatsCollector(com.linkedin.pinot.core.segment.creator.impl.stats.StringColumnPreIndexStatsCollector) AbstractColumnStatisticsCollector(com.linkedin.pinot.core.segment.creator.AbstractColumnStatisticsCollector) FieldSpec(com.linkedin.pinot.common.data.FieldSpec) DimensionFieldSpec(com.linkedin.pinot.common.data.DimensionFieldSpec) DimensionFieldSpec(com.linkedin.pinot.common.data.DimensionFieldSpec) Test(org.testng.annotations.Test)

Aggregations

FieldSpec (com.linkedin.pinot.common.data.FieldSpec): 52; DimensionFieldSpec (com.linkedin.pinot.common.data.DimensionFieldSpec): 28; Test (org.testng.annotations.Test): 15; TimeFieldSpec (com.linkedin.pinot.common.data.TimeFieldSpec): 14; MetricFieldSpec (com.linkedin.pinot.common.data.MetricFieldSpec): 13; File (java.io.File): 11; Schema (com.linkedin.pinot.common.data.Schema): 10; SegmentDictionaryCreator (com.linkedin.pinot.core.segment.creator.impl.SegmentDictionaryCreator): 7; HashMap (java.util.HashMap): 7; TimeGranularitySpec (com.linkedin.pinot.common.data.TimeGranularitySpec): 6; AbstractColumnStatisticsCollector (com.linkedin.pinot.core.segment.creator.AbstractColumnStatisticsCollector): 6; Random (java.util.Random): 5; Block (com.linkedin.pinot.core.common.Block): 4; BlockMetadata (com.linkedin.pinot.core.common.BlockMetadata): 4; DataSource (com.linkedin.pinot.core.common.DataSource): 4; GenericRow (com.linkedin.pinot.core.data.GenericRow): 4; SegmentGeneratorConfig (com.linkedin.pinot.core.indexsegment.generator.SegmentGeneratorConfig): 4; SegmentIndexCreationDriverImpl (com.linkedin.pinot.core.segment.creator.impl.SegmentIndexCreationDriverImpl): 4; ArrayList (java.util.ArrayList): 4; DataType (com.linkedin.pinot.common.data.FieldSpec.DataType): 3