
Example 31 with FieldSpec

use of com.linkedin.pinot.common.data.FieldSpec in project pinot by linkedin.

the class RealtimeFileBasedReaderTest method testDataSourceWithoutPredicateForMultiValueDimensionColumns.

private void testDataSourceWithoutPredicateForMultiValueDimensionColumns() {
    for (FieldSpec spec : schema.getAllFieldSpecs()) {
        if (!spec.isSingleValueField()) {
            DataSource offlineDS = offlineSegment.getDataSource(spec.getName());
            DataSource realtimeDS = realtimeSegment.getDataSource(spec.getName());
            Block offlineBlock = offlineDS.nextBlock();
            Block realtimeBlock = realtimeDS.nextBlock();
            BlockMetadata offlineMetadata = offlineBlock.getMetadata();
            BlockMetadata realtimeMetadata = realtimeBlock.getMetadata();
            BlockMultiValIterator offlineValIterator = (BlockMultiValIterator) offlineBlock.getBlockValueSet().iterator();
            BlockMultiValIterator realtimeValIterator = (BlockMultiValIterator) realtimeBlock.getBlockValueSet().iterator();
            Assert.assertEquals(offlineSegment.getSegmentMetadata().getTotalDocs(), realtimeSegment.getAggregateDocumentCount());
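            // Walk both multi-value iterators in lockstep and compare the dictionary values behind each
            // entry, so the realtime and offline segments must agree row by row.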
            while (realtimeValIterator.hasNext()) {
                int[] offlineIds = new int[offlineMetadata.getMaxNumberOfMultiValues()];
                int[] realtimeIds = new int[realtimeMetadata.getMaxNumberOfMultiValues()];
                int offlineLength = offlineValIterator.nextIntVal(offlineIds);
                int realtimeLength = realtimeValIterator.nextIntVal(realtimeIds);
                Assert.assertEquals(offlineLength, realtimeLength);
                for (int i = 0; i < offlineLength; i++) {
                    Assert.assertEquals(offlineMetadata.getDictionary().get(offlineIds[i]), realtimeMetadata.getDictionary().get(realtimeIds[i]));
                }
            }
        }
    }
}
Also used : BlockMultiValIterator(com.linkedin.pinot.core.common.BlockMultiValIterator) BlockMetadata(com.linkedin.pinot.core.common.BlockMetadata) Block(com.linkedin.pinot.core.common.Block) FieldSpec(com.linkedin.pinot.common.data.FieldSpec) DataSource(com.linkedin.pinot.core.common.DataSource)

Example 32 with FieldSpec

use of com.linkedin.pinot.common.data.FieldSpec in project pinot by linkedin.

the class ThirdeyeAvroUtilsTest method testConstructAvroSchemaFromPinotSchema.

@Test
public void testConstructAvroSchemaFromPinotSchema() throws Exception {
    com.linkedin.pinot.common.data.Schema pinotSchema = new com.linkedin.pinot.common.data.Schema();
    pinotSchema.setSchemaName("test");
    FieldSpec spec = new DimensionFieldSpec("d1", DataType.STRING, true);
    pinotSchema.addField("d1", spec);
    spec = new MetricFieldSpec("m1", DataType.DOUBLE);
    pinotSchema.addField("m1", spec);
    spec = new TimeFieldSpec(new TimeGranularitySpec(DataType.LONG, TimeUnit.HOURS, "t"));
    pinotSchema.addField("t", spec);
    Schema avroSchema = ThirdeyeAvroUtils.constructAvroSchemaFromPinotSchema(pinotSchema);
    String dType = ThirdeyeAvroUtils.getDataTypeForField("d1", avroSchema);
    Assert.assertEquals(dType, "STRING", "Avro schema constructed incorrectly");
    dType = ThirdeyeAvroUtils.getDataTypeForField("m1", avroSchema);
    Assert.assertEquals(dType, "DOUBLE", "Avro schema constructed incorrectly");
    dType = ThirdeyeAvroUtils.getDataTypeForField("t", avroSchema);
    Assert.assertEquals(dType, "LONG", "Avro schema constructed incorrectly");
}
Also used : TimeGranularitySpec(com.linkedin.pinot.common.data.TimeGranularitySpec) Schema(org.apache.avro.Schema) TimeFieldSpec(com.linkedin.pinot.common.data.TimeFieldSpec) MetricFieldSpec(com.linkedin.pinot.common.data.MetricFieldSpec) DimensionFieldSpec(com.linkedin.pinot.common.data.DimensionFieldSpec) FieldSpec(com.linkedin.pinot.common.data.FieldSpec) Test(org.testng.annotations.Test)

Example 33 with FieldSpec

use of com.linkedin.pinot.common.data.FieldSpec in project pinot by linkedin.

the class ThirdeyeAvroUtils method constructAvroSchemaFromPinotSchema.

/**
   * Constructs an Avro schema from a Pinot schema, mapping each Pinot field to a nullable Avro
   * field of the corresponding primitive type.
   * @param schema the Pinot schema to convert
   * @return the equivalent Avro record schema
   */
public static Schema constructAvroSchemaFromPinotSchema(com.linkedin.pinot.common.data.Schema schema) {
    Schema avroSchema = null;
    RecordBuilder<Schema> recordBuilder = SchemaBuilder.record("record");
    FieldAssembler<Schema> fieldAssembler = recordBuilder.fields();
    for (FieldSpec fieldSpec : schema.getAllFieldSpecs()) {
        String fieldName = fieldSpec.getName();
        DataType dataType = fieldSpec.getDataType();
        BaseFieldTypeBuilder<Schema> baseFieldTypeBuilder = fieldAssembler.name(fieldName).type().nullable();
        switch(dataType) {
            case BOOLEAN:
                fieldAssembler = baseFieldTypeBuilder.booleanType().noDefault();
                break;
            case DOUBLE:
                fieldAssembler = baseFieldTypeBuilder.doubleType().noDefault();
                break;
            case FLOAT:
                fieldAssembler = baseFieldTypeBuilder.floatType().noDefault();
                break;
            case INT:
                fieldAssembler = baseFieldTypeBuilder.intType().noDefault();
                break;
            case LONG:
                fieldAssembler = baseFieldTypeBuilder.longType().noDefault();
                break;
            case STRING:
                fieldAssembler = baseFieldTypeBuilder.stringType().noDefault();
                break;
            default:
                break;
        }
    }
    avroSchema = fieldAssembler.endRecord();
    LOGGER.info("Avro Schema {}", avroSchema.toString(true));
    return avroSchema;
}
Also used : Schema(org.apache.avro.Schema) DataType(com.linkedin.pinot.common.data.FieldSpec.DataType) FieldSpec(com.linkedin.pinot.common.data.FieldSpec)
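
For context, here is a minimal sketch of how this conversion might be driven. The schema name and the "country"/"clicks" columns are invented for illustration, and the snippet assumes the same imports used in the examples above (FieldSpec, DataType, DimensionFieldSpec, MetricFieldSpec, org.apache.avro.Schema, ThirdeyeAvroUtils).

// Hypothetical caller of constructAvroSchemaFromPinotSchema; field names are made up.
com.linkedin.pinot.common.data.Schema pinotSchema = new com.linkedin.pinot.common.data.Schema();
pinotSchema.setSchemaName("pageViews");
// A single-valued string dimension and a long metric, mirroring the test in Example 32.
pinotSchema.addField("country", new DimensionFieldSpec("country", DataType.STRING, true));
pinotSchema.addField("clicks", new MetricFieldSpec("clicks", DataType.LONG));
// Each Pinot field becomes a nullable Avro field of the matching primitive type.
Schema avroSchema = ThirdeyeAvroUtils.constructAvroSchemaFromPinotSchema(pinotSchema);
// Expected to return "LONG", per the switch statement above.
String clicksType = ThirdeyeAvroUtils.getDataTypeForField("clicks", avroSchema);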

Example 34 with FieldSpec

use of com.linkedin.pinot.common.data.FieldSpec in project pinot by linkedin.

the class ThirdeyePinotSchemaUtils method createSchema.

/**
   * Transforms the ThirdEye config into a Pinot schema.
   * Adds the default __COUNT metric if it is not already present.
   * Adds an additional column for every dimension that is either specified as topk or whitelist,
   * and hence has a transformed new column_raw.
   * @param thirdeyeConfig the ThirdEye config to transform
   * @return the resulting Pinot schema
   */
public static Schema createSchema(ThirdEyeConfig thirdeyeConfig) {
    Schema schema = new Schema();
    Set<String> transformDimensions = thirdeyeConfig.getTransformDimensions();
    for (DimensionSpec dimensionSpec : thirdeyeConfig.getDimensions()) {
        FieldSpec fieldSpec = new DimensionFieldSpec();
        String dimensionName = dimensionSpec.getName();
        fieldSpec.setName(dimensionName);
        fieldSpec.setDataType(DataType.STRING);
        fieldSpec.setSingleValueField(true);
        schema.addField(dimensionName, fieldSpec);
        if (transformDimensions.contains(dimensionName)) {
            fieldSpec = new DimensionFieldSpec();
            dimensionName = dimensionName + ThirdEyeConstants.TOPK_DIMENSION_SUFFIX;
            fieldSpec.setName(dimensionName);
            fieldSpec.setDataType(DataType.STRING);
            fieldSpec.setSingleValueField(true);
            schema.addField(dimensionName, fieldSpec);
        }
    }
    boolean countIncluded = false;
    for (MetricSpec metricSpec : thirdeyeConfig.getMetrics()) {
        FieldSpec fieldSpec = new MetricFieldSpec();
        String metricName = metricSpec.getName();
        if (metricName.equals(ThirdEyeConstants.AUTO_METRIC_COUNT)) {
            countIncluded = true;
        }
        fieldSpec.setName(metricName);
        fieldSpec.setDataType(DataType.valueOf(metricSpec.getType().toString()));
        fieldSpec.setSingleValueField(true);
        schema.addField(metricName, fieldSpec);
    }
    if (!countIncluded) {
        FieldSpec fieldSpec = new MetricFieldSpec();
        String metricName = ThirdEyeConstants.AUTO_METRIC_COUNT;
        fieldSpec.setName(metricName);
        fieldSpec.setDataType(DataType.LONG);
        fieldSpec.setDefaultNullValue(1);
        schema.addField(metricName, fieldSpec);
    }
    TimeGranularitySpec incoming = new TimeGranularitySpec(DataType.LONG, thirdeyeConfig.getTime().getTimeGranularity().getSize(), thirdeyeConfig.getTime().getTimeGranularity().getUnit(), thirdeyeConfig.getTime().getTimeFormat(), thirdeyeConfig.getTime().getColumnName());
    TimeGranularitySpec outgoing = new TimeGranularitySpec(DataType.LONG, thirdeyeConfig.getTime().getTimeGranularity().getSize(), thirdeyeConfig.getTime().getTimeGranularity().getUnit(), thirdeyeConfig.getTime().getTimeFormat(), thirdeyeConfig.getTime().getColumnName());
    schema.addField(thirdeyeConfig.getTime().getColumnName(), new TimeFieldSpec(incoming, outgoing));
    schema.setSchemaName(thirdeyeConfig.getCollection());
    return schema;
}
Also used : DimensionSpec(com.linkedin.thirdeye.hadoop.config.DimensionSpec) TimeGranularitySpec(com.linkedin.pinot.common.data.TimeGranularitySpec) Schema(com.linkedin.pinot.common.data.Schema) MetricSpec(com.linkedin.thirdeye.hadoop.config.MetricSpec) TimeFieldSpec(com.linkedin.pinot.common.data.TimeFieldSpec) MetricFieldSpec(com.linkedin.pinot.common.data.MetricFieldSpec) DimensionFieldSpec(com.linkedin.pinot.common.data.DimensionFieldSpec) FieldSpec(com.linkedin.pinot.common.data.FieldSpec)
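
To make the shape of the output concrete, the following hand-built sketch approximates what createSchema produces for one dimension, one metric, and a time column, reusing only the constructors and setters shown in Examples 32 and 34. The column names and the hourly granularity are invented for the example, and the __COUNT and topk companion columns are only described in comments rather than constructed.

// Approximate, hand-built equivalent of the schema createSchema emits (names are hypothetical).
Schema schema = new Schema();
schema.setSchemaName("myCollection");
// Every dimension becomes a single-valued STRING column; a topk/whitelist dimension would also
// get a companion column with TOPK_DIMENSION_SUFFIX appended to its name.
FieldSpec country = new DimensionFieldSpec();
country.setName("country");
country.setDataType(DataType.STRING);
country.setSingleValueField(true);
schema.addField("country", country);
// Metrics keep their configured type; __COUNT would be added as a LONG with default value 1 if missing.
FieldSpec clicks = new MetricFieldSpec();
clicks.setName("clicks");
clicks.setDataType(DataType.LONG);
clicks.setSingleValueField(true);
schema.addField("clicks", clicks);
// Incoming and outgoing time specs are identical, built from the config's time granularity and column.
TimeGranularitySpec hourly = new TimeGranularitySpec(DataType.LONG, TimeUnit.HOURS, "hoursSinceEpoch");
schema.addField("hoursSinceEpoch", new TimeFieldSpec(hourly, hourly));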

Example 35 with FieldSpec

use of com.linkedin.pinot.common.data.FieldSpec in project pinot by linkedin.

the class RealtimeTableDataManager method isValid.

/**
   * Validates a schema against the table config for real-time record consumption.
   * Ideally, we should validate these things when the schema is added or the table is created, but either
   * of these may be changed while the table is already provisioned. For the change to take effect, we need
   * to restart the servers, so validation at this place is fine.
   *
   * As of now, the following validations are done:
   * 1. Make sure that the sorted column, if specified, is not multi-valued.
   * 2. Validate the schema itself.
   *
   * We allow the user to specify multiple sorted columns, but only consider the first one for now
   * (secondary sort is not yet implemented).
   *
   * If we add more validations, it may make sense to split this method into multiple validation methods,
   * but for now we try to collect all the invalid cases before returning from this method.
   * @param schema the schema to validate
   * @param indexingConfig the table's indexing config
   * @return true if the schema is valid.
   */
private boolean isValid(Schema schema, IndexingConfig indexingConfig) {
    // 1. Make sure that the sorted column is not a multi-value field.
    List<String> sortedColumns = indexingConfig.getSortedColumn();
    boolean isValid = true;
    if (!sortedColumns.isEmpty()) {
        final String sortedColumn = sortedColumns.get(0);
        if (sortedColumns.size() > 1) {
            LOGGER.warn("More than one sorted column configured. Using {}", sortedColumn);
        }
        FieldSpec fieldSpec = schema.getFieldSpecFor(sortedColumn);
        if (fieldSpec == null) {
            LOGGER.error("Sorted column {} does not exist in the schema", sortedColumn);
            isValid = false;
        } else if (!fieldSpec.isSingleValueField()) {
            LOGGER.error("Cannot configure multi-valued column {} as sorted column", sortedColumn);
            isValid = false;
        }
    }
    // 2. We want to get the schema errors, if any, even if isValid is false;
    if (!schema.validate(LOGGER)) {
        isValid = false;
    }
    return isValid;
}
Also used : FieldSpec(com.linkedin.pinot.common.data.FieldSpec)
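
As a standalone illustration of check 1, the snippet below applies the same single-value test to a hypothetical sorted column name; it relies only on getFieldSpecFor and isSingleValueField as used above, with "country" standing in for the configured sorted column.

// Hypothetical standalone version of check 1: reject a missing or multi-valued sorted column.
String sortedColumn = "country";
FieldSpec fieldSpec = schema.getFieldSpecFor(sortedColumn);
boolean sortedColumnIsValid = fieldSpec != null && fieldSpec.isSingleValueField();
if (!sortedColumnIsValid) {
    LOGGER.error("Cannot configure column {} as sorted column", sortedColumn);
}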

Aggregations

FieldSpec (com.linkedin.pinot.common.data.FieldSpec): 52
DimensionFieldSpec (com.linkedin.pinot.common.data.DimensionFieldSpec): 28
Test (org.testng.annotations.Test): 15
TimeFieldSpec (com.linkedin.pinot.common.data.TimeFieldSpec): 14
MetricFieldSpec (com.linkedin.pinot.common.data.MetricFieldSpec): 13
File (java.io.File): 11
Schema (com.linkedin.pinot.common.data.Schema): 10
SegmentDictionaryCreator (com.linkedin.pinot.core.segment.creator.impl.SegmentDictionaryCreator): 7
HashMap (java.util.HashMap): 7
TimeGranularitySpec (com.linkedin.pinot.common.data.TimeGranularitySpec): 6
AbstractColumnStatisticsCollector (com.linkedin.pinot.core.segment.creator.AbstractColumnStatisticsCollector): 6
Random (java.util.Random): 5
Block (com.linkedin.pinot.core.common.Block): 4
BlockMetadata (com.linkedin.pinot.core.common.BlockMetadata): 4
DataSource (com.linkedin.pinot.core.common.DataSource): 4
GenericRow (com.linkedin.pinot.core.data.GenericRow): 4
SegmentGeneratorConfig (com.linkedin.pinot.core.indexsegment.generator.SegmentGeneratorConfig): 4
SegmentIndexCreationDriverImpl (com.linkedin.pinot.core.segment.creator.impl.SegmentIndexCreationDriverImpl): 4
ArrayList (java.util.ArrayList): 4
DataType (com.linkedin.pinot.common.data.FieldSpec.DataType): 3