Search in sources :

Example 11 with TimeGranularitySpec

use of com.linkedin.pinot.common.data.TimeGranularitySpec in project pinot by linkedin.

the class AutoloadPinotMetricsServiceTest method testAddNewDataset.

@Test
public void testAddNewDataset() throws Exception {
    testAutoLoadPinotMetricsService = new AutoLoadPinotMetricsService();
    schema = Schema.fromInputSteam(ClassLoader.getSystemResourceAsStream("sample-pinot-schema.json"));
    testAutoLoadPinotMetricsService.addPinotDataset(dataset, schema, datasetConfig);
    Assert.assertEquals(datasetConfigDAO.findAll().size(), 1);
    datasetConfig = datasetConfigDAO.findByDataset(dataset);
    Assert.assertEquals(datasetConfig.getDataset(), dataset);
    Assert.assertEquals(datasetConfig.getDimensions(), schema.getDimensionNames());
    Assert.assertEquals(datasetConfig.getTimeColumn(), schema.getTimeColumnName());
    TimeGranularitySpec timeGranularitySpec = schema.getTimeFieldSpec().getOutgoingGranularitySpec();
    Assert.assertEquals(datasetConfig.getTimeUnit(), timeGranularitySpec.getTimeType());
    Assert.assertEquals(datasetConfig.getTimeDuration(), new Integer(timeGranularitySpec.getTimeUnitSize()));
    Assert.assertEquals(datasetConfig.getTimeFormat(), timeGranularitySpec.getTimeFormat());
    Assert.assertEquals(datasetConfig.getTimezone(), "UTC");
    List<MetricConfigDTO> metricConfigs = metricConfigDAO.findByDataset(dataset);
    List<String> schemaMetricNames = schema.getMetricNames();
    List<Long> metricIds = new ArrayList<>();
    Assert.assertEquals(metricConfigs.size(), schemaMetricNames.size());
    for (MetricConfigDTO metricConfig : metricConfigs) {
        Assert.assertTrue(schemaMetricNames.contains(metricConfig.getName()));
        metricIds.add(metricConfig.getId());
    }
    DashboardConfigDTO dashboardConfig = dashboardConfigDAO.findByName(DashboardConfigBean.DEFAULT_DASHBOARD_PREFIX + dataset);
    Assert.assertEquals(dashboardConfig.getMetricIds(), metricIds);
}
Also used : TimeGranularitySpec(com.linkedin.pinot.common.data.TimeGranularitySpec) MetricConfigDTO(com.linkedin.thirdeye.datalayer.dto.MetricConfigDTO) ArrayList(java.util.ArrayList) DashboardConfigDTO(com.linkedin.thirdeye.datalayer.dto.DashboardConfigDTO) Test(org.testng.annotations.Test)

Example 12 with TimeGranularitySpec

use of com.linkedin.pinot.common.data.TimeGranularitySpec in project pinot by linkedin.

the class ThirdeyePinotSchemaUtils method createSchema.

/**
   * Transforms the thirdeyeConfig to pinot schema
   * Adds default __COUNT metric if not already present
   * Adds additional columns for all dimensions which
   * are wither specified as topk or whitelist
   * and hence have a transformed new column_raw
   * @param thirdeyeConfig
   * @return
   */
public static Schema createSchema(ThirdEyeConfig thirdeyeConfig) {
    Schema schema = new Schema();
    Set<String> transformDimensions = thirdeyeConfig.getTransformDimensions();
    for (DimensionSpec dimensionSpec : thirdeyeConfig.getDimensions()) {
        FieldSpec fieldSpec = new DimensionFieldSpec();
        String dimensionName = dimensionSpec.getName();
        fieldSpec.setName(dimensionName);
        fieldSpec.setDataType(DataType.STRING);
        fieldSpec.setSingleValueField(true);
        schema.addField(dimensionName, fieldSpec);
        if (transformDimensions.contains(dimensionName)) {
            fieldSpec = new DimensionFieldSpec();
            dimensionName = dimensionName + ThirdEyeConstants.TOPK_DIMENSION_SUFFIX;
            fieldSpec.setName(dimensionName);
            fieldSpec.setDataType(DataType.STRING);
            fieldSpec.setSingleValueField(true);
            schema.addField(dimensionName, fieldSpec);
        }
    }
    boolean countIncluded = false;
    for (MetricSpec metricSpec : thirdeyeConfig.getMetrics()) {
        FieldSpec fieldSpec = new MetricFieldSpec();
        String metricName = metricSpec.getName();
        if (metricName.equals(ThirdEyeConstants.AUTO_METRIC_COUNT)) {
            countIncluded = true;
        }
        fieldSpec.setName(metricName);
        fieldSpec.setDataType(DataType.valueOf(metricSpec.getType().toString()));
        fieldSpec.setSingleValueField(true);
        schema.addField(metricName, fieldSpec);
    }
    if (!countIncluded) {
        FieldSpec fieldSpec = new MetricFieldSpec();
        String metricName = ThirdEyeConstants.AUTO_METRIC_COUNT;
        fieldSpec.setName(metricName);
        fieldSpec.setDataType(DataType.LONG);
        fieldSpec.setDefaultNullValue(1);
        schema.addField(metricName, fieldSpec);
    }
    TimeGranularitySpec incoming = new TimeGranularitySpec(DataType.LONG, thirdeyeConfig.getTime().getTimeGranularity().getSize(), thirdeyeConfig.getTime().getTimeGranularity().getUnit(), thirdeyeConfig.getTime().getTimeFormat(), thirdeyeConfig.getTime().getColumnName());
    TimeGranularitySpec outgoing = new TimeGranularitySpec(DataType.LONG, thirdeyeConfig.getTime().getTimeGranularity().getSize(), thirdeyeConfig.getTime().getTimeGranularity().getUnit(), thirdeyeConfig.getTime().getTimeFormat(), thirdeyeConfig.getTime().getColumnName());
    schema.addField(thirdeyeConfig.getTime().getColumnName(), new TimeFieldSpec(incoming, outgoing));
    schema.setSchemaName(thirdeyeConfig.getCollection());
    return schema;
}
Also used : DimensionSpec(com.linkedin.thirdeye.hadoop.config.DimensionSpec) TimeGranularitySpec(com.linkedin.pinot.common.data.TimeGranularitySpec) Schema(com.linkedin.pinot.common.data.Schema) MetricSpec(com.linkedin.thirdeye.hadoop.config.MetricSpec) TimeFieldSpec(com.linkedin.pinot.common.data.TimeFieldSpec) MetricFieldSpec(com.linkedin.pinot.common.data.MetricFieldSpec) MetricFieldSpec(com.linkedin.pinot.common.data.MetricFieldSpec) DimensionFieldSpec(com.linkedin.pinot.common.data.DimensionFieldSpec) TimeFieldSpec(com.linkedin.pinot.common.data.TimeFieldSpec) FieldSpec(com.linkedin.pinot.common.data.FieldSpec) DimensionFieldSpec(com.linkedin.pinot.common.data.DimensionFieldSpec)

Example 13 with TimeGranularitySpec

use of com.linkedin.pinot.common.data.TimeGranularitySpec in project pinot by linkedin.

the class PinotSegmentRecordReader method getSchema.

@Override
public Schema getSchema() {
    Schema schema = new Schema();
    schema.setSchemaName(segmentMetadata.getName());
    for (String column : columns) {
        ColumnMetadata columnMetadata = segmentMetadata.getColumnMetadataFor(column);
        String columnName = columnMetadata.getColumnName();
        DataType dataType = columnMetadata.getDataType();
        FieldType fieldType = columnMetadata.getFieldType();
        FieldSpec fieldSpec = null;
        switch(fieldType) {
            case DIMENSION:
                boolean isSingleValue = columnMetadata.isSingleValue();
                fieldSpec = new DimensionFieldSpec(columnName, dataType, isSingleValue);
                break;
            case METRIC:
                fieldSpec = new MetricFieldSpec(columnName, dataType);
                break;
            case TIME:
                TimeUnit timeType = columnMetadata.getTimeUnit();
                TimeGranularitySpec incomingGranularitySpec = new TimeGranularitySpec(dataType, timeType, columnName);
                fieldSpec = new TimeFieldSpec(incomingGranularitySpec);
                break;
            default:
                break;
        }
        schema.addField(fieldSpec);
    }
    return schema;
}
Also used : TimeGranularitySpec(com.linkedin.pinot.common.data.TimeGranularitySpec) ColumnMetadata(com.linkedin.pinot.core.segment.index.ColumnMetadata) Schema(com.linkedin.pinot.common.data.Schema) TimeFieldSpec(com.linkedin.pinot.common.data.TimeFieldSpec) DataType(com.linkedin.pinot.common.data.FieldSpec.DataType) TimeUnit(java.util.concurrent.TimeUnit) MetricFieldSpec(com.linkedin.pinot.common.data.MetricFieldSpec) TimeFieldSpec(com.linkedin.pinot.common.data.TimeFieldSpec) FieldSpec(com.linkedin.pinot.common.data.FieldSpec) MetricFieldSpec(com.linkedin.pinot.common.data.MetricFieldSpec) DimensionFieldSpec(com.linkedin.pinot.common.data.DimensionFieldSpec) DimensionFieldSpec(com.linkedin.pinot.common.data.DimensionFieldSpec) FieldType(com.linkedin.pinot.common.data.FieldSpec.FieldType)

Example 14 with TimeGranularitySpec

use of com.linkedin.pinot.common.data.TimeGranularitySpec in project pinot by linkedin.

the class SegmentTestUtils method extractSchemaFromAvro.

public static Schema extractSchemaFromAvro(File avroFile, Map<String, FieldType> fieldTypeMap, TimeUnit granularity) throws IOException {
    DataFileStream<GenericRecord> dataStream = new DataFileStream<>(new FileInputStream(avroFile), new GenericDatumReader<GenericRecord>());
    Schema schema = new Schema();
    for (final Field field : dataStream.getSchema().getFields()) {
        final String columnName = field.name();
        FieldType fieldType = fieldTypeMap.get(columnName);
        Preconditions.checkNotNull(fieldType);
        switch(fieldType) {
            case TIME:
                final TimeGranularitySpec gSpec = new TimeGranularitySpec(getColumnType(field), granularity, columnName);
                final TimeFieldSpec fSpec = new TimeFieldSpec(gSpec);
                schema.addField(fSpec);
                continue;
            case DIMENSION:
                final FieldSpec dimensionFieldSpec = new DimensionFieldSpec(columnName, getColumnType(field), isSingleValueField(field));
                schema.addField(dimensionFieldSpec);
                continue;
            case METRIC:
                final FieldSpec metricFieldSpec = new MetricFieldSpec(columnName, getColumnType(field));
                schema.addField(metricFieldSpec);
                continue;
            default:
                throw new UnsupportedOperationException("Unsupported field type: " + fieldType);
        }
    }
    dataStream.close();
    return schema;
}
Also used : TimeGranularitySpec(com.linkedin.pinot.common.data.TimeGranularitySpec) Schema(com.linkedin.pinot.common.data.Schema) TimeFieldSpec(com.linkedin.pinot.common.data.TimeFieldSpec) DataFileStream(org.apache.avro.file.DataFileStream) MetricFieldSpec(com.linkedin.pinot.common.data.MetricFieldSpec) FileInputStream(java.io.FileInputStream) TimeFieldSpec(com.linkedin.pinot.common.data.TimeFieldSpec) FieldSpec(com.linkedin.pinot.common.data.FieldSpec) MetricFieldSpec(com.linkedin.pinot.common.data.MetricFieldSpec) DimensionFieldSpec(com.linkedin.pinot.common.data.DimensionFieldSpec) FieldType(com.linkedin.pinot.common.data.FieldSpec.FieldType) Field(org.apache.avro.Schema.Field) GenericRecord(org.apache.avro.generic.GenericRecord) DimensionFieldSpec(com.linkedin.pinot.common.data.DimensionFieldSpec)

Example 15 with TimeGranularitySpec

use of com.linkedin.pinot.common.data.TimeGranularitySpec in project pinot by linkedin.

the class ThirdeyeAvroUtilsTest method testConstructAvroSchemaFromPinotSchema.

@Test
public void testConstructAvroSchemaFromPinotSchema() throws Exception {
    com.linkedin.pinot.common.data.Schema pinotSchema = new com.linkedin.pinot.common.data.Schema();
    pinotSchema.setSchemaName("test");
    FieldSpec spec = new DimensionFieldSpec("d1", DataType.STRING, true);
    pinotSchema.addField("d1", spec);
    spec = new MetricFieldSpec("m1", DataType.DOUBLE);
    pinotSchema.addField("m1", spec);
    spec = new TimeFieldSpec(new TimeGranularitySpec(DataType.LONG, TimeUnit.HOURS, "t"));
    pinotSchema.addField("t", spec);
    Schema avroSchema = ThirdeyeAvroUtils.constructAvroSchemaFromPinotSchema(pinotSchema);
    String dType = ThirdeyeAvroUtils.getDataTypeForField("d1", avroSchema);
    Assert.assertEquals(dType, "STRING", "Avro schema constructed incorrectly");
    dType = ThirdeyeAvroUtils.getDataTypeForField("m1", avroSchema);
    Assert.assertEquals(dType, "DOUBLE", "Avro schema constructed incorrectly");
    dType = ThirdeyeAvroUtils.getDataTypeForField("t", avroSchema);
    Assert.assertEquals(dType, "LONG", "Avro schema constructed incorrectly");
}
Also used : TimeGranularitySpec(com.linkedin.pinot.common.data.TimeGranularitySpec) Schema(org.apache.avro.Schema) TimeFieldSpec(com.linkedin.pinot.common.data.TimeFieldSpec) MetricFieldSpec(com.linkedin.pinot.common.data.MetricFieldSpec) DimensionFieldSpec(com.linkedin.pinot.common.data.DimensionFieldSpec) TimeFieldSpec(com.linkedin.pinot.common.data.TimeFieldSpec) FieldSpec(com.linkedin.pinot.common.data.FieldSpec) MetricFieldSpec(com.linkedin.pinot.common.data.MetricFieldSpec) DimensionFieldSpec(com.linkedin.pinot.common.data.DimensionFieldSpec) Test(org.testng.annotations.Test)

Aggregations

TimeGranularitySpec (com.linkedin.pinot.common.data.TimeGranularitySpec)15 Test (org.testng.annotations.Test)8 TimeFieldSpec (com.linkedin.pinot.common.data.TimeFieldSpec)7 DimensionFieldSpec (com.linkedin.pinot.common.data.DimensionFieldSpec)6 FieldSpec (com.linkedin.pinot.common.data.FieldSpec)6 MetricFieldSpec (com.linkedin.pinot.common.data.MetricFieldSpec)6 Schema (com.linkedin.pinot.common.data.Schema)5 FieldType (com.linkedin.pinot.common.data.FieldSpec.FieldType)2 DataType (com.linkedin.pinot.common.data.FieldSpec.DataType)1 ColumnMetadata (com.linkedin.pinot.core.segment.index.ColumnMetadata)1 CollectionSchema (com.linkedin.thirdeye.api.CollectionSchema)1 DimensionSpec (com.linkedin.thirdeye.api.DimensionSpec)1 MetricSpec (com.linkedin.thirdeye.api.MetricSpec)1 TimeSpec (com.linkedin.thirdeye.api.TimeSpec)1 DashboardConfigDTO (com.linkedin.thirdeye.datalayer.dto.DashboardConfigDTO)1 DatasetConfigDTO (com.linkedin.thirdeye.datalayer.dto.DatasetConfigDTO)1 MetricConfigDTO (com.linkedin.thirdeye.datalayer.dto.MetricConfigDTO)1 DimensionSpec (com.linkedin.thirdeye.hadoop.config.DimensionSpec)1 MetricSpec (com.linkedin.thirdeye.hadoop.config.MetricSpec)1 FileInputStream (java.io.FileInputStream)1