Search in sources:

Example 11 with MetricFieldSpec

use of com.linkedin.pinot.common.data.MetricFieldSpec in project pinot by linkedin.

the class SegmentWithHllIndexCreateHelper method printSchema.

/**
 * Logs the schema for debugging: schema name, every dimension and metric column
 * (flagging multi-value columns), and the time column name.
 *
 * @param schema the Pinot schema to print; must be non-null
 */
private static void printSchema(Schema schema) {
    LOGGER.info("schemaName: {}", schema.getSchemaName());
    LOGGER.info("Dimension columnNames: ");
    int i = 0;
    for (DimensionFieldSpec spec : schema.getDimensionFieldSpecs()) {
        // Parameterized logging avoids building the message string when the level is disabled.
        if (!spec.isSingleValueField()) {
            LOGGER.info("{} {} Multi-Value.", i, spec.getName());
        } else {
            LOGGER.info("{} {}", i, spec.getName());
        }
        i += 1;
    }
    LOGGER.info("Metric columnNames: ");
    i = 0;
    for (MetricFieldSpec spec : schema.getMetricFieldSpecs()) {
        if (!spec.isSingleValueField()) {
            LOGGER.info("{} {} Multi-Value.", i, spec.getName());
        } else {
            LOGGER.info("{} {}", i, spec.getName());
        }
        i += 1;
    }
    LOGGER.info("Time column: {}", schema.getTimeColumnName());
}
Also used : MetricFieldSpec(com.linkedin.pinot.common.data.MetricFieldSpec) DimensionFieldSpec(com.linkedin.pinot.common.data.DimensionFieldSpec)

Example 12 with MetricFieldSpec

use of com.linkedin.pinot.common.data.MetricFieldSpec in project pinot by linkedin.

the class ThirdeyeAvroUtilsTest method testConstructAvroSchemaFromPinotSchema.

@Test
public void testConstructAvroSchemaFromPinotSchema() throws Exception {
    // Build a minimal Pinot schema: one dimension, one metric, and a time column.
    com.linkedin.pinot.common.data.Schema pinotSchema = new com.linkedin.pinot.common.data.Schema();
    pinotSchema.setSchemaName("test");
    pinotSchema.addField("d1", new DimensionFieldSpec("d1", DataType.STRING, true));
    pinotSchema.addField("m1", new MetricFieldSpec("m1", DataType.DOUBLE));
    pinotSchema.addField("t",
        new TimeFieldSpec(new TimeGranularitySpec(DataType.LONG, TimeUnit.HOURS, "t")));

    Schema avroSchema = ThirdeyeAvroUtils.constructAvroSchemaFromPinotSchema(pinotSchema);

    // Each Pinot column must map to the matching Avro data type.
    Assert.assertEquals(ThirdeyeAvroUtils.getDataTypeForField("d1", avroSchema), "STRING",
        "Avro schema constructed incorrectly");
    Assert.assertEquals(ThirdeyeAvroUtils.getDataTypeForField("m1", avroSchema), "DOUBLE",
        "Avro schema constructed incorrectly");
    Assert.assertEquals(ThirdeyeAvroUtils.getDataTypeForField("t", avroSchema), "LONG",
        "Avro schema constructed incorrectly");
}
Also used : TimeGranularitySpec(com.linkedin.pinot.common.data.TimeGranularitySpec) Schema(org.apache.avro.Schema) TimeFieldSpec(com.linkedin.pinot.common.data.TimeFieldSpec) MetricFieldSpec(com.linkedin.pinot.common.data.MetricFieldSpec) DimensionFieldSpec(com.linkedin.pinot.common.data.DimensionFieldSpec) TimeFieldSpec(com.linkedin.pinot.common.data.TimeFieldSpec) FieldSpec(com.linkedin.pinot.common.data.FieldSpec) MetricFieldSpec(com.linkedin.pinot.common.data.MetricFieldSpec) DimensionFieldSpec(com.linkedin.pinot.common.data.DimensionFieldSpec) Test(org.testng.annotations.Test)

Example 13 with MetricFieldSpec

use of com.linkedin.pinot.common.data.MetricFieldSpec in project pinot by linkedin.

the class ThirdeyePinotSchemaUtils method createSchema.

/**
 * Transforms the thirdeyeConfig to a Pinot schema.
 * <p>
 * Adds a default __COUNT metric if not already present, and adds an additional
 * column for every dimension that is either specified as topk or whitelist
 * (such dimensions have a transformed new column_raw).
 *
 * @param thirdeyeConfig the ThirdEye configuration to convert
 * @return the equivalent Pinot schema
 */
public static Schema createSchema(ThirdEyeConfig thirdeyeConfig) {
    Schema schema = new Schema();
    Set<String> transformDimensions = thirdeyeConfig.getTransformDimensions();
    for (DimensionSpec dimensionSpec : thirdeyeConfig.getDimensions()) {
        String dimensionName = dimensionSpec.getName();
        schema.addField(dimensionName, newStringDimensionFieldSpec(dimensionName));
        // Transformed (topk/whitelist) dimensions get an extra derived column.
        if (transformDimensions.contains(dimensionName)) {
            String topkName = dimensionName + ThirdEyeConstants.TOPK_DIMENSION_SUFFIX;
            schema.addField(topkName, newStringDimensionFieldSpec(topkName));
        }
    }
    boolean countIncluded = false;
    for (MetricSpec metricSpec : thirdeyeConfig.getMetrics()) {
        FieldSpec fieldSpec = new MetricFieldSpec();
        String metricName = metricSpec.getName();
        if (metricName.equals(ThirdEyeConstants.AUTO_METRIC_COUNT)) {
            countIncluded = true;
        }
        fieldSpec.setName(metricName);
        fieldSpec.setDataType(DataType.valueOf(metricSpec.getType().toString()));
        fieldSpec.setSingleValueField(true);
        schema.addField(metricName, fieldSpec);
    }
    if (!countIncluded) {
        // Default __COUNT metric: LONG with default null value 1 so that summing
        // it yields row counts.
        FieldSpec fieldSpec = new MetricFieldSpec();
        String metricName = ThirdEyeConstants.AUTO_METRIC_COUNT;
        fieldSpec.setName(metricName);
        fieldSpec.setDataType(DataType.LONG);
        fieldSpec.setDefaultNullValue(1);
        schema.addField(metricName, fieldSpec);
    }
    // Incoming and outgoing granularities are identical; build each from the
    // shared helper (two distinct instances, matching the original behavior).
    TimeGranularitySpec incoming = newTimeGranularitySpec(thirdeyeConfig);
    TimeGranularitySpec outgoing = newTimeGranularitySpec(thirdeyeConfig);
    schema.addField(thirdeyeConfig.getTime().getColumnName(), new TimeFieldSpec(incoming, outgoing));
    schema.setSchemaName(thirdeyeConfig.getCollection());
    return schema;
}

/** Creates a single-value STRING dimension field spec with the given name. */
private static FieldSpec newStringDimensionFieldSpec(String name) {
    FieldSpec fieldSpec = new DimensionFieldSpec();
    fieldSpec.setName(name);
    fieldSpec.setDataType(DataType.STRING);
    fieldSpec.setSingleValueField(true);
    return fieldSpec;
}

/** Builds a LONG time-granularity spec from the config's time settings. */
private static TimeGranularitySpec newTimeGranularitySpec(ThirdEyeConfig thirdeyeConfig) {
    return new TimeGranularitySpec(DataType.LONG,
        thirdeyeConfig.getTime().getTimeGranularity().getSize(),
        thirdeyeConfig.getTime().getTimeGranularity().getUnit(),
        thirdeyeConfig.getTime().getTimeFormat(),
        thirdeyeConfig.getTime().getColumnName());
}
Also used : DimensionSpec(com.linkedin.thirdeye.hadoop.config.DimensionSpec) TimeGranularitySpec(com.linkedin.pinot.common.data.TimeGranularitySpec) Schema(com.linkedin.pinot.common.data.Schema) MetricSpec(com.linkedin.thirdeye.hadoop.config.MetricSpec) TimeFieldSpec(com.linkedin.pinot.common.data.TimeFieldSpec) MetricFieldSpec(com.linkedin.pinot.common.data.MetricFieldSpec) MetricFieldSpec(com.linkedin.pinot.common.data.MetricFieldSpec) DimensionFieldSpec(com.linkedin.pinot.common.data.DimensionFieldSpec) TimeFieldSpec(com.linkedin.pinot.common.data.TimeFieldSpec) FieldSpec(com.linkedin.pinot.common.data.FieldSpec) DimensionFieldSpec(com.linkedin.pinot.common.data.DimensionFieldSpec)

Example 14 with MetricFieldSpec

use of com.linkedin.pinot.common.data.MetricFieldSpec in project pinot by linkedin.

the class PlainFieldExtractor method transform.

@Override
// Transforms a source row into the destination row: converts the incoming time
// column to the outgoing time format, coerces each column value to the schema's
// declared type, and substitutes the field's default for null/unconvertible
// values. Also updates the extractor's error/null/conversion statistics.
public GenericRow transform(GenericRow row, GenericRow destinationRow) {
    // Per-row flags so each row increments the row-level totals at most once,
    // while _errorCount/_totalNullCols track per-column occurrences.
    boolean hasError = false;
    boolean hasNull = false;
    boolean hasConversion = false;
    for (String column : _schema.getColumnNames()) {
        FieldSpec fieldSpec = _schema.getFieldSpecFor(column);
        // Ignore transform of DerivedMetric
        if (fieldSpec instanceof MetricFieldSpec && ((MetricFieldSpec) fieldSpec).isDerivedMetric()) {
            continue;
        }
        Object value;
        // Fetch value for this column.
        if (column.equals(_outgoingTimeColumnName) && _timeConverter != null) {
            // Convert incoming time to outgoing time.
            value = row.getValue(_incomingTimeColumnName);
            if (value == null) {
                hasNull = true;
                _totalNullCols++;
            } else {
                try {
                    value = _timeConverter.convert(value);
                } catch (Exception e) {
                    // Conversion failure: null the value so the default is applied below.
                    LOGGER.debug("Caught exception while converting incoming time value: {}", value, e);
                    value = null;
                    hasError = true;
                    _errorCount.put(column, _errorCount.get(column) + 1);
                }
            }
        } else {
            value = row.getValue(column);
            if (value == null) {
                hasNull = true;
                _totalNullCols++;
            }
        }
        // Convert value if necessary.
        PinotDataType dest = _columnType.get(column);
        PinotDataType source = null;
        if (value != null) {
            if (value instanceof Object[]) {
                // Multi-value.
                Object[] valueArray = (Object[]) value;
                if (valueArray.length > 0) {
                    // Infer the source type from the first element; fall back to OBJECT_ARRAY.
                    source = MULTI_VALUE_TYPE_MAP.get(valueArray[0].getClass());
                    if (source == null) {
                        source = PinotDataType.OBJECT_ARRAY;
                    }
                } else {
                    LOGGER.debug("Got 0 length array.");
                    // Use default value for 0 length array.
                    value = null;
                    hasError = true;
                    _errorCount.put(column, _errorCount.get(column) + 1);
                }
            } else {
                // Single-value.
                source = SINGLE_VALUE_TYPE_MAP.get(value.getClass());
                if (source == null) {
                    source = PinotDataType.OBJECT;
                }
            }
            if (value != null && source != dest) {
                Object before = value;
                try {
                    value = dest.convert(before, source);
                    hasConversion = true;
                } catch (Exception e) {
                    // Failed coercion: null the value so the default is applied below.
                    LOGGER.debug("Caught exception while converting value: {} from: {} to: {}", before, source, dest);
                    value = null;
                    hasError = true;
                    _errorCount.put(column, _errorCount.get(column) + 1);
                }
            }
            // Trim trailing NUL characters from strings.
            // Allowing this can cause multiple values to map to the same padded value, breaking segment generation.
            if (dest == PinotDataType.STRING) {
                value = StringUtil.trimTrailingNulls((String) value);
            }
        }
        // Assign default value for null value.
        if (value == null) {
            if (fieldSpec.isSingleValueField()) {
                // Single-value field.
                value = fieldSpec.getDefaultNullValue();
            } else {
                // Multi-value field.
                value = new Object[] { fieldSpec.getDefaultNullValue() };
            }
        }
        destinationRow.putField(column, value);
    }
    // Roll the per-row flags into the extractor-wide totals.
    if (hasError) {
        _totalErrors++;
    }
    if (hasNull) {
        _totalNulls++;
    }
    if (hasConversion) {
        _totalConversions++;
    }
    return destinationRow;
}
Also used : MetricFieldSpec(com.linkedin.pinot.common.data.MetricFieldSpec) MetricFieldSpec(com.linkedin.pinot.common.data.MetricFieldSpec) TimeFieldSpec(com.linkedin.pinot.common.data.TimeFieldSpec) FieldSpec(com.linkedin.pinot.common.data.FieldSpec)

Example 15 with MetricFieldSpec

use of com.linkedin.pinot.common.data.MetricFieldSpec in project pinot by linkedin.

the class PinotSegmentRecordReader method getSchema.

@Override
// Reconstructs a Pinot schema from the segment's column metadata, creating the
// appropriate field spec (dimension, metric, or time) per column.
public Schema getSchema() {
    Schema schema = new Schema();
    schema.setSchemaName(segmentMetadata.getName());
    for (String column : columns) {
        ColumnMetadata columnMetadata = segmentMetadata.getColumnMetadataFor(column);
        String columnName = columnMetadata.getColumnName();
        DataType dataType = columnMetadata.getDataType();
        FieldType fieldType = columnMetadata.getFieldType();
        FieldSpec fieldSpec = null;
        switch(fieldType) {
            case DIMENSION:
                boolean isSingleValue = columnMetadata.isSingleValue();
                fieldSpec = new DimensionFieldSpec(columnName, dataType, isSingleValue);
                break;
            case METRIC:
                fieldSpec = new MetricFieldSpec(columnName, dataType);
                break;
            case TIME:
                TimeUnit timeType = columnMetadata.getTimeUnit();
                TimeGranularitySpec incomingGranularitySpec = new TimeGranularitySpec(dataType, timeType, columnName);
                fieldSpec = new TimeFieldSpec(incomingGranularitySpec);
                break;
            default:
                break;
        }
        // Guard against the default branch: previously a null fieldSpec was
        // passed to schema.addField for unrecognized field types.
        if (fieldSpec != null) {
            schema.addField(fieldSpec);
        }
    }
    return schema;
}
Also used : TimeGranularitySpec(com.linkedin.pinot.common.data.TimeGranularitySpec) ColumnMetadata(com.linkedin.pinot.core.segment.index.ColumnMetadata) Schema(com.linkedin.pinot.common.data.Schema) TimeFieldSpec(com.linkedin.pinot.common.data.TimeFieldSpec) DataType(com.linkedin.pinot.common.data.FieldSpec.DataType) TimeUnit(java.util.concurrent.TimeUnit) MetricFieldSpec(com.linkedin.pinot.common.data.MetricFieldSpec) TimeFieldSpec(com.linkedin.pinot.common.data.TimeFieldSpec) FieldSpec(com.linkedin.pinot.common.data.FieldSpec) MetricFieldSpec(com.linkedin.pinot.common.data.MetricFieldSpec) DimensionFieldSpec(com.linkedin.pinot.common.data.DimensionFieldSpec) DimensionFieldSpec(com.linkedin.pinot.common.data.DimensionFieldSpec) FieldType(com.linkedin.pinot.common.data.FieldSpec.FieldType)

Aggregations

MetricFieldSpec (com.linkedin.pinot.common.data.MetricFieldSpec)25 DimensionFieldSpec (com.linkedin.pinot.common.data.DimensionFieldSpec)16 TimeFieldSpec (com.linkedin.pinot.common.data.TimeFieldSpec)14 Schema (com.linkedin.pinot.common.data.Schema)13 FieldSpec (com.linkedin.pinot.common.data.FieldSpec)11 TimeGranularitySpec (com.linkedin.pinot.common.data.TimeGranularitySpec)6 DashboardConfigDTO (com.linkedin.thirdeye.datalayer.dto.DashboardConfigDTO)4 MetricConfigDTO (com.linkedin.thirdeye.datalayer.dto.MetricConfigDTO)4 File (java.io.File)4 FieldType (com.linkedin.pinot.common.data.FieldSpec.FieldType)3 GenericRow (com.linkedin.pinot.core.data.GenericRow)3 HashMap (java.util.HashMap)3 Field (org.apache.avro.Schema.Field)3 Test (org.testng.annotations.Test)3 DataType (com.linkedin.pinot.common.data.FieldSpec.DataType)2 DatasetConfigDTO (com.linkedin.thirdeye.datalayer.dto.DatasetConfigDTO)2 FileInputStream (java.io.FileInputStream)2 ByteBuffer (java.nio.ByteBuffer)2 ArrayList (java.util.ArrayList)2 DataFileStream (org.apache.avro.file.DataFileStream)2