Search in sources:

Example 6 with TimeFieldSpec

Use of com.linkedin.pinot.common.data.TimeFieldSpec in the pinot project by LinkedIn.

From the class AvroUtils, method getPinotSchemaFromAvroSchema.

/**
   * Given an avro schema object along with column field types and time unit, return the equivalent
   * pinot schema object.
   *
   * @param avroSchema Avro schema for which to get the Pinot schema.
   * @param fieldTypes Map containing the Pinot field type for each column; every Avro field name
   *                   is expected to have an entry.
   * @param timeUnit Time unit to be used for the time column.
   * @return Return the equivalent pinot schema for the given avro schema.
   * @throws UnsupportedOperationException if a field has no mapped field type, or the mapped type
   *         is not DIMENSION, METRIC or TIME.
   */
private static Schema getPinotSchemaFromAvroSchema(org.apache.avro.Schema avroSchema, Map<String, FieldSpec.FieldType> fieldTypes, TimeUnit timeUnit) {
    Schema pinotSchema = new Schema();
    for (final Field field : avroSchema.getFields()) {
        String fieldName = field.name();
        FieldSpec.DataType dataType;
        try {
            dataType = AvroRecordReader.getColumnType(field);
        } catch (UnsupportedOperationException e) {
            // Unknown Avro column types degrade to STRING instead of failing the whole schema.
            LOGGER.warn("Unsupported field type for field {} schema {}, using String instead.", fieldName, field.schema());
            dataType = FieldSpec.DataType.STRING;
        }
        FieldSpec.FieldType fieldType = fieldTypes.get(fieldName);
        // Fail fast with a descriptive message; switching on a null enum would throw a bare NPE
        // that gives no hint which column is missing from the fieldTypes map.
        if (fieldType == null) {
            throw new UnsupportedOperationException("No field type specified for field: " + fieldName);
        }
        boolean isSingleValueField = AvroRecordReader.isSingleValueField(field);
        switch(fieldType) {
            case DIMENSION:
                pinotSchema.addField(new DimensionFieldSpec(fieldName, dataType, isSingleValueField));
                break;
            case METRIC:
                // Metrics and time columns must be single-valued in Pinot.
                Preconditions.checkState(isSingleValueField, "Unsupported multi-value for metric field.");
                pinotSchema.addField(new MetricFieldSpec(fieldName, dataType));
                break;
            case TIME:
                Preconditions.checkState(isSingleValueField, "Unsupported multi-value for time field.");
                pinotSchema.addField(new TimeFieldSpec(fieldName, dataType, timeUnit));
                break;
            default:
                throw new UnsupportedOperationException("Unsupported field type: " + fieldType + " for field: " + fieldName);
        }
    }
    return pinotSchema;
}
Also used : Field(org.apache.avro.Schema.Field) Schema(com.linkedin.pinot.common.data.Schema) TimeFieldSpec(com.linkedin.pinot.common.data.TimeFieldSpec) MetricFieldSpec(com.linkedin.pinot.common.data.MetricFieldSpec) MetricFieldSpec(com.linkedin.pinot.common.data.MetricFieldSpec) DimensionFieldSpec(com.linkedin.pinot.common.data.DimensionFieldSpec) TimeFieldSpec(com.linkedin.pinot.common.data.TimeFieldSpec) FieldSpec(com.linkedin.pinot.common.data.FieldSpec) DimensionFieldSpec(com.linkedin.pinot.common.data.DimensionFieldSpec)

Example 7 with TimeFieldSpec

Use of com.linkedin.pinot.common.data.TimeFieldSpec in the pinot project by LinkedIn.

From the class KafkaJSONMessageDecoder, method decode.

/**
 * Decodes a UTF-8 JSON payload into the given row according to the Pinot schema.
 * Missing columns are filled with the schema's default null value; an empty JSON array for a
 * multi-value dimension becomes a single-element array holding the default null value.
 *
 * @param payload raw message bytes, expected to be a UTF-8 encoded JSON object.
 * @param destination row to populate.
 * @return the populated destination row, or {@code null} if decoding failed for any reason.
 */
@Override
public GenericRow decode(byte[] payload, GenericRow destination) {
    try {
        // StandardCharsets avoids the checked UnsupportedEncodingException of the
        // charset-name overload and skips the charset lookup by name.
        String text = new String(payload, java.nio.charset.StandardCharsets.UTF_8);
        JSONObject message = new JSONObject(text);
        for (FieldSpec dimensionSpec : schema.getDimensionFieldSpecs()) {
            Object entry;
            if (message.has(dimensionSpec.getName()) && !dimensionSpec.isSingleValueField()) {
                // Multi-value dimension: convert each array element; an empty array is replaced
                // by a single default-null element so downstream code never sees length 0.
                JSONArray jsonArray = message.getJSONArray(dimensionSpec.getName());
                Object[] array = new Object[jsonArray.length()];
                for (int i = 0; i < array.length; i++) {
                    array[i] = stringToDataType(dimensionSpec, jsonArray.getString(i));
                }
                if (array.length == 0) {
                    entry = new Object[] { AvroRecordReader.getDefaultNullValue(dimensionSpec) };
                } else {
                    entry = array;
                }
            } else {
                // Single-value dimension present in the message, or absent column (default null).
                entry = readSingleValue(message, dimensionSpec);
            }
            destination.putField(dimensionSpec.getName(), entry);
        }
        for (FieldSpec metricSpec : schema.getMetricFieldSpecs()) {
            destination.putField(metricSpec.getName(), readSingleValue(message, metricSpec));
        }
        // NOTE(review): assumes the schema always has a time field spec; a schema without one
        // would NPE here and be swallowed by the catch below — confirm against callers.
        TimeFieldSpec timeSpec = schema.getTimeFieldSpec();
        destination.putField(timeSpec.getName(), readSingleValue(message, timeSpec));
        return destination;
    } catch (Exception e) {
        // Best effort: a malformed record is logged and dropped rather than killing the consumer.
        LOGGER.error("Error decoding JSON record, ", e);
    }
    return null;
}

/**
 * Reads a single-value column from the message, converting it to the spec's data type, or
 * returns the schema default null value when the column is absent.
 */
private Object readSingleValue(JSONObject message, FieldSpec spec) throws Exception {
    if (message.has(spec.getName())) {
        return stringToDataType(spec, message.getString(spec.getName()));
    }
    return AvroRecordReader.getDefaultNullValue(spec);
}
Also used : JSONObject(org.json.JSONObject) TimeFieldSpec(com.linkedin.pinot.common.data.TimeFieldSpec) JSONArray(org.json.JSONArray) JSONObject(org.json.JSONObject) TimeFieldSpec(com.linkedin.pinot.common.data.TimeFieldSpec) FieldSpec(com.linkedin.pinot.common.data.FieldSpec)

Example 8 with TimeFieldSpec

Use of com.linkedin.pinot.common.data.TimeFieldSpec in the pinot project by LinkedIn.

From the class OffHeapStarTreeBuilder, method init.

/**
 * Initializes the star-tree builder from the given config: copies builder settings, resolves the
 * output directory (falling back to a timestamped temp directory), registers dimension columns
 * (including the time column, which is treated as an extra dimension), registers metric columns
 * and their cumulative record size, opens the output data file, and creates the root index node.
 *
 * @param builderConfig configuration holding the schema, split order and output settings.
 * @throws Exception if the output data file cannot be opened.
 */
public void init(StarTreeBuilderConfig builderConfig) throws Exception {
    schema = builderConfig.schema;
    timeColumnName = schema.getTimeColumnName();
    this.dimensionsSplitOrder = builderConfig.dimensionsSplitOrder;
    skipStarNodeCreationForDimensions = builderConfig.getSkipStarNodeCreationForDimensions();
    skipMaterializationForDimensions = builderConfig.getSkipMaterializationForDimensions();
    skipMaterializationCardinalityThreshold = builderConfig.getSkipMaterializationCardinalityThreshold();
    // "OffHealp" is a typo in the config API itself; kept as-is to match the external method name.
    enableOffHeapFormat = builderConfig.isEnableOffHealpFormat();
    this.maxLeafRecords = builderConfig.maxLeafRecords;
    this.outDir = builderConfig.getOutDir();
    if (outDir == null) {
        // No directory configured: use a timestamped directory under the system temp dir.
        outDir = new File(System.getProperty("java.io.tmpdir"), V1Constants.STAR_TREE_INDEX_DIR + "_" + DateTime.now());
    }
    LOG.info("Index output directory:{}", outDir);
    dimensionTypes = new ArrayList<>();
    dimensionNames = new ArrayList<>();
    dimensionNameToIndexMap = HashBiMap.create();
    dimensionNameToStarValueMap = new HashMap<>();
    dictionaryMap = new HashMap<>();
    // READ DIMENSIONS COLUMNS
    List<DimensionFieldSpec> dimensionFieldSpecs = schema.getDimensionFieldSpecs();
    for (int index = 0; index < dimensionFieldSpecs.size(); index++) {
        DimensionFieldSpec spec = dimensionFieldSpecs.get(index);
        String dimensionName = spec.getName();
        dimensionNames.add(dimensionName);
        dimensionNameToIndexMap.put(dimensionName, index);
        Object starValue;
        starValue = getAllStarValue(spec);
        dimensionNameToStarValueMap.put(dimensionName, starValue);
        dimensionTypes.add(spec.getDataType());
        HashBiMap<Object, Integer> dictionary = HashBiMap.create();
        dictionaryMap.put(dimensionName, dictionary);
    }
    // The time column is treated as just another dimension; it is not materialized for
    // this dimension unless explicitly specified in split order
    if (timeColumnName != null) {
        dimensionNames.add(timeColumnName);
        TimeFieldSpec timeFieldSpec = schema.getTimeFieldSpec();
        dimensionTypes.add(timeFieldSpec.getDataType());
        int index = dimensionNameToIndexMap.size();
        dimensionNameToIndexMap.put(timeColumnName, index);
        Object starValue;
        starValue = getAllStarValue(timeFieldSpec);
        dimensionNameToStarValueMap.put(timeColumnName, starValue);
        HashBiMap<Object, Integer> dictionary = HashBiMap.create();
        dictionaryMap.put(schema.getTimeColumnName(), dictionary);
    }
    // Each dimension value is stored as one dictionary-encoded int.
    dimensionSizeBytes = dimensionNames.size() * Integer.SIZE / 8;
    this.numDimensions = dimensionNames.size();
    // READ METRIC COLUMNS
    this.metricNames = new ArrayList<>();
    this.metricNameToIndexMap = new HashMap<>();
    this.metricSizeBytes = 0;
    List<MetricFieldSpec> metricFieldSpecs = schema.getMetricFieldSpecs();
    for (int index = 0; index < metricFieldSpecs.size(); index++) {
        MetricFieldSpec spec = metricFieldSpecs.get(index);
        String metricName = spec.getName();
        metricNames.add(metricName);
        metricNameToIndexMap.put(metricName, index);
        metricSizeBytes += spec.getFieldSize();
    }
    numMetrics = metricNames.size();
    // BUG FIX: the previous code called builderConfig.getOutDir().mkdirs(), which throws an NPE
    // when no outDir was configured and ignores the temp-directory fallback resolved above.
    outDir.mkdirs();
    dataFile = new File(outDir, "star-tree.buf");
    LOG.info("StarTree output data file: {}", dataFile.getAbsolutePath());
    dataBuffer = new DataOutputStream(new BufferedOutputStream(new FileOutputStream(dataFile)));
    // INITIALIZE THE ROOT NODE
    this.starTreeRootIndexNode = new StarTreeIndexNode();
    this.starTreeRootIndexNode.setDimensionName(StarTreeIndexNodeInterf.ALL);
    this.starTreeRootIndexNode.setDimensionValue(StarTreeIndexNodeInterf.ALL);
    this.starTreeRootIndexNode.setLevel(0);
    LOG.info("dimensionNames:{}", dimensionNames);
    LOG.info("metricNames:{}", metricNames);
}
Also used : DataOutputStream(java.io.DataOutputStream) TimeFieldSpec(com.linkedin.pinot.common.data.TimeFieldSpec) MetricFieldSpec(com.linkedin.pinot.common.data.MetricFieldSpec) FileOutputStream(java.io.FileOutputStream) JSONObject(org.json.JSONObject) File(java.io.File) BufferedOutputStream(java.io.BufferedOutputStream) DimensionFieldSpec(com.linkedin.pinot.common.data.DimensionFieldSpec)

Example 9 with TimeFieldSpec

Use of com.linkedin.pinot.common.data.TimeFieldSpec in the pinot project by LinkedIn.

From the class PinotSegmentRecordReaderTest, method createPinotSchema.

/**
 * Builds the fixed Pinot schema used by this test: a single-value and a multi-value STRING
 * dimension, an INT and a FLOAT metric, and an hourly LONG time column.
 */
private Schema createPinotSchema() {
    Schema testSchema = new Schema();
    testSchema.setSchemaName("schema");
    // Dimensions: single-value and multi-value string columns.
    testSchema.addField(new DimensionFieldSpec(D_SV_1, DataType.STRING, true));
    testSchema.addField(new DimensionFieldSpec(D_MV_1, DataType.STRING, false));
    // Metrics.
    testSchema.addField(new MetricFieldSpec(M1, DataType.INT));
    testSchema.addField(new MetricFieldSpec(M2, DataType.FLOAT));
    // Time column: LONG values at hour granularity.
    testSchema.addField(new TimeFieldSpec(new TimeGranularitySpec(DataType.LONG, TimeUnit.HOURS, TIME)));
    return testSchema;
}
Also used : TimeGranularitySpec(com.linkedin.pinot.common.data.TimeGranularitySpec) Schema(com.linkedin.pinot.common.data.Schema) TimeFieldSpec(com.linkedin.pinot.common.data.TimeFieldSpec) MetricFieldSpec(com.linkedin.pinot.common.data.MetricFieldSpec) MetricFieldSpec(com.linkedin.pinot.common.data.MetricFieldSpec) DimensionFieldSpec(com.linkedin.pinot.common.data.DimensionFieldSpec) TimeFieldSpec(com.linkedin.pinot.common.data.TimeFieldSpec) FieldSpec(com.linkedin.pinot.common.data.FieldSpec) DimensionFieldSpec(com.linkedin.pinot.common.data.DimensionFieldSpec)

Example 10 with TimeFieldSpec

Use of com.linkedin.pinot.common.data.TimeFieldSpec in the pinot project by LinkedIn.

From the class ThirdeyePinotSchemaUtils, method createSchema.

/**
   * Transforms the thirdeyeConfig to a pinot schema.
   * Adds the default __COUNT metric if not already present.
   * Adds an additional column for every dimension that is either specified as topk or whitelist,
   * and hence has a transformed new column_raw.
   * @param thirdeyeConfig ThirdEye configuration to convert.
   * @return the equivalent Pinot schema.
   */
public static Schema createSchema(ThirdEyeConfig thirdeyeConfig) {
    Schema schema = new Schema();
    Set<String> transformDimensions = thirdeyeConfig.getTransformDimensions();
    // Dimensions: one single-value STRING column each; topk/whitelist dimensions get an
    // additional transformed column with the topk suffix.
    for (DimensionSpec dimensionSpec : thirdeyeConfig.getDimensions()) {
        String dimensionName = dimensionSpec.getName();
        addStringDimension(schema, dimensionName);
        if (transformDimensions.contains(dimensionName)) {
            addStringDimension(schema, dimensionName + ThirdEyeConstants.TOPK_DIMENSION_SUFFIX);
        }
    }
    // Metrics: copy each configured metric, tracking whether the auto count metric is present.
    boolean countIncluded = false;
    for (MetricSpec metricSpec : thirdeyeConfig.getMetrics()) {
        String metricName = metricSpec.getName();
        if (metricName.equals(ThirdEyeConstants.AUTO_METRIC_COUNT)) {
            countIncluded = true;
        }
        FieldSpec metricFieldSpec = new MetricFieldSpec();
        metricFieldSpec.setName(metricName);
        metricFieldSpec.setDataType(DataType.valueOf(metricSpec.getType().toString()));
        metricFieldSpec.setSingleValueField(true);
        schema.addField(metricName, metricFieldSpec);
    }
    if (!countIncluded) {
        // Inject the default __COUNT metric with a default value of 1 per record.
        String countName = ThirdEyeConstants.AUTO_METRIC_COUNT;
        FieldSpec countSpec = new MetricFieldSpec();
        countSpec.setName(countName);
        countSpec.setDataType(DataType.LONG);
        countSpec.setDefaultNullValue(1);
        schema.addField(countName, countSpec);
    }
    // Time column: incoming and outgoing granularity specs are identical — LONG values with the
    // configured size, unit and format.
    String timeColumnName = thirdeyeConfig.getTime().getColumnName();
    TimeGranularitySpec incoming = new TimeGranularitySpec(DataType.LONG, thirdeyeConfig.getTime().getTimeGranularity().getSize(), thirdeyeConfig.getTime().getTimeGranularity().getUnit(), thirdeyeConfig.getTime().getTimeFormat(), timeColumnName);
    TimeGranularitySpec outgoing = new TimeGranularitySpec(DataType.LONG, thirdeyeConfig.getTime().getTimeGranularity().getSize(), thirdeyeConfig.getTime().getTimeGranularity().getUnit(), thirdeyeConfig.getTime().getTimeFormat(), timeColumnName);
    schema.addField(timeColumnName, new TimeFieldSpec(incoming, outgoing));
    schema.setSchemaName(thirdeyeConfig.getCollection());
    return schema;
}

/** Adds a single-value STRING dimension column with the given name to the schema. */
private static void addStringDimension(Schema schema, String name) {
    FieldSpec fieldSpec = new DimensionFieldSpec();
    fieldSpec.setName(name);
    fieldSpec.setDataType(DataType.STRING);
    fieldSpec.setSingleValueField(true);
    schema.addField(name, fieldSpec);
}
Also used : DimensionSpec(com.linkedin.thirdeye.hadoop.config.DimensionSpec) TimeGranularitySpec(com.linkedin.pinot.common.data.TimeGranularitySpec) Schema(com.linkedin.pinot.common.data.Schema) MetricSpec(com.linkedin.thirdeye.hadoop.config.MetricSpec) TimeFieldSpec(com.linkedin.pinot.common.data.TimeFieldSpec) MetricFieldSpec(com.linkedin.pinot.common.data.MetricFieldSpec) MetricFieldSpec(com.linkedin.pinot.common.data.MetricFieldSpec) DimensionFieldSpec(com.linkedin.pinot.common.data.DimensionFieldSpec) TimeFieldSpec(com.linkedin.pinot.common.data.TimeFieldSpec) FieldSpec(com.linkedin.pinot.common.data.FieldSpec) DimensionFieldSpec(com.linkedin.pinot.common.data.DimensionFieldSpec)

Aggregations

TimeFieldSpec (com.linkedin.pinot.common.data.TimeFieldSpec)17 DimensionFieldSpec (com.linkedin.pinot.common.data.DimensionFieldSpec)12 MetricFieldSpec (com.linkedin.pinot.common.data.MetricFieldSpec)12 Schema (com.linkedin.pinot.common.data.Schema)11 FieldSpec (com.linkedin.pinot.common.data.FieldSpec)10 TimeGranularitySpec (com.linkedin.pinot.common.data.TimeGranularitySpec)7 File (java.io.File)4 FieldType (com.linkedin.pinot.common.data.FieldSpec.FieldType)3 JSONObject (org.json.JSONObject)3 DataType (com.linkedin.pinot.common.data.FieldSpec.DataType)2 GenericRow (com.linkedin.pinot.core.data.GenericRow)2 HashMap (java.util.HashMap)2 TimeUnit (java.util.concurrent.TimeUnit)2 Field (org.apache.avro.Schema.Field)2 Test (org.testng.annotations.Test)2 AbstractTableConfig (com.linkedin.pinot.common.config.AbstractTableConfig)1 IndexingConfig (com.linkedin.pinot.common.config.IndexingConfig)1 StarTreeIndexSpec (com.linkedin.pinot.common.data.StarTreeIndexSpec)1 CSVRecordReaderConfig (com.linkedin.pinot.core.data.readers.CSVRecordReaderConfig)1 ColumnMetadata (com.linkedin.pinot.core.segment.index.ColumnMetadata)1