
Example 1 with MetricFieldSpec

Use of com.linkedin.pinot.common.data.MetricFieldSpec in project pinot by linkedin.

From the class AvroUtils, the method getPinotSchemaFromAvroSchema:

/**
   * Given an avro schema object along with column field types and time unit, return the equivalent
   * pinot schema object.
   *
   * @param avroSchema Avro schema for which to get the Pinot schema.
   * @param fieldTypes Map containing fieldTypes for each column.
   * @param timeUnit Time unit to be used for the time column.
   * @return Return the equivalent pinot schema for the given avro schema.
   */
private static Schema getPinotSchemaFromAvroSchema(org.apache.avro.Schema avroSchema, Map<String, FieldSpec.FieldType> fieldTypes, TimeUnit timeUnit) {
    Schema pinotSchema = new Schema();
    for (final Field field : avroSchema.getFields()) {
        String fieldName = field.name();
        FieldSpec.DataType dataType;
        try {
            dataType = AvroRecordReader.getColumnType(field);
        } catch (UnsupportedOperationException e) {
            LOGGER.warn("Unsupported field type for field {} schema {}, using String instead.", fieldName, field.schema());
            dataType = FieldSpec.DataType.STRING;
        }
        FieldSpec.FieldType fieldType = fieldTypes.get(fieldName);
        boolean isSingleValueField = AvroRecordReader.isSingleValueField(field);
        switch(fieldType) {
            case DIMENSION:
                pinotSchema.addField(new DimensionFieldSpec(fieldName, dataType, isSingleValueField));
                break;
            case METRIC:
                Preconditions.checkState(isSingleValueField, "Unsupported multi-value for metric field.");
                pinotSchema.addField(new MetricFieldSpec(fieldName, dataType));
                break;
            case TIME:
                Preconditions.checkState(isSingleValueField, "Unsupported multi-value for time field.");
                pinotSchema.addField(new TimeFieldSpec(fieldName, dataType, timeUnit));
                break;
            default:
                throw new UnsupportedOperationException("Unsupported field type: " + fieldType + " for field: " + fieldName);
        }
    }
    return pinotSchema;
}
Also used : Field(org.apache.avro.Schema.Field) Schema(com.linkedin.pinot.common.data.Schema) FieldSpec(com.linkedin.pinot.common.data.FieldSpec) DimensionFieldSpec(com.linkedin.pinot.common.data.DimensionFieldSpec) MetricFieldSpec(com.linkedin.pinot.common.data.MetricFieldSpec) TimeFieldSpec(com.linkedin.pinot.common.data.TimeFieldSpec)
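
The helper above takes a fieldTypes map keyed by column name and emits one FieldSpec subclass per column. A minimal sketch, using hypothetical column names and data types, of preparing that map and building the equivalent Pinot schema by hand with the same constructors:

import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.TimeUnit;

import com.linkedin.pinot.common.data.DimensionFieldSpec;
import com.linkedin.pinot.common.data.FieldSpec;
import com.linkedin.pinot.common.data.MetricFieldSpec;
import com.linkedin.pinot.common.data.Schema;
import com.linkedin.pinot.common.data.TimeFieldSpec;

public class AvroSchemaMappingSketch {
    public static void main(String[] args) {
        // Column-to-field-type mapping that would be passed as the fieldTypes argument.
        // Column names and types here are hypothetical.
        Map<String, FieldSpec.FieldType> fieldTypes = new HashMap<>();
        fieldTypes.put("country", FieldSpec.FieldType.DIMENSION);
        fieldTypes.put("clicks", FieldSpec.FieldType.METRIC);
        fieldTypes.put("daysSinceEpoch", FieldSpec.FieldType.TIME);

        // The schema the helper would produce for those columns, built by hand here
        // with the same constructors used in the switch above.
        Schema pinotSchema = new Schema();
        pinotSchema.addField(new DimensionFieldSpec("country", FieldSpec.DataType.STRING, true));
        pinotSchema.addField(new MetricFieldSpec("clicks", FieldSpec.DataType.LONG));
        pinotSchema.addField(new TimeFieldSpec("daysSinceEpoch", FieldSpec.DataType.INT, TimeUnit.DAYS));
    }
}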

Example 2 with MetricFieldSpec

Use of com.linkedin.pinot.common.data.MetricFieldSpec in project pinot by linkedin.

From the class OffHeapStarTreeBuilder, the method init:

public void init(StarTreeBuilderConfig builderConfig) throws Exception {
    schema = builderConfig.schema;
    timeColumnName = schema.getTimeColumnName();
    this.dimensionsSplitOrder = builderConfig.dimensionsSplitOrder;
    skipStarNodeCreationForDimensions = builderConfig.getSkipStarNodeCreationForDimensions();
    skipMaterializationForDimensions = builderConfig.getSkipMaterializationForDimensions();
    skipMaterializationCardinalityThreshold = builderConfig.getSkipMaterializationCardinalityThreshold();
    enableOffHeapFormat = builderConfig.isEnableOffHealpFormat();
    this.maxLeafRecords = builderConfig.maxLeafRecords;
    this.outDir = builderConfig.getOutDir();
    if (outDir == null) {
        outDir = new File(System.getProperty("java.io.tmpdir"), V1Constants.STAR_TREE_INDEX_DIR + "_" + DateTime.now());
    }
    LOG.info("Index output directory:{}", outDir);
    dimensionTypes = new ArrayList<>();
    dimensionNames = new ArrayList<>();
    dimensionNameToIndexMap = HashBiMap.create();
    dimensionNameToStarValueMap = new HashMap<>();
    dictionaryMap = new HashMap<>();
    // READ DIMENSION COLUMNS
    List<DimensionFieldSpec> dimensionFieldSpecs = schema.getDimensionFieldSpecs();
    for (int index = 0; index < dimensionFieldSpecs.size(); index++) {
        DimensionFieldSpec spec = dimensionFieldSpecs.get(index);
        String dimensionName = spec.getName();
        dimensionNames.add(dimensionName);
        dimensionNameToIndexMap.put(dimensionName, index);
        Object starValue;
        starValue = getAllStarValue(spec);
        dimensionNameToStarValueMap.put(dimensionName, starValue);
        dimensionTypes.add(spec.getDataType());
        HashBiMap<Object, Integer> dictionary = HashBiMap.create();
        dictionaryMap.put(dimensionName, dictionary);
    }
    // Treat the time column as just another dimension; the only difference is that we never split on
    // this dimension unless explicitly specified in split order
    if (timeColumnName != null) {
        dimensionNames.add(timeColumnName);
        TimeFieldSpec timeFieldSpec = schema.getTimeFieldSpec();
        dimensionTypes.add(timeFieldSpec.getDataType());
        int index = dimensionNameToIndexMap.size();
        dimensionNameToIndexMap.put(timeColumnName, index);
        Object starValue;
        starValue = getAllStarValue(timeFieldSpec);
        dimensionNameToStarValueMap.put(timeColumnName, starValue);
        HashBiMap<Object, Integer> dictionary = HashBiMap.create();
        dictionaryMap.put(schema.getTimeColumnName(), dictionary);
    }
    dimensionSizeBytes = dimensionNames.size() * Integer.SIZE / 8;
    this.numDimensions = dimensionNames.size();
    // READ METRIC COLUMNS
    this.metricNames = new ArrayList<>();
    this.metricNameToIndexMap = new HashMap<>();
    this.metricSizeBytes = 0;
    List<MetricFieldSpec> metricFieldSpecs = schema.getMetricFieldSpecs();
    for (int index = 0; index < metricFieldSpecs.size(); index++) {
        MetricFieldSpec spec = metricFieldSpecs.get(index);
        String metricName = spec.getName();
        metricNames.add(metricName);
        metricNameToIndexMap.put(metricName, index);
        metricSizeBytes += spec.getFieldSize();
    }
    numMetrics = metricNames.size();
    // Use the resolved outDir, which may have been defaulted above when the config did not set one.
    outDir.mkdirs();
    dataFile = new File(outDir, "star-tree.buf");
    LOG.info("StarTree output data file: {}", dataFile.getAbsolutePath());
    dataBuffer = new DataOutputStream(new BufferedOutputStream(new FileOutputStream(dataFile)));
    // INITIALIZE THE ROOT NODE
    this.starTreeRootIndexNode = new StarTreeIndexNode();
    this.starTreeRootIndexNode.setDimensionName(StarTreeIndexNodeInterf.ALL);
    this.starTreeRootIndexNode.setDimensionValue(StarTreeIndexNodeInterf.ALL);
    this.starTreeRootIndexNode.setLevel(0);
    LOG.info("dimensionNames:{}", dimensionNames);
    LOG.info("metricNames:{}", metricNames);
}
Also used : DataOutputStream(java.io.DataOutputStream) TimeFieldSpec(com.linkedin.pinot.common.data.TimeFieldSpec) MetricFieldSpec(com.linkedin.pinot.common.data.MetricFieldSpec) FileOutputStream(java.io.FileOutputStream) JSONObject(org.json.JSONObject) File(java.io.File) BufferedOutputStream(java.io.BufferedOutputStream) DimensionFieldSpec(com.linkedin.pinot.common.data.DimensionFieldSpec)
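
init() keeps a per-dimension dictionary that maps each raw dimension value to an integer id (and back), and sizes each dimension entry at Integer.SIZE / 8 = 4 bytes per id. A minimal sketch of that bidirectional lookup with Guava's HashBiMap, using hypothetical values:

import com.google.common.collect.HashBiMap;

public class DimensionDictionarySketch {
    public static void main(String[] args) {
        // Each dimension column gets its own value <-> dictionary-id mapping,
        // mirroring the HashBiMap created per dimension in init() above.
        HashBiMap<Object, Integer> dictionary = HashBiMap.create();
        dictionary.put("US", 0);
        dictionary.put("IN", 1);

        Integer dictId = dictionary.get("US");      // value -> id lookup: 0
        Object value = dictionary.inverse().get(1); // id -> value lookup: "IN"

        System.out.println(dictId + " " + value);
    }
}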

Example 3 with MetricFieldSpec

Use of com.linkedin.pinot.common.data.MetricFieldSpec in project pinot by linkedin.

From the class SegmentTestUtils, the method extractSchemaFromAvroWithoutTime:

public static Schema extractSchemaFromAvroWithoutTime(File avroFile) throws FileNotFoundException, IOException {
    DataFileStream<GenericRecord> dataStream = new DataFileStream<GenericRecord>(new FileInputStream(avroFile), new GenericDatumReader<GenericRecord>());
    Schema schema = new Schema();
    for (final Field field : dataStream.getSchema().getFields()) {
        try {
            getColumnType(field);
        } catch (Exception e) {
            LOGGER.warn("Caught exception while converting Avro field {} of type {}, field will not be in schema.", field.name(), field.schema().getType());
            continue;
        }
        final String columnName = field.name();
        final String pinotType = field.getProp("pinotType");
        final FieldSpec fieldSpec;
        if (pinotType != null && "METRIC".equals(pinotType)) {
            fieldSpec = new MetricFieldSpec();
        } else {
            fieldSpec = new DimensionFieldSpec();
        }
        fieldSpec.setName(columnName);
        fieldSpec.setDataType(getColumnType(dataStream.getSchema().getField(columnName)));
        fieldSpec.setSingleValueField(isSingleValueField(dataStream.getSchema().getField(columnName)));
        schema.addField(fieldSpec);
    }
    dataStream.close();
    return schema;
}
Also used : Field(org.apache.avro.Schema.Field) Schema(com.linkedin.pinot.common.data.Schema) DataFileStream(org.apache.avro.file.DataFileStream) GenericRecord(org.apache.avro.generic.GenericRecord) FileInputStream(java.io.FileInputStream) IOException(java.io.IOException) FileNotFoundException(java.io.FileNotFoundException) FieldSpec(com.linkedin.pinot.common.data.FieldSpec) DimensionFieldSpec(com.linkedin.pinot.common.data.DimensionFieldSpec) MetricFieldSpec(com.linkedin.pinot.common.data.MetricFieldSpec) TimeFieldSpec(com.linkedin.pinot.common.data.TimeFieldSpec)
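
A minimal usage sketch for the helper above, assuming SegmentTestUtils is in scope (its import is omitted) and the Avro file path is hypothetical:

import java.io.File;

import com.linkedin.pinot.common.data.DimensionFieldSpec;
import com.linkedin.pinot.common.data.MetricFieldSpec;
import com.linkedin.pinot.common.data.Schema;

public class ExtractSchemaSketch {
    public static void main(String[] args) throws Exception {
        // Hypothetical path; point this at any Avro data file.
        File avroFile = new File("/tmp/sample.avro");
        Schema schema = SegmentTestUtils.extractSchemaFromAvroWithoutTime(avroFile);

        // Columns tagged with pinotType=METRIC in the Avro field properties come back as metrics,
        // everything else as dimensions; this helper never produces a time column.
        for (MetricFieldSpec metric : schema.getMetricFieldSpecs()) {
            System.out.println("metric: " + metric.getName());
        }
        for (DimensionFieldSpec dimension : schema.getDimensionFieldSpecs()) {
            System.out.println("dimension: " + dimension.getName());
        }
    }
}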

Example 4 with MetricFieldSpec

Use of com.linkedin.pinot.common.data.MetricFieldSpec in project pinot by linkedin.

From the class DataGenerator, the method buildSpec:

private FieldSpec buildSpec(DataGeneratorSpec genSpec, String column) {
    DataType dataType = genSpec.getDataTypesMap().get(column);
    FieldType fieldType = genSpec.getFieldTypesMap().get(column);
    FieldSpec spec;
    switch(fieldType) {
        case DIMENSION:
            spec = new DimensionFieldSpec();
            break;
        case METRIC:
            spec = new MetricFieldSpec();
            break;
        case TIME:
            spec = new TimeFieldSpec(column, dataType, genSpec.getTimeUnitMap().get(column));
            break;
        default:
            throw new RuntimeException("Invalid Field type.");
    }
    spec.setName(column);
    spec.setDataType(dataType);
    spec.setSingleValueField(true);
    return spec;
}
Also used : FieldSpec(com.linkedin.pinot.common.data.FieldSpec) DimensionFieldSpec(com.linkedin.pinot.common.data.DimensionFieldSpec) MetricFieldSpec(com.linkedin.pinot.common.data.MetricFieldSpec) TimeFieldSpec(com.linkedin.pinot.common.data.TimeFieldSpec) DataType(com.linkedin.pinot.common.data.FieldSpec.DataType) FieldType(com.linkedin.pinot.common.data.FieldSpec.FieldType)
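
buildSpec is private to DataGenerator, but it only exercises the public FieldSpec API shown throughout these examples. A minimal sketch of the same three cases, with hypothetical column names and types:

import java.util.concurrent.TimeUnit;

import com.linkedin.pinot.common.data.DimensionFieldSpec;
import com.linkedin.pinot.common.data.FieldSpec;
import com.linkedin.pinot.common.data.MetricFieldSpec;
import com.linkedin.pinot.common.data.TimeFieldSpec;

public class FieldSpecConstructionSketch {
    public static void main(String[] args) {
        // Dimension and metric specs start empty and are filled in through setters,
        // exactly as buildSpec does after the switch.
        FieldSpec dimension = new DimensionFieldSpec();
        dimension.setName("country");
        dimension.setDataType(FieldSpec.DataType.STRING);
        dimension.setSingleValueField(true);

        FieldSpec metric = new MetricFieldSpec();
        metric.setName("clicks");
        metric.setDataType(FieldSpec.DataType.LONG);
        metric.setSingleValueField(true);

        // The time spec takes its name, type, and unit up front, matching the TIME case above.
        FieldSpec time = new TimeFieldSpec("daysSinceEpoch", FieldSpec.DataType.INT, TimeUnit.DAYS);
    }
}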

Example 5 with MetricFieldSpec

Use of com.linkedin.pinot.common.data.MetricFieldSpec in project pinot by linkedin.

From the class AutoLoadPinotMetricsService, the method addNewDataset:

/**
   * Adds a new dataset to the thirdeye database
   * @param dataset
   * @param schema
   */
private void addNewDataset(String dataset, Schema schema) throws Exception {
    List<MetricFieldSpec> metricSpecs = schema.getMetricFieldSpecs();
    // Create DatasetConfig
    DatasetConfigDTO datasetConfigDTO = ConfigGenerator.generateDatasetConfig(dataset, schema);
    LOG.info("Creating dataset for {}", dataset);
    DAO_REGISTRY.getDatasetConfigDAO().save(datasetConfigDTO);
    // Create MetricConfig
    for (MetricFieldSpec metricFieldSpec : metricSpecs) {
        MetricConfigDTO metricConfigDTO = ConfigGenerator.generateMetricConfig(metricFieldSpec, dataset);
        LOG.info("Creating metric {} for {}", metricConfigDTO.getName(), dataset);
        DAO_REGISTRY.getMetricConfigDAO().save(metricConfigDTO);
    }
    // Create Default DashboardConfig
    List<Long> metricIds = ConfigGenerator.getMetricIdsFromMetricConfigs(DAO_REGISTRY.getMetricConfigDAO().findByDataset(dataset));
    DashboardConfigDTO dashboardConfigDTO = ConfigGenerator.generateDefaultDashboardConfig(dataset, metricIds);
    LOG.info("Creating default dashboard for dataset {}", dataset);
    DAO_REGISTRY.getDashboardConfigDAO().save(dashboardConfigDTO);
}
Also used : DatasetConfigDTO(com.linkedin.thirdeye.datalayer.dto.DatasetConfigDTO) MetricConfigDTO(com.linkedin.thirdeye.datalayer.dto.MetricConfigDTO) MetricFieldSpec(com.linkedin.pinot.common.data.MetricFieldSpec) DashboardConfigDTO(com.linkedin.thirdeye.datalayer.dto.DashboardConfigDTO)
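
Each MetricConfigDTO above is generated from one MetricFieldSpec of the Pinot schema. A minimal sketch of inspecting those specs, with a hypothetical schema, before they would be handed to ConfigGenerator:

import com.linkedin.pinot.common.data.FieldSpec;
import com.linkedin.pinot.common.data.MetricFieldSpec;
import com.linkedin.pinot.common.data.Schema;

public class MetricSpecInspectionSketch {
    public static void main(String[] args) {
        // Hypothetical schema with a single metric column.
        Schema schema = new Schema();
        schema.addField(new MetricFieldSpec("clicks", FieldSpec.DataType.LONG));

        // Each MetricFieldSpec becomes one MetricConfigDTO in addNewDataset().
        for (MetricFieldSpec spec : schema.getMetricFieldSpecs()) {
            System.out.println(spec.getName() + " -> " + spec.getDataType());
        }
    }
}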

Aggregations

MetricFieldSpec (com.linkedin.pinot.common.data.MetricFieldSpec): 25
DimensionFieldSpec (com.linkedin.pinot.common.data.DimensionFieldSpec): 16
TimeFieldSpec (com.linkedin.pinot.common.data.TimeFieldSpec): 14
Schema (com.linkedin.pinot.common.data.Schema): 13
FieldSpec (com.linkedin.pinot.common.data.FieldSpec): 11
TimeGranularitySpec (com.linkedin.pinot.common.data.TimeGranularitySpec): 6
DashboardConfigDTO (com.linkedin.thirdeye.datalayer.dto.DashboardConfigDTO): 4
MetricConfigDTO (com.linkedin.thirdeye.datalayer.dto.MetricConfigDTO): 4
File (java.io.File): 4
FieldType (com.linkedin.pinot.common.data.FieldSpec.FieldType): 3
GenericRow (com.linkedin.pinot.core.data.GenericRow): 3
HashMap (java.util.HashMap): 3
Field (org.apache.avro.Schema.Field): 3
Test (org.testng.annotations.Test): 3
DataType (com.linkedin.pinot.common.data.FieldSpec.DataType): 2
DatasetConfigDTO (com.linkedin.thirdeye.datalayer.dto.DatasetConfigDTO): 2
FileInputStream (java.io.FileInputStream): 2
ByteBuffer (java.nio.ByteBuffer): 2
ArrayList (java.util.ArrayList): 2
DataFileStream (org.apache.avro.file.DataFileStream): 2