Search in sources :

Example 11 with TimeFieldSpec

use of com.linkedin.pinot.common.data.TimeFieldSpec in project pinot by linkedin.

The method getSchema of the class PinotSegmentRecordReader.

/**
 * Builds a {@link Schema} from the metadata of the segment being read.
 *
 * <p>The schema name is taken from the segment metadata, and one field spec is added for
 * every entry in {@code columns}, chosen by the column's {@link FieldType}:
 * dimension, metric or time.
 *
 * @return the schema assembled from the per-column metadata
 * @throws UnsupportedOperationException if a column reports a field type other than
 *         DIMENSION, METRIC or TIME
 */
@Override
public Schema getSchema() {
    Schema schema = new Schema();
    schema.setSchemaName(segmentMetadata.getName());
    for (String column : columns) {
        ColumnMetadata columnMetadata = segmentMetadata.getColumnMetadataFor(column);
        String columnName = columnMetadata.getColumnName();
        DataType dataType = columnMetadata.getDataType();
        FieldType fieldType = columnMetadata.getFieldType();
        FieldSpec fieldSpec;
        switch(fieldType) {
            case DIMENSION:
                boolean isSingleValue = columnMetadata.isSingleValue();
                fieldSpec = new DimensionFieldSpec(columnName, dataType, isSingleValue);
                break;
            case METRIC:
                fieldSpec = new MetricFieldSpec(columnName, dataType);
                break;
            case TIME:
                TimeUnit timeType = columnMetadata.getTimeUnit();
                TimeGranularitySpec incomingGranularitySpec = new TimeGranularitySpec(dataType, timeType, columnName);
                fieldSpec = new TimeFieldSpec(incomingGranularitySpec);
                break;
            default:
                // Previously this branch fell through and handed a null field spec to
                // schema.addField(...). Fail here instead, naming the offending column.
                throw new UnsupportedOperationException("Unsupported field type: " + fieldType + " for column: " + columnName);
        }
        schema.addField(fieldSpec);
    }
    return schema;
}
Also used : TimeGranularitySpec(com.linkedin.pinot.common.data.TimeGranularitySpec) ColumnMetadata(com.linkedin.pinot.core.segment.index.ColumnMetadata) Schema(com.linkedin.pinot.common.data.Schema) TimeFieldSpec(com.linkedin.pinot.common.data.TimeFieldSpec) DataType(com.linkedin.pinot.common.data.FieldSpec.DataType) TimeUnit(java.util.concurrent.TimeUnit) MetricFieldSpec(com.linkedin.pinot.common.data.MetricFieldSpec) TimeFieldSpec(com.linkedin.pinot.common.data.TimeFieldSpec) FieldSpec(com.linkedin.pinot.common.data.FieldSpec) MetricFieldSpec(com.linkedin.pinot.common.data.MetricFieldSpec) DimensionFieldSpec(com.linkedin.pinot.common.data.DimensionFieldSpec) DimensionFieldSpec(com.linkedin.pinot.common.data.DimensionFieldSpec) FieldType(com.linkedin.pinot.common.data.FieldSpec.FieldType)

Example 12 with TimeFieldSpec

use of com.linkedin.pinot.common.data.TimeFieldSpec in project pinot by linkedin.

The method loadConfigFiles of the class SegmentGeneratorConfig.

/**
   * Populates this config from the files it points at: the schema (from the schema file, or
   * extracted from the Avro input when no schema file is set), the time column name and
   * segment time unit derived from that schema, and, when configured, the reader config and
   * the star-tree index spec.
   *
   * @deprecated Load outside the class and use the setter for schema setting.
   * @throws IOException propagated from reading the schema or the JSON config files
   * @throws RuntimeException if no schema file is set and the input format is not AVRO
   */
@Deprecated
public void loadConfigFiles() throws IOException {
    final ObjectMapper jsonMapper = new ObjectMapper();

    // Resolve the schema first; every later step depends on it.
    final Schema loadedSchema;
    if (_schemaFile != null) {
        loadedSchema = Schema.fromFile(new File(_schemaFile));
    } else if (_format == FileFormat.AVRO) {
        loadedSchema = AvroUtils.extractSchemaFromAvro(new File(_inputFilePath));
    } else {
        throw new RuntimeException("Input format " + _format + " requires schema.");
    }
    setSchema(loadedSchema);
    setTimeColumnName(loadedSchema.getTimeColumnName());

    // Without a time field spec the segment time unit falls back to DAYS.
    final TimeFieldSpec timeSpec = loadedSchema.getTimeFieldSpec();
    if (timeSpec == null) {
        setSegmentTimeUnit(TimeUnit.DAYS);
    } else {
        setSegmentTimeUnit(timeSpec.getIncomingGranularitySpec().getTimeType());
    }

    // Optional JSON-backed configs.
    if (_readerConfigFile != null) {
        final File readerConfig = new File(_readerConfigFile);
        setReaderConfig(jsonMapper.readValue(readerConfig, CSVRecordReaderConfig.class));
    }
    if (_starTreeIndexSpecFile != null) {
        final File starTreeSpec = new File(_starTreeIndexSpecFile);
        setStarTreeIndexSpec(jsonMapper.readValue(starTreeSpec, StarTreeIndexSpec.class));
    }
}
Also used : CSVRecordReaderConfig(com.linkedin.pinot.core.data.readers.CSVRecordReaderConfig) Schema(com.linkedin.pinot.common.data.Schema) TimeFieldSpec(com.linkedin.pinot.common.data.TimeFieldSpec) File(java.io.File) StarTreeIndexSpec(com.linkedin.pinot.common.data.StarTreeIndexSpec) ObjectMapper(org.codehaus.jackson.map.ObjectMapper)

Example 13 with TimeFieldSpec

use of com.linkedin.pinot.common.data.TimeFieldSpec in project pinot by linkedin.

The method extractSchemaFromAvro of the class SegmentTestUtils.

/**
 * Builds a {@link Schema} from the Avro schema embedded in the given Avro data file.
 *
 * <p>Each Avro field is turned into a time, dimension or metric field spec according to the
 * entry for its name in {@code fieldTypeMap}.
 *
 * @param avroFile Avro data file whose embedded schema is read
 * @param fieldTypeMap field type for every Avro field, keyed by field name
 * @param granularity time unit used for the time column's granularity spec
 * @return the schema containing one field spec per Avro field
 * @throws IOException if the Avro file cannot be opened or read
 * @throws NullPointerException if {@code fieldTypeMap} has no entry for an Avro field
 * @throws UnsupportedOperationException if a field is mapped to a type other than
 *         TIME, DIMENSION or METRIC
 */
public static Schema extractSchemaFromAvro(File avroFile, Map<String, FieldType> fieldTypeMap, TimeUnit granularity) throws IOException {
    Schema schema = new Schema();
    // try-with-resources closes both streams even when a field is rejected below or the
    // DataFileStream constructor itself fails; the original only closed on the success path.
    try (FileInputStream inputStream = new FileInputStream(avroFile);
        DataFileStream<GenericRecord> dataStream = new DataFileStream<>(inputStream, new GenericDatumReader<GenericRecord>())) {
        for (final Field field : dataStream.getSchema().getFields()) {
            final String columnName = field.name();
            FieldType fieldType = fieldTypeMap.get(columnName);
            Preconditions.checkNotNull(fieldType, "No field type specified for column: " + columnName);
            switch(fieldType) {
                case TIME:
                    final TimeGranularitySpec gSpec = new TimeGranularitySpec(getColumnType(field), granularity, columnName);
                    final TimeFieldSpec fSpec = new TimeFieldSpec(gSpec);
                    schema.addField(fSpec);
                    break;
                case DIMENSION:
                    final FieldSpec dimensionFieldSpec = new DimensionFieldSpec(columnName, getColumnType(field), isSingleValueField(field));
                    schema.addField(dimensionFieldSpec);
                    break;
                case METRIC:
                    final FieldSpec metricFieldSpec = new MetricFieldSpec(columnName, getColumnType(field));
                    schema.addField(metricFieldSpec);
                    break;
                default:
                    throw new UnsupportedOperationException("Unsupported field type: " + fieldType);
            }
        }
    }
    return schema;
}
Also used : TimeGranularitySpec(com.linkedin.pinot.common.data.TimeGranularitySpec) Schema(com.linkedin.pinot.common.data.Schema) TimeFieldSpec(com.linkedin.pinot.common.data.TimeFieldSpec) DataFileStream(org.apache.avro.file.DataFileStream) MetricFieldSpec(com.linkedin.pinot.common.data.MetricFieldSpec) FileInputStream(java.io.FileInputStream) TimeFieldSpec(com.linkedin.pinot.common.data.TimeFieldSpec) FieldSpec(com.linkedin.pinot.common.data.FieldSpec) MetricFieldSpec(com.linkedin.pinot.common.data.MetricFieldSpec) DimensionFieldSpec(com.linkedin.pinot.common.data.DimensionFieldSpec) FieldType(com.linkedin.pinot.common.data.FieldSpec.FieldType) Field(org.apache.avro.Schema.Field) GenericRecord(org.apache.avro.generic.GenericRecord) DimensionFieldSpec(com.linkedin.pinot.common.data.DimensionFieldSpec)

Example 14 with TimeFieldSpec

use of com.linkedin.pinot.common.data.TimeFieldSpec in project pinot by linkedin.

The method buildSchema of the class TransformGroupByTest.

/**
   * Helper method that assembles the test schema: a single-valued STRING dimension, a DOUBLE
   * metric, and a LONG time column expressed in milliseconds.
   *
   * @return the newly built schema
   */
private static Schema buildSchema() {
    final Schema result = new Schema();
    result.addField(new DimensionFieldSpec(DIMENSION_NAME, FieldSpec.DataType.STRING, true));
    result.addField(new MetricFieldSpec(METRIC_NAME, FieldSpec.DataType.DOUBLE));
    // The time column is registered through the dedicated setter rather than addField.
    result.setTimeFieldSpec(new TimeFieldSpec(TIME_COLUMN_NAME, FieldSpec.DataType.LONG, TimeUnit.MILLISECONDS));
    return result;
}
Also used : Schema(com.linkedin.pinot.common.data.Schema) TimeFieldSpec(com.linkedin.pinot.common.data.TimeFieldSpec) MetricFieldSpec(com.linkedin.pinot.common.data.MetricFieldSpec) DimensionFieldSpec(com.linkedin.pinot.common.data.DimensionFieldSpec)

Example 15 with TimeFieldSpec

use of com.linkedin.pinot.common.data.TimeFieldSpec in project pinot by linkedin.

The method testRandom of the class TestOffheapStarTreeBuilder.

/**
 * Feeds randomly generated rows into an {@link OffHeapStarTreeBuilder} and builds the tree.
 *
 * <p>The schema has {@code numDimensions} INT dimensions (d1..dN, also used as the split
 * order), a {@code daysSinceEpoch} time column and {@code numMetrics} INT metrics (n1..nN).
 * The test has no assertions; it passes when appending and building complete without an
 * exception.
 *
 * @throws Exception if the builder fails or the output directory cannot be deleted
 */
@Test
public void testRandom() throws Exception {
    int numRows = 100;
    int numDimensions = 6;
    int numMetrics = 6;
    StarTreeBuilderConfig builderConfig = new StarTreeBuilderConfig();
    Schema schema = new Schema();
    builderConfig.dimensionsSplitOrder = new ArrayList<>();
    for (int i = 0; i < numDimensions; i++) {
        String dimName = "d" + (i + 1);
        DimensionFieldSpec dimensionFieldSpec = new DimensionFieldSpec(dimName, DataType.INT, true);
        schema.addField(dimensionFieldSpec);
        builderConfig.dimensionsSplitOrder.add(dimName);
    }
    schema.setTimeFieldSpec(new TimeFieldSpec("daysSinceEpoch", DataType.INT, TimeUnit.DAYS));
    for (int i = 0; i < numMetrics; i++) {
        String metricName = "n" + (i + 1);
        MetricFieldSpec metricFieldSpec = new MetricFieldSpec(metricName, DataType.INT);
        schema.addField(metricFieldSpec);
    }
    builderConfig.maxLeafRecords = 10;
    builderConfig.schema = schema;
    builderConfig.outDir = new File("/tmp/startree");
    try {
        OffHeapStarTreeBuilder builder = new OffHeapStarTreeBuilder();
        builder.init(builderConfig);
        Random r = new Random();
        HashMap<String, Object> map = new HashMap<>();
        for (int row = 0; row < numRows; row++) {
            for (int i = 0; i < numDimensions; i++) {
                String dimName = schema.getDimensionFieldSpecs().get(i).getName();
                map.put(dimName, dimName + "-v" + r.nextInt((numDimensions - i + 2)));
            }
            //time
            map.put("daysSinceEpoch", r.nextInt(1000));
            for (int i = 0; i < numMetrics; i++) {
                String metName = schema.getMetricFieldSpecs().get(i).getName();
                // Bound by numMetrics (the original used numDimensions here); this keeps the
                // bound positive if the two counts ever diverge.
                map.put(metName, r.nextInt((numMetrics - i + 2)));
            }
            GenericRow genericRow = new GenericRow();
            genericRow.init(map);
            builder.append(genericRow);
        }
        builder.build();
    } finally {
        // Clean up even when the builder fails, so a failed run does not leave stale
        // output behind for the next one.
        FileUtils.deleteDirectory(builderConfig.outDir);
    }
}
Also used : HashMap(java.util.HashMap) Schema(com.linkedin.pinot.common.data.Schema) TimeFieldSpec(com.linkedin.pinot.common.data.TimeFieldSpec) MetricFieldSpec(com.linkedin.pinot.common.data.MetricFieldSpec) GenericRow(com.linkedin.pinot.core.data.GenericRow) Random(java.util.Random) File(java.io.File) DimensionFieldSpec(com.linkedin.pinot.common.data.DimensionFieldSpec) Test(org.testng.annotations.Test)

Aggregations

TimeFieldSpec (com.linkedin.pinot.common.data.TimeFieldSpec)17 DimensionFieldSpec (com.linkedin.pinot.common.data.DimensionFieldSpec)12 MetricFieldSpec (com.linkedin.pinot.common.data.MetricFieldSpec)12 Schema (com.linkedin.pinot.common.data.Schema)11 FieldSpec (com.linkedin.pinot.common.data.FieldSpec)10 TimeGranularitySpec (com.linkedin.pinot.common.data.TimeGranularitySpec)7 File (java.io.File)4 FieldType (com.linkedin.pinot.common.data.FieldSpec.FieldType)3 JSONObject (org.json.JSONObject)3 DataType (com.linkedin.pinot.common.data.FieldSpec.DataType)2 GenericRow (com.linkedin.pinot.core.data.GenericRow)2 HashMap (java.util.HashMap)2 TimeUnit (java.util.concurrent.TimeUnit)2 Field (org.apache.avro.Schema.Field)2 Test (org.testng.annotations.Test)2 AbstractTableConfig (com.linkedin.pinot.common.config.AbstractTableConfig)1 IndexingConfig (com.linkedin.pinot.common.config.IndexingConfig)1 StarTreeIndexSpec (com.linkedin.pinot.common.data.StarTreeIndexSpec)1 CSVRecordReaderConfig (com.linkedin.pinot.core.data.readers.CSVRecordReaderConfig)1 ColumnMetadata (com.linkedin.pinot.core.segment.index.ColumnMetadata)1