Search in sources :

Example 1 with FieldType

use of com.linkedin.pinot.common.data.FieldSpec.FieldType in project pinot by linkedin.

the class Schema method validate.

/**
 * Validates a pinot schema. The following validations are performed:
 * <p>- For dimension and time fields, support {@link DataType}: INT, LONG, FLOAT, DOUBLE, STRING.
 * <p>- For metric fields (non-derived), support {@link DataType}: INT, LONG, FLOAT, DOUBLE.
 * <p>- All fields must have a default null value.
 * <p>- Any field type other than dimension, time or metric is rejected.
 *
 * <p>Validation does not stop at the first problem: every field is inspected and every problem is
 * logged, so one call reports all schema errors.
 *
 * @param ctxLogger logger used to log the message (if null, the current class logger is used).
 * @return whether schema is valid.
 */
public boolean validate(Logger ctxLogger) {
    if (ctxLogger == null) {
        ctxLogger = LOGGER;
    }
    boolean isValid = true;
    // Log ALL the schema errors that may be present.
    for (FieldSpec fieldSpec : fieldSpecMap.values()) {
        FieldType fieldType = fieldSpec.getFieldType();
        DataType dataType = fieldSpec.getDataType();
        String fieldName = fieldSpec.getName();
        try {
            switch (fieldType) {
                case DIMENSION:
                case TIME:
                    switch (dataType) {
                        case INT:
                        case LONG:
                        case FLOAT:
                        case DOUBLE:
                        case STRING:
                            // Check getDefaultNullValue() does not throw exception.
                            fieldSpec.getDefaultNullValue();
                            break;
                        default:
                            ctxLogger.error("Unsupported data type: {} in dimension/time field: {}", dataType, fieldName);
                            isValid = false;
                            break;
                    }
                    break;
                case METRIC:
                    switch (dataType) {
                        case INT:
                        case LONG:
                        case FLOAT:
                        case DOUBLE:
                            // Check getDefaultNullValue() does not throw exception.
                            fieldSpec.getDefaultNullValue();
                            break;
                        default:
                            ctxLogger.error("Unsupported data type: {} in metric field: {}", dataType, fieldName);
                            isValid = false;
                            break;
                    }
                    break;
                default:
                    // Report the offending field type here (not the data type), so the message matches its text.
                    ctxLogger.error("Unsupported field type: {} for field: {}", fieldType, fieldName);
                    isValid = false;
                    break;
            }
        } catch (Exception e) {
            // Anything thrown while inspecting a field (including a null field/data type in the
            // switches above) marks the schema invalid but lets the remaining fields be checked.
            ctxLogger.error("Caught exception while validating {} field {} dataType {}", fieldType, fieldName, dataType, e);
            isValid = false;
        }
    }
    return isValid;
}
Also used : DataType(com.linkedin.pinot.common.data.FieldSpec.DataType) IOException(java.io.IOException) FieldType(com.linkedin.pinot.common.data.FieldSpec.FieldType)

Example 2 with FieldType

use of com.linkedin.pinot.common.data.FieldSpec.FieldType in project pinot by linkedin.

the class FileBasedSentineTest method setup.

/**
 * One-time fixture: generates avro data, builds one segment per (avro file, table) pair into the
 * server bootstrap directory, starts the server and broker, and issues a first query.
 *
 * @throws Exception if data generation, segment creation, startup or the query fails.
 */
@BeforeClass
public void setup() throws Exception {
    url = new URL("http://localhost:" + FileBasedServerBrokerStarters.BROKER_CLIENT_PORT + "/query");
    // lets generate data
    final String[] columns = { "dimention1", "dimention2", "dimention3", "dimention4", "metric1", "daysSinceEpoch" };
    final Map<String, DataType> dataTypes = new HashMap<String, FieldSpec.DataType>();
    final Map<String, FieldType> fieldTypes = new HashMap<String, FieldType>();
    final Map<String, TimeUnit> timeUnits = new HashMap<String, TimeUnit>();
    final Map<String, Integer> cardinality = new HashMap<String, Integer>();
    // Create an empty range map: the DataGeneratorSpec constructor takes one, and this test does not
    // use metric/time as fieldType.
    final Map<String, IntRange> range = new HashMap<String, IntRange>();
    for (final String col : columns) {
        // Only "dimention1" is a STRING column; every other column is INT.
        if (col.equals("dimention1")) {
            dataTypes.put(col, DataType.STRING);
        } else {
            dataTypes.put(col, DataType.INT);
        }
        cardinality.put(col, 1000);
        // Every column, including "metric1" and "daysSinceEpoch", is generated as a dimension.
        fieldTypes.put(col, FieldType.DIMENSION);
    }
    // Start from a clean output directory for the generated avro files.
    if (avroDataDir.exists()) {
        FileUtils.deleteDirectory(avroDataDir);
    }
    final DataGeneratorSpec spec = new DataGeneratorSpec(Arrays.asList(columns), cardinality, range, dataTypes, fieldTypes, timeUnits, FileFormat.AVRO, avroDataDir.getAbsolutePath(), true);
    generator = new DataGenerator();
    generator.init(spec);
    generator.generate(100000L, 2);
    // lets make segments now
    final File bootstrapDir = new File(FileBasedServerBrokerStarters.SERVER_BOOTSTRAP_DIR);
    if (bootstrapDir.exists()) {
        FileUtils.deleteDirectory(bootstrapDir);
    }
    bootstrapDir.mkdir();
    // File.listFiles() returns null when the path is not a directory or cannot be read; fail with a
    // descriptive message instead of a bare NullPointerException from the loop below.
    final File[] avroFiles = avroDataDir.listFiles();
    if (avroFiles == null) {
        throw new IllegalStateException("Unable to list generated avro files in: " + avroDataDir.getAbsolutePath());
    }
    int counter = 0;
    for (final File avro : avroFiles) {
        for (final String table : FileBasedServerBrokerStarters.TABLE_NAMES) {
            // Each (avro file, table) pair gets its own "segment-<counter>" output directory.
            final SegmentGeneratorConfig genConfig = SegmentTestUtils.getSegmentGenSpecWithSchemAndProjectedColumns(avro, new File(bootstrapDir, "segment-" + counter), "daysSinceEpoch", TimeUnit.DAYS, table);
            final SegmentIndexCreationDriver driver = SegmentCreationDriverFactory.get(null);
            driver.init(genConfig);
            driver.build();
            counter++;
        }
    }
    // lets start the server and the broker now
    starter = new FileBasedServerBrokerStarters();
    starter.startAll();
    // Issue a first selection query; capture and inspect its response if you need concrete values
    // for running filter queries.
    postQuery("select * from 'table1' limit 100", "http://localhost:" + FileBasedServerBrokerStarters.BROKER_CLIENT_PORT);
}
Also used : SegmentIndexCreationDriver(com.linkedin.pinot.core.segment.creator.SegmentIndexCreationDriver) HashMap(java.util.HashMap) IntRange(org.apache.commons.lang.math.IntRange) URL(java.net.URL) FieldSpec(com.linkedin.pinot.common.data.FieldSpec) FieldType(com.linkedin.pinot.common.data.FieldSpec.FieldType) JSONObject(org.json.JSONObject) DataGenerator(com.linkedin.pinot.tools.data.generator.DataGenerator) SegmentGeneratorConfig(com.linkedin.pinot.core.indexsegment.generator.SegmentGeneratorConfig) DataType(com.linkedin.pinot.common.data.FieldSpec.DataType) TimeUnit(java.util.concurrent.TimeUnit) File(java.io.File) DataGeneratorSpec(com.linkedin.pinot.tools.data.generator.DataGeneratorSpec) BeforeClass(org.testng.annotations.BeforeClass)

Example 3 with FieldType

use of com.linkedin.pinot.common.data.FieldSpec.FieldType in project pinot by linkedin.

the class DataGenerator method buildSpec.

/**
 * Builds the {@link FieldSpec} for one column of the generator spec.
 *
 * <p>The concrete spec class is chosen from the column's field type; the column name, data type
 * and single-value flag are then set on it.
 *
 * @param genSpec generator spec holding the per-column data type, field type and time unit maps.
 * @param column name of the column to build the spec for.
 * @return the field spec for the column, always marked single-value.
 * @throws NullPointerException if no field type is found for the column.
 * @throws RuntimeException if the field type is not DIMENSION, METRIC or TIME.
 */
private FieldSpec buildSpec(DataGeneratorSpec genSpec, String column) {
    DataType dataType = genSpec.getDataTypesMap().get(column);
    FieldType fieldType = genSpec.getFieldTypesMap().get(column);
    if (fieldType == null) {
        // Switching on a null enum throws a NullPointerException with no message; keep the same
        // exception type but say which column is misconfigured.
        throw new NullPointerException("No field type configured for column: " + column);
    }
    FieldSpec spec;
    switch (fieldType) {
        case DIMENSION:
            spec = new DimensionFieldSpec();
            break;
        case METRIC:
            spec = new MetricFieldSpec();
            break;
        case TIME:
            spec = new TimeFieldSpec(column, dataType, genSpec.getTimeUnitMap().get(column));
            break;
        default:
            throw new RuntimeException("Invalid Field type: " + fieldType + " for column: " + column);
    }
    spec.setName(column);
    spec.setDataType(dataType);
    spec.setSingleValueField(true);
    return spec;
}
Also used : TimeFieldSpec(com.linkedin.pinot.common.data.TimeFieldSpec) DataType(com.linkedin.pinot.common.data.FieldSpec.DataType) MetricFieldSpec(com.linkedin.pinot.common.data.MetricFieldSpec) MetricFieldSpec(com.linkedin.pinot.common.data.MetricFieldSpec) DimensionFieldSpec(com.linkedin.pinot.common.data.DimensionFieldSpec) TimeFieldSpec(com.linkedin.pinot.common.data.TimeFieldSpec) FieldSpec(com.linkedin.pinot.common.data.FieldSpec) DimensionFieldSpec(com.linkedin.pinot.common.data.DimensionFieldSpec) FieldType(com.linkedin.pinot.common.data.FieldSpec.FieldType)

Example 4 with FieldType

use of com.linkedin.pinot.common.data.FieldSpec.FieldType in project pinot by linkedin.

the class Schema method addField.

/**
 * Adds a field spec to this schema.
 *
 * <p>Dimension and metric specs are appended to the matching name and spec lists; a time spec
 * becomes the schema's single time field. In every case the spec is also stored in the field spec
 * map under its column name.
 *
 * @param fieldSpec field spec to add; must be non-null and have a non-null name.
 * @throws NullPointerException if the spec or its name is null.
 * @throws IllegalStateException if a spec with the same column name already exists, or a time
 *     column is already defined when adding a time spec.
 * @throws ClassCastException if the spec's class does not match its declared field type.
 * @throws UnsupportedOperationException if the field type is not DIMENSION, METRIC or TIME.
 */
public void addField(@Nonnull FieldSpec fieldSpec) {
    Preconditions.checkNotNull(fieldSpec);
    String columnName = fieldSpec.getName();
    Preconditions.checkNotNull(columnName);
    Preconditions.checkState(!fieldSpecMap.containsKey(columnName), "Field spec already exists for column: %s", columnName);
    FieldType fieldType = fieldSpec.getFieldType();
    switch (fieldType) {
        case DIMENSION:
            // Cast before touching any collection, so a spec whose class does not match its
            // field type fails without leaving the name list half-updated.
            DimensionFieldSpec dimensionFieldSpec = (DimensionFieldSpec) fieldSpec;
            dimensions.add(columnName);
            dimensionFieldSpecs.add(dimensionFieldSpec);
            break;
        case METRIC:
            // Same ordering as above: validate the cast first, then mutate.
            MetricFieldSpec metricFieldSpec = (MetricFieldSpec) fieldSpec;
            metrics.add(columnName);
            metricFieldSpecs.add(metricFieldSpec);
            break;
        case TIME:
            Preconditions.checkState(timeFieldSpec == null, "Already defined the time column: %s", timeFieldSpec);
            timeFieldSpec = (TimeFieldSpec) fieldSpec;
            break;
        default:
            throw new UnsupportedOperationException("Unsupported field type: " + fieldType);
    }
    // Only reached when the type-specific registration above succeeded.
    fieldSpecMap.put(columnName, fieldSpec);
}
Also used : FieldType(com.linkedin.pinot.common.data.FieldSpec.FieldType)

Example 5 with FieldType

use of com.linkedin.pinot.common.data.FieldSpec.FieldType in project pinot by linkedin.

the class PinotSegmentRecordReader method getSchema.

/**
 * Builds a {@link Schema} from the segment metadata.
 *
 * <p>The schema is named after the segment metadata and receives one field spec per entry in
 * {@code columns}, created from that column's metadata (name, data type, field type, plus the
 * single-value flag for dimensions and the time unit for time columns).
 *
 * @return the schema built from the segment metadata.
 * @throws UnsupportedOperationException if a column has a field type other than DIMENSION,
 *     METRIC or TIME.
 */
@Override
public Schema getSchema() {
    Schema schema = new Schema();
    schema.setSchemaName(segmentMetadata.getName());
    for (String column : columns) {
        ColumnMetadata columnMetadata = segmentMetadata.getColumnMetadataFor(column);
        String columnName = columnMetadata.getColumnName();
        DataType dataType = columnMetadata.getDataType();
        FieldType fieldType = columnMetadata.getFieldType();
        FieldSpec fieldSpec;
        switch (fieldType) {
            case DIMENSION:
                boolean isSingleValue = columnMetadata.isSingleValue();
                fieldSpec = new DimensionFieldSpec(columnName, dataType, isSingleValue);
                break;
            case METRIC:
                fieldSpec = new MetricFieldSpec(columnName, dataType);
                break;
            case TIME:
                TimeUnit timeType = columnMetadata.getTimeUnit();
                TimeGranularitySpec incomingGranularitySpec = new TimeGranularitySpec(dataType, timeType, columnName);
                fieldSpec = new TimeFieldSpec(incomingGranularitySpec);
                break;
            default:
                // Fail here with the offending type and column instead of handing a null spec
                // to Schema.addField.
                throw new UnsupportedOperationException("Unsupported field type: " + fieldType + " for column: " + columnName);
        }
        schema.addField(fieldSpec);
    }
    return schema;
}
Also used : TimeGranularitySpec(com.linkedin.pinot.common.data.TimeGranularitySpec) ColumnMetadata(com.linkedin.pinot.core.segment.index.ColumnMetadata) Schema(com.linkedin.pinot.common.data.Schema) TimeFieldSpec(com.linkedin.pinot.common.data.TimeFieldSpec) DataType(com.linkedin.pinot.common.data.FieldSpec.DataType) TimeUnit(java.util.concurrent.TimeUnit) MetricFieldSpec(com.linkedin.pinot.common.data.MetricFieldSpec) TimeFieldSpec(com.linkedin.pinot.common.data.TimeFieldSpec) FieldSpec(com.linkedin.pinot.common.data.FieldSpec) MetricFieldSpec(com.linkedin.pinot.common.data.MetricFieldSpec) DimensionFieldSpec(com.linkedin.pinot.common.data.DimensionFieldSpec) DimensionFieldSpec(com.linkedin.pinot.common.data.DimensionFieldSpec) FieldType(com.linkedin.pinot.common.data.FieldSpec.FieldType)

Aggregations

FieldType (com.linkedin.pinot.common.data.FieldSpec.FieldType)8 FieldSpec (com.linkedin.pinot.common.data.FieldSpec)5 DataType (com.linkedin.pinot.common.data.FieldSpec.DataType)5 DimensionFieldSpec (com.linkedin.pinot.common.data.DimensionFieldSpec)3 MetricFieldSpec (com.linkedin.pinot.common.data.MetricFieldSpec)3 Schema (com.linkedin.pinot.common.data.Schema)3 TimeFieldSpec (com.linkedin.pinot.common.data.TimeFieldSpec)3 File (java.io.File)3 HashMap (java.util.HashMap)3 TimeUnit (java.util.concurrent.TimeUnit)3 TimeGranularitySpec (com.linkedin.pinot.common.data.TimeGranularitySpec)2 DataGenerator (com.linkedin.pinot.tools.data.generator.DataGenerator)2 DataGeneratorSpec (com.linkedin.pinot.tools.data.generator.DataGeneratorSpec)2 IntRange (org.apache.commons.lang.math.IntRange)2 SegmentGeneratorConfig (com.linkedin.pinot.core.indexsegment.generator.SegmentGeneratorConfig)1 SegmentIndexCreationDriver (com.linkedin.pinot.core.segment.creator.SegmentIndexCreationDriver)1 ColumnMetadata (com.linkedin.pinot.core.segment.index.ColumnMetadata)1 FileInputStream (java.io.FileInputStream)1 IOException (java.io.IOException)1 URL (java.net.URL)1