Search in sources :

Example 1 with DataType

use of com.linkedin.pinot.common.data.FieldSpec.DataType in project pinot by linkedin.

the class DataGenerator method buildSpec.

/**
 * Builds the {@link FieldSpec} for one generated column.
 *
 * <p>The column's data type and field type are looked up in the maps exposed by {@code genSpec}.
 * The resulting spec is always named after the column, carries the looked-up data type and is
 * marked as single-valued.
 *
 * @param genSpec generator spec supplying the per-column data type, field type and time unit maps.
 * @param column name of the column to build the spec for.
 * @return the field spec for {@code column}.
 * @throws RuntimeException if the column's field type is not DIMENSION, METRIC or TIME.
 */
private FieldSpec buildSpec(DataGeneratorSpec genSpec, String column) {
    DataType columnDataType = genSpec.getDataTypesMap().get(column);
    FieldType columnFieldType = genSpec.getFieldTypesMap().get(column);

    FieldSpec result;
    switch (columnFieldType) {
        case TIME:
            // Time columns additionally need the time unit registered for the column.
            result = new TimeFieldSpec(column, columnDataType, genSpec.getTimeUnitMap().get(column));
            break;
        case METRIC:
            result = new MetricFieldSpec();
            break;
        case DIMENSION:
            result = new DimensionFieldSpec();
            break;
        default:
            throw new RuntimeException("Invalid Field type.");
    }

    // Common configuration applied to every kind of spec, including the time spec built above.
    result.setName(column);
    result.setDataType(columnDataType);
    result.setSingleValueField(true);
    return result;
}
Also used : TimeFieldSpec(com.linkedin.pinot.common.data.TimeFieldSpec) DataType(com.linkedin.pinot.common.data.FieldSpec.DataType) MetricFieldSpec(com.linkedin.pinot.common.data.MetricFieldSpec) DimensionFieldSpec(com.linkedin.pinot.common.data.DimensionFieldSpec) FieldSpec(com.linkedin.pinot.common.data.FieldSpec) FieldType(com.linkedin.pinot.common.data.FieldSpec.FieldType)

Example 2 with DataType

use of com.linkedin.pinot.common.data.FieldSpec.DataType in project pinot by linkedin.

the class Schema method validate.

/**
   * Validates a pinot schema. The following validations are performed:
   * <p>- For dimension and time fields, support {@link DataType}: INT, LONG, FLOAT, DOUBLE, STRING.
   * <p>- For metric fields (non-derived), support {@link DataType}: INT, LONG, FLOAT, DOUBLE.
   * <p>- All fields must have a default null value.
   * <p>Every field is checked even after a failure so that all schema errors are logged in one pass.
   * Any exception thrown while checking a field is logged and marks the schema as invalid.
   *
   * @param ctxLogger logger used to log the message (if null, the current class logger is used).
   * @return whether schema is valid.
   */
public boolean validate(Logger ctxLogger) {
    // Fall back to this class's logger when the caller does not supply one.
    if (ctxLogger == null) {
        ctxLogger = LOGGER;
    }
    boolean isValid = true;
    // Log ALL the schema errors that may be present.
    for (FieldSpec fieldSpec : fieldSpecMap.values()) {
        FieldType fieldType = fieldSpec.getFieldType();
        DataType dataType = fieldSpec.getDataType();
        String fieldName = fieldSpec.getName();
        try {
            switch (fieldType) {
                case DIMENSION:
                case TIME:
                    switch (dataType) {
                        case INT:
                        case LONG:
                        case FLOAT:
                        case DOUBLE:
                        case STRING:
                            // Check getDefaultNullValue() does not throw exception.
                            fieldSpec.getDefaultNullValue();
                            break;
                        default:
                            ctxLogger.error("Unsupported data type: {} in dimension/time field: {}", dataType, fieldName);
                            isValid = false;
                            break;
                    }
                    break;
                case METRIC:
                    switch (dataType) {
                        case INT:
                        case LONG:
                        case FLOAT:
                        case DOUBLE:
                            // Check getDefaultNullValue() does not throw exception.
                            fieldSpec.getDefaultNullValue();
                            break;
                        default:
                            ctxLogger.error("Unsupported data type: {} in metric field: {}", dataType, fieldName);
                            isValid = false;
                            break;
                    }
                    break;
                default:
                    // Report the offending field type here; the data type is irrelevant to this error.
                    ctxLogger.error("Unsupported field type: {} for field: {}", fieldType, fieldName);
                    isValid = false;
                    break;
            }
        } catch (Exception e) {
            // Covers failures from getDefaultNullValue() as well as a null field/data type in the switches.
            ctxLogger.error("Caught exception while validating {} field {} dataType {}", fieldType, fieldName, dataType, e);
            isValid = false;
        }
    }
    return isValid;
}
Also used : DataType(com.linkedin.pinot.common.data.FieldSpec.DataType) IOException(java.io.IOException) FieldType(com.linkedin.pinot.common.data.FieldSpec.FieldType)

Example 3 with DataType

use of com.linkedin.pinot.common.data.FieldSpec.DataType in project pinot by linkedin.

the class FieldSpecTest method testTimeFieldSpecConstructor.

/**
   * Checks that the {@link TimeFieldSpec} constructor overloads produce equal specs when given
   * equivalent arguments, and that specs built with and without a default null value only become
   * equal once the default null value is set on the latter.
   */
@Test
public void testTimeFieldSpecConstructor() {
    // Incoming time column description.
    String inName = "incoming";
    DataType inType = DataType.LONG;
    TimeUnit inUnit = TimeUnit.HOURS;
    int inUnitSize = 1;
    TimeGranularitySpec inGranularity = new TimeGranularitySpec(inType, inUnitSize, inUnit, inName);

    // Outgoing time column description.
    String outName = "outgoing";
    DataType outType = DataType.INT;
    TimeUnit outUnit = TimeUnit.DAYS;
    int outUnitSize = 1;
    TimeGranularitySpec outGranularity = new TimeGranularitySpec(outType, outUnitSize, outUnit, outName);

    int nullDefault = 17050;

    // Overloads taking a time unit only.
    TimeFieldSpec inOnly = new TimeFieldSpec(inName, inType, inUnit);
    TimeFieldSpec inOnlyWithDefault = new TimeFieldSpec(inName, inType, inUnit, nullDefault);
    TimeFieldSpec inOut = new TimeFieldSpec(inName, inType, inUnit, outName, outType, outUnit);
    TimeFieldSpec inOutWithDefault =
        new TimeFieldSpec(inName, inType, inUnit, outName, outType, outUnit, nullDefault);

    // Overloads taking an explicit time unit size.
    TimeFieldSpec sizedInOnly = new TimeFieldSpec(inName, inType, inUnitSize, inUnit);
    TimeFieldSpec sizedInOnlyWithDefault = new TimeFieldSpec(inName, inType, inUnitSize, inUnit, nullDefault);
    TimeFieldSpec sizedInOut =
        new TimeFieldSpec(inName, inType, inUnitSize, inUnit, outName, outType, outUnitSize, outUnit);
    TimeFieldSpec sizedInOutWithDefault =
        new TimeFieldSpec(inName, inType, inUnitSize, inUnit, outName, outType, outUnitSize, outUnit, nullDefault);

    // Overloads taking granularity specs.
    TimeFieldSpec granInOnly = new TimeFieldSpec(inGranularity);
    TimeFieldSpec granInOnlyWithDefault = new TimeFieldSpec(inGranularity, nullDefault);
    TimeFieldSpec granInOut = new TimeFieldSpec(inGranularity, outGranularity);
    TimeFieldSpec granInOutWithDefault = new TimeFieldSpec(inGranularity, outGranularity, nullDefault);

    // Equivalent arguments through different overloads must give equal specs.
    Assert.assertEquals(inOnly, sizedInOnly);
    Assert.assertEquals(inOnly, granInOnly);
    Assert.assertEquals(inOnlyWithDefault, sizedInOnlyWithDefault);
    Assert.assertEquals(inOnlyWithDefault, granInOnlyWithDefault);
    Assert.assertEquals(inOut, sizedInOut);
    Assert.assertEquals(inOut, granInOut);
    Assert.assertEquals(inOutWithDefault, sizedInOutWithDefault);
    Assert.assertEquals(inOutWithDefault, granInOutWithDefault);

    // Parallel arrays: element i of the first is the same spec as element i of the second,
    // except that only the second was constructed with a default null value.
    TimeFieldSpec[] lackingDefault = {inOnly, inOut, sizedInOnly, sizedInOut, granInOnly, granInOut};
    TimeFieldSpec[] havingDefault = {
        inOnlyWithDefault, inOutWithDefault, sizedInOnlyWithDefault,
        sizedInOutWithDefault, granInOnlyWithDefault, granInOutWithDefault
    };

    // Before adding default null value.
    for (int i = 0; i < lackingDefault.length; i++) {
        Assert.assertFalse(lackingDefault[i].equals(havingDefault[i]));
    }

    // After adding default null value.
    for (TimeFieldSpec spec : lackingDefault) {
        spec.setDefaultNullValue(nullDefault);
    }
    for (int i = 0; i < lackingDefault.length; i++) {
        Assert.assertEquals(lackingDefault[i], havingDefault[i]);
    }
}
Also used : DataType(com.linkedin.pinot.common.data.FieldSpec.DataType) TimeUnit(java.util.concurrent.TimeUnit) Test(org.testng.annotations.Test)

Example 4 with DataType

use of com.linkedin.pinot.common.data.FieldSpec.DataType in project pinot by linkedin.

the class AvroQueryGenerator method getColumnType.

/**
 * Resolves the {@link DataType} for an Avro field.
 *
 * <p>For a non-array field the data type is derived from the field's own Avro type. For an array
 * field it is derived from the element type; when the element is a record it must contain exactly
 * one field, whose schema is then used as the element schema.
 *
 * @param field Avro field to inspect.
 * @return the data type obtained from {@code DataType.valueOf} for the resolved Avro type.
 * @throws RuntimeException if an array's record element does not have exactly one field.
 */
public static DataType getColumnType(Field field) {
    // NOTE(review): extractSchemaFromUnionIfNeeded presumably unwraps union schemas - confirm.
    final org.apache.avro.Schema resolvedSchema = extractSchemaFromUnionIfNeeded(field.schema());
    final Type resolvedType = resolvedSchema.getType();

    // Anything that is not an array maps directly from its own type.
    if (resolvedType != Type.ARRAY) {
        return DataType.valueOf(resolvedType);
    }

    org.apache.avro.Schema itemSchema = extractSchemaFromUnionIfNeeded(resolvedSchema.getElementType());
    if (itemSchema.getType() == Type.RECORD) {
        if (itemSchema.getFields().size() != 1) {
            throw new RuntimeException("More than one schema in Multi-value column!");
        }
        // Use the schema of the record's only field as the element schema.
        itemSchema = extractSchemaFromUnionIfNeeded(itemSchema.getFields().get(0).schema());
    }
    return DataType.valueOf(itemSchema.getType());
}
Also used : DataType(com.linkedin.pinot.common.data.FieldSpec.DataType) Type(org.apache.avro.Schema.Type) Schema(org.apache.avro.Schema)

Example 5 with DataType

use of com.linkedin.pinot.common.data.FieldSpec.DataType in project pinot by linkedin.

the class AvroRecordReader method getColumnType.

/**
 * Determines the {@link DataType} of an Avro field.
 *
 * <p>Array fields are typed by their element: a record element must hold exactly one field, and
 * that field's schema supplies the element type. All other fields are typed by their own schema.
 *
 * @param field Avro field whose column type is wanted.
 * @return the result of {@code DataType.valueOf} applied to the resolved Avro type.
 * @throws RuntimeException if the element of an array field is a record without exactly one field.
 */
public static DataType getColumnType(Field field) {
    // NOTE(review): extractSchemaFromUnionIfNeeded is assumed to unwrap union schemas - verify.
    org.apache.avro.Schema effectiveSchema = extractSchemaFromUnionIfNeeded(field.schema());
    Type avroType = effectiveSchema.getType();

    if (avroType == Type.ARRAY) {
        // Multi-value column: the column type is the type of the array's elements.
        effectiveSchema = extractSchemaFromUnionIfNeeded(effectiveSchema.getElementType());
        if (effectiveSchema.getType() == Type.RECORD) {
            boolean singleField = effectiveSchema.getFields().size() == 1;
            if (!singleField) {
                throw new RuntimeException("More than one schema in Multi-value column!");
            }
            org.apache.avro.Schema onlyFieldSchema = effectiveSchema.getFields().get(0).schema();
            effectiveSchema = extractSchemaFromUnionIfNeeded(onlyFieldSchema);
        }
        avroType = effectiveSchema.getType();
    }
    return DataType.valueOf(avroType);
}
Also used : Type(org.apache.avro.Schema.Type) DataType(com.linkedin.pinot.common.data.FieldSpec.DataType)

Aggregations

DataType (com.linkedin.pinot.common.data.FieldSpec.DataType)16 FieldType (com.linkedin.pinot.common.data.FieldSpec.FieldType)6 FieldSpec (com.linkedin.pinot.common.data.FieldSpec)4 TimeUnit (java.util.concurrent.TimeUnit)4 DataSchema (com.linkedin.pinot.common.utils.DataSchema)3 File (java.io.File)3 Nonnull (javax.annotation.Nonnull)3 Type (org.apache.avro.Schema.Type)3 IntRange (org.apache.commons.lang.math.IntRange)3 DimensionFieldSpec (com.linkedin.pinot.common.data.DimensionFieldSpec)2 MetricFieldSpec (com.linkedin.pinot.common.data.MetricFieldSpec)2 Schema (com.linkedin.pinot.common.data.Schema)2 TimeFieldSpec (com.linkedin.pinot.common.data.TimeFieldSpec)2 DataGenerator (com.linkedin.pinot.tools.data.generator.DataGenerator)2 DataGeneratorSpec (com.linkedin.pinot.tools.data.generator.DataGeneratorSpec)2 Serializable (java.io.Serializable)2 HashMap (java.util.HashMap)2 Schema (org.apache.avro.Schema)2 Test (org.testng.annotations.Test)2 DerivedMetricType (com.linkedin.pinot.common.data.MetricFieldSpec.DerivedMetricType)1