Search in sources :

Example 11 with DataType

use of com.linkedin.pinot.common.data.FieldSpec.DataType in project pinot by linkedin.

In class ThirdeyeAvroUtils, the method constructAvroSchemaFromPinotSchema:

/**
 * Constructs an Avro record schema (named {@code "record"}) from a Pinot schema.
 *
 * <p>Every field spec of the Pinot schema whose data type is BOOLEAN, DOUBLE, FLOAT, INT, LONG or
 * STRING becomes a nullable Avro field of the matching primitive type with no default value.
 * Fields of any other data type are left out of the Avro schema; a warning is logged for each
 * one so the omission is visible.
 *
 * @param schema the Pinot schema to convert
 * @return the Avro schema holding one field per supported Pinot field
 */
public static Schema constructAvroSchemaFromPinotSchema(com.linkedin.pinot.common.data.Schema schema) {
    FieldAssembler<Schema> fieldAssembler = SchemaBuilder.record("record").fields();
    for (FieldSpec fieldSpec : schema.getAllFieldSpecs()) {
        String fieldName = fieldSpec.getName();
        DataType dataType = fieldSpec.getDataType();
        BaseFieldTypeBuilder<Schema> baseFieldTypeBuilder = fieldAssembler.name(fieldName).type().nullable();
        // The assembler is only advanced when a branch completes the field with noDefault();
        // an unfinished builder is simply discarded.
        switch(dataType) {
            case BOOLEAN:
                fieldAssembler = baseFieldTypeBuilder.booleanType().noDefault();
                break;
            case DOUBLE:
                fieldAssembler = baseFieldTypeBuilder.doubleType().noDefault();
                break;
            case FLOAT:
                fieldAssembler = baseFieldTypeBuilder.floatType().noDefault();
                break;
            case INT:
                fieldAssembler = baseFieldTypeBuilder.intType().noDefault();
                break;
            case LONG:
                fieldAssembler = baseFieldTypeBuilder.longType().noDefault();
                break;
            case STRING:
                fieldAssembler = baseFieldTypeBuilder.stringType().noDefault();
                break;
            default:
                // Previously skipped silently, which made missing columns hard to diagnose.
                LOGGER.warn("Skipping field {} with unsupported data type {}", fieldName, dataType);
                break;
        }
    }
    Schema avroSchema = fieldAssembler.endRecord();
    LOGGER.info("Avro Schema {}", avroSchema.toString(true));
    return avroSchema;
}
Also used : Schema(org.apache.avro.Schema) DataType(com.linkedin.pinot.common.data.FieldSpec.DataType) FieldSpec(com.linkedin.pinot.common.data.FieldSpec)

Example 12 with DataType

use of com.linkedin.pinot.common.data.FieldSpec.DataType in project pinot by linkedin.

In class PinotSegmentRecordReader, the method getSchema:

/**
 * Rebuilds a Pinot schema from the metadata of this reader's segment.
 *
 * <p>The schema takes its name from the segment metadata and receives one field spec per entry
 * of {@code columns}: a {@code DimensionFieldSpec}, {@code MetricFieldSpec} or
 * {@code TimeFieldSpec}, chosen by the field type recorded in the column metadata.
 *
 * @return the reconstructed schema
 * @throws IllegalStateException if a column's field type is not DIMENSION, METRIC or TIME
 */
@Override
public Schema getSchema() {
    Schema schema = new Schema();
    schema.setSchemaName(segmentMetadata.getName());
    for (String column : columns) {
        ColumnMetadata columnMetadata = segmentMetadata.getColumnMetadataFor(column);
        String columnName = columnMetadata.getColumnName();
        DataType dataType = columnMetadata.getDataType();
        FieldType fieldType = columnMetadata.getFieldType();
        FieldSpec fieldSpec;
        switch(fieldType) {
            case DIMENSION:
                boolean isSingleValue = columnMetadata.isSingleValue();
                fieldSpec = new DimensionFieldSpec(columnName, dataType, isSingleValue);
                break;
            case METRIC:
                fieldSpec = new MetricFieldSpec(columnName, dataType);
                break;
            case TIME:
                TimeUnit timeType = columnMetadata.getTimeUnit();
                TimeGranularitySpec incomingGranularitySpec = new TimeGranularitySpec(dataType, timeType, columnName);
                fieldSpec = new TimeFieldSpec(incomingGranularitySpec);
                break;
            default:
                // Fail fast with context instead of handing a null field spec to addField().
                throw new IllegalStateException(
                    "Unsupported field type " + fieldType + " for column " + columnName);
        }
        schema.addField(fieldSpec);
    }
    return schema;
}
Also used : TimeGranularitySpec(com.linkedin.pinot.common.data.TimeGranularitySpec) ColumnMetadata(com.linkedin.pinot.core.segment.index.ColumnMetadata) Schema(com.linkedin.pinot.common.data.Schema) TimeFieldSpec(com.linkedin.pinot.common.data.TimeFieldSpec) DataType(com.linkedin.pinot.common.data.FieldSpec.DataType) TimeUnit(java.util.concurrent.TimeUnit) MetricFieldSpec(com.linkedin.pinot.common.data.MetricFieldSpec) TimeFieldSpec(com.linkedin.pinot.common.data.TimeFieldSpec) FieldSpec(com.linkedin.pinot.common.data.FieldSpec) MetricFieldSpec(com.linkedin.pinot.common.data.MetricFieldSpec) DimensionFieldSpec(com.linkedin.pinot.common.data.DimensionFieldSpec) DimensionFieldSpec(com.linkedin.pinot.common.data.DimensionFieldSpec) FieldType(com.linkedin.pinot.common.data.FieldSpec.FieldType)

Example 13 with DataType

use of com.linkedin.pinot.common.data.FieldSpec.DataType in project pinot by linkedin.

In class SegmentTestUtils, the method getColumnType:

/**
 * Resolves the Pinot data type of an Avro field.
 *
 * <p>The field schema is first passed through {@code extractSchemaFromUnionIfNeeded}. For a
 * non-array schema its type is converted directly. For an array, the element schema is used
 * instead; when that element is a record it must contain exactly one field, whose schema
 * (again passed through {@code extractSchemaFromUnionIfNeeded}) supplies the type.
 *
 * @param field the Avro field to inspect
 * @return the data type obtained from {@code DataType.valueOf} for the resolved Avro type
 * @throws RuntimeException if an array's record element does not contain exactly one field
 */
public static DataType getColumnType(Field field) {
    final org.apache.avro.Schema resolvedSchema = extractSchemaFromUnionIfNeeded(field.schema());
    final Type resolvedType = resolvedSchema.getType();

    // Single-value column: the field's own type is the answer.
    if (resolvedType != Type.ARRAY) {
        return DataType.valueOf(resolvedType);
    }

    // Multi-value column: look at the array's element type.
    org.apache.avro.Schema itemSchema = extractSchemaFromUnionIfNeeded(resolvedSchema.getElementType());
    if (itemSchema.getType() == Type.RECORD) {
        if (itemSchema.getFields().size() != 1) {
            throw new RuntimeException("More than one schema in Multi-value column!");
        }
        // The record wraps a single field; use that field's schema.
        itemSchema = extractSchemaFromUnionIfNeeded(itemSchema.getFields().get(0).schema());
    }
    return DataType.valueOf(itemSchema.getType());
}
Also used : Type(org.apache.avro.Schema.Type) DataType(com.linkedin.pinot.common.data.FieldSpec.DataType) FieldType(com.linkedin.pinot.common.data.FieldSpec.FieldType)

Example 14 with DataType

use of com.linkedin.pinot.common.data.FieldSpec.DataType in project pinot by linkedin.

In class DataTableSerDeTest, the method testAllDataTypes:

/**
 * Round-trip test covering every {@code DataType} value.
 *
 * <p>Builds a data table with one column per data type, fills {@code NUM_ROWS} rows with random
 * values, serializes the table with {@code toBytes()}, reads it back through
 * {@code DataTableFactory.getDataTable}, and asserts that the schema, row count and every cell
 * match what was written.
 *
 * @throws IOException declared by the method; presumably raised by serialization or
 *     deserialization of the table (the throwing call is not visible here)
 */
@Test
public void testAllDataTypes() throws IOException {
    // One column per DataType constant, named after the constant itself.
    DataType[] columnTypes = DataType.values();
    int numColumns = columnTypes.length;
    String[] columnNames = new String[numColumns];
    for (int i = 0; i < numColumns; i++) {
        columnNames[i] = columnTypes[i].name();
    }
    DataSchema dataSchema = new DataSchema(columnNames, columnTypes);
    DataTableBuilder dataTableBuilder = new DataTableBuilder(dataSchema);
    // Expected values, indexed by row id: one holder array per data type so the values written
    // below can be compared against what is read back after deserialization.
    boolean[] booleans = new boolean[NUM_ROWS];
    byte[] bytes = new byte[NUM_ROWS];
    char[] chars = new char[NUM_ROWS];
    short[] shorts = new short[NUM_ROWS];
    int[] ints = new int[NUM_ROWS];
    long[] longs = new long[NUM_ROWS];
    float[] floats = new float[NUM_ROWS];
    double[] doubles = new double[NUM_ROWS];
    String[] strings = new String[NUM_ROWS];
    Object[] objects = new Object[NUM_ROWS];
    byte[][] byteArrays = new byte[NUM_ROWS][];
    char[][] charArrays = new char[NUM_ROWS][];
    short[][] shortArrays = new short[NUM_ROWS][];
    int[][] intArrays = new int[NUM_ROWS][];
    long[][] longArrays = new long[NUM_ROWS][];
    float[][] floatArrays = new float[NUM_ROWS][];
    double[][] doubleArrays = new double[NUM_ROWS][];
    String[][] stringArrays = new String[NUM_ROWS][];
    // Write phase: generate a random value for each cell, remember it, and put it in the table.
    for (int rowId = 0; rowId < NUM_ROWS; rowId++) {
        dataTableBuilder.startRow();
        for (int colId = 0; colId < numColumns; colId++) {
            switch(columnTypes[colId]) {
                case BOOLEAN:
                    booleans[rowId] = RANDOM.nextBoolean();
                    dataTableBuilder.setColumn(colId, booleans[rowId]);
                    break;
                case BYTE:
                    bytes[rowId] = (byte) RANDOM.nextInt();
                    dataTableBuilder.setColumn(colId, bytes[rowId]);
                    break;
                case CHAR:
                    chars[rowId] = (char) RANDOM.nextInt();
                    dataTableBuilder.setColumn(colId, chars[rowId]);
                    break;
                case SHORT:
                    shorts[rowId] = (short) RANDOM.nextInt();
                    dataTableBuilder.setColumn(colId, shorts[rowId]);
                    break;
                case INT:
                    ints[rowId] = RANDOM.nextInt();
                    dataTableBuilder.setColumn(colId, ints[rowId]);
                    break;
                case LONG:
                    longs[rowId] = RANDOM.nextLong();
                    dataTableBuilder.setColumn(colId, longs[rowId]);
                    break;
                case FLOAT:
                    floats[rowId] = RANDOM.nextFloat();
                    dataTableBuilder.setColumn(colId, floats[rowId]);
                    break;
                case DOUBLE:
                    doubles[rowId] = RANDOM.nextDouble();
                    dataTableBuilder.setColumn(colId, doubles[rowId]);
                    break;
                case STRING:
                    // Random string of 0 to 19 characters.
                    strings[rowId] = RandomStringUtils.random(RANDOM.nextInt(20));
                    dataTableBuilder.setColumn(colId, strings[rowId]);
                    break;
                // Just test Double here, all object types will be covered in ObjectCustomSerDeTest.
                case OBJECT:
                    objects[rowId] = RANDOM.nextDouble();
                    dataTableBuilder.setColumn(colId, objects[rowId]);
                    break;
                case BYTE_ARRAY:
                    // 'length' is declared here and reused by the array cases that follow,
                    // since all case labels share the switch block's scope.
                    int length = RANDOM.nextInt(20);
                    byte[] byteArray = new byte[length];
                    for (int i = 0; i < length; i++) {
                        byteArray[i] = (byte) RANDOM.nextInt();
                    }
                    byteArrays[rowId] = byteArray;
                    dataTableBuilder.setColumn(colId, byteArray);
                    break;
                case CHAR_ARRAY:
                    length = RANDOM.nextInt(20);
                    char[] charArray = new char[length];
                    for (int i = 0; i < length; i++) {
                        charArray[i] = (char) RANDOM.nextInt();
                    }
                    charArrays[rowId] = charArray;
                    dataTableBuilder.setColumn(colId, charArray);
                    break;
                case SHORT_ARRAY:
                    length = RANDOM.nextInt(20);
                    short[] shortArray = new short[length];
                    for (int i = 0; i < length; i++) {
                        shortArray[i] = (short) RANDOM.nextInt();
                    }
                    shortArrays[rowId] = shortArray;
                    dataTableBuilder.setColumn(colId, shortArray);
                    break;
                case INT_ARRAY:
                    length = RANDOM.nextInt(20);
                    int[] intArray = new int[length];
                    for (int i = 0; i < length; i++) {
                        intArray[i] = RANDOM.nextInt();
                    }
                    intArrays[rowId] = intArray;
                    dataTableBuilder.setColumn(colId, intArray);
                    break;
                case LONG_ARRAY:
                    length = RANDOM.nextInt(20);
                    long[] longArray = new long[length];
                    for (int i = 0; i < length; i++) {
                        longArray[i] = RANDOM.nextLong();
                    }
                    longArrays[rowId] = longArray;
                    dataTableBuilder.setColumn(colId, longArray);
                    break;
                case FLOAT_ARRAY:
                    length = RANDOM.nextInt(20);
                    float[] floatArray = new float[length];
                    for (int i = 0; i < length; i++) {
                        floatArray[i] = RANDOM.nextFloat();
                    }
                    floatArrays[rowId] = floatArray;
                    dataTableBuilder.setColumn(colId, floatArray);
                    break;
                case DOUBLE_ARRAY:
                    length = RANDOM.nextInt(20);
                    double[] doubleArray = new double[length];
                    for (int i = 0; i < length; i++) {
                        doubleArray[i] = RANDOM.nextDouble();
                    }
                    doubleArrays[rowId] = doubleArray;
                    dataTableBuilder.setColumn(colId, doubleArray);
                    break;
                case STRING_ARRAY:
                    length = RANDOM.nextInt(20);
                    String[] stringArray = new String[length];
                    for (int i = 0; i < length; i++) {
                        stringArray[i] = RandomStringUtils.random(RANDOM.nextInt(20));
                    }
                    stringArrays[rowId] = stringArray;
                    dataTableBuilder.setColumn(colId, stringArray);
                    break;
            }
        }
        dataTableBuilder.finishRow();
    }
    // Serialize the built table to bytes and read it back as a new table.
    DataTable dataTable = dataTableBuilder.build();
    DataTable newDataTable = DataTableFactory.getDataTable(dataTable.toBytes());
    Assert.assertEquals(newDataTable.getDataSchema(), dataSchema, ERROR_MESSAGE);
    Assert.assertEquals(newDataTable.getNumberOfRows(), NUM_ROWS, ERROR_MESSAGE);
    // Verify phase: every cell of the deserialized table must equal the value recorded above.
    for (int rowId = 0; rowId < NUM_ROWS; rowId++) {
        for (int colId = 0; colId < numColumns; colId++) {
            switch(columnTypes[colId]) {
                case BOOLEAN:
                    Assert.assertEquals(newDataTable.getBoolean(rowId, colId), booleans[rowId], ERROR_MESSAGE);
                    break;
                case BYTE:
                    Assert.assertEquals(newDataTable.getByte(rowId, colId), bytes[rowId], ERROR_MESSAGE);
                    break;
                case CHAR:
                    Assert.assertEquals(newDataTable.getChar(rowId, colId), chars[rowId], ERROR_MESSAGE);
                    break;
                case SHORT:
                    Assert.assertEquals(newDataTable.getShort(rowId, colId), shorts[rowId], ERROR_MESSAGE);
                    break;
                case INT:
                    Assert.assertEquals(newDataTable.getInt(rowId, colId), ints[rowId], ERROR_MESSAGE);
                    break;
                case LONG:
                    Assert.assertEquals(newDataTable.getLong(rowId, colId), longs[rowId], ERROR_MESSAGE);
                    break;
                case FLOAT:
                    Assert.assertEquals(newDataTable.getFloat(rowId, colId), floats[rowId], ERROR_MESSAGE);
                    break;
                case DOUBLE:
                    Assert.assertEquals(newDataTable.getDouble(rowId, colId), doubles[rowId], ERROR_MESSAGE);
                    break;
                case STRING:
                    Assert.assertEquals(newDataTable.getString(rowId, colId), strings[rowId], ERROR_MESSAGE);
                    break;
                case OBJECT:
                    Assert.assertEquals(newDataTable.getObject(rowId, colId), objects[rowId], ERROR_MESSAGE);
                    break;
                // Array columns are compared element-wise via Arrays.equals.
                case BYTE_ARRAY:
                    Assert.assertTrue(Arrays.equals(newDataTable.getByteArray(rowId, colId), byteArrays[rowId]), ERROR_MESSAGE);
                    break;
                case CHAR_ARRAY:
                    Assert.assertTrue(Arrays.equals(newDataTable.getCharArray(rowId, colId), charArrays[rowId]), ERROR_MESSAGE);
                    break;
                case SHORT_ARRAY:
                    Assert.assertTrue(Arrays.equals(newDataTable.getShortArray(rowId, colId), shortArrays[rowId]), ERROR_MESSAGE);
                    break;
                case INT_ARRAY:
                    Assert.assertTrue(Arrays.equals(newDataTable.getIntArray(rowId, colId), intArrays[rowId]), ERROR_MESSAGE);
                    break;
                case LONG_ARRAY:
                    Assert.assertTrue(Arrays.equals(newDataTable.getLongArray(rowId, colId), longArrays[rowId]), ERROR_MESSAGE);
                    break;
                case FLOAT_ARRAY:
                    Assert.assertTrue(Arrays.equals(newDataTable.getFloatArray(rowId, colId), floatArrays[rowId]), ERROR_MESSAGE);
                    break;
                case DOUBLE_ARRAY:
                    Assert.assertTrue(Arrays.equals(newDataTable.getDoubleArray(rowId, colId), doubleArrays[rowId]), ERROR_MESSAGE);
                    break;
                case STRING_ARRAY:
                    Assert.assertTrue(Arrays.equals(newDataTable.getStringArray(rowId, colId), stringArrays[rowId]), ERROR_MESSAGE);
                    break;
            }
        }
    }
}
Also used : DataTable(com.linkedin.pinot.common.utils.DataTable) DataType(com.linkedin.pinot.common.data.FieldSpec.DataType) DataSchema(com.linkedin.pinot.common.utils.DataSchema) Test(org.testng.annotations.Test)

Example 15 with DataType

use of com.linkedin.pinot.common.data.FieldSpec.DataType in project pinot by linkedin.

In class GenerateDataCommand, the method execute:

/**
 * Runs the data generation command.
 *
 * <p>Logs the command, rejects negative record/file counts, loads the schema from
 * {@code _schemaFile}, fills the cardinality and range maps from {@code _schemaAnnFile},
 * builds a generator spec, and asks a {@code DataGenerator} to produce {@code _numRecords}
 * records across {@code _numFiles} files.
 *
 * @return {@code true} once generation has been invoked without an exception
 * @throws RuntimeException if {@code _numRecords} or {@code _numFiles} is negative
 * @throws Exception declared by the method; propagated from the steps above
 */
@Override
public boolean execute() throws Exception {
    LOGGER.info("Executing command: " + this);

    final boolean hasNegativeCount = _numRecords < 0 || _numFiles < 0;
    if (hasNegativeCount) {
        throw new RuntimeException("Cannot generate negative number of records/files.");
    }

    final Schema pinotSchema = Schema.fromFile(new File(_schemaFile));

    // Output containers handed to the spec builder below.
    final List<String> columnNames = new LinkedList<String>();
    final HashMap<String, DataType> typeByColumn = new HashMap<String, DataType>();
    final HashMap<String, FieldType> fieldTypeByColumn = new HashMap<String, FieldType>();
    final HashMap<String, TimeUnit> timeUnitByColumn = new HashMap<String, TimeUnit>();

    // Cardinality and range hints are read from the schema annotation file.
    final HashMap<String, Integer> cardinalityByColumn = new HashMap<String, Integer>();
    final HashMap<String, IntRange> rangeByColumn = new HashMap<String, IntRange>();
    buildCardinalityRangeMaps(_schemaAnnFile, cardinalityByColumn, rangeByColumn);

    final DataGeneratorSpec generatorSpec = buildDataGeneratorSpec(
        pinotSchema, columnNames, typeByColumn, fieldTypeByColumn, timeUnitByColumn, cardinalityByColumn,
        rangeByColumn);

    final DataGenerator generator = new DataGenerator();
    generator.init(generatorSpec);
    generator.generate(_numRecords, _numFiles);
    return true;
}
Also used : HashMap(java.util.HashMap) Schema(com.linkedin.pinot.common.data.Schema) IntRange(org.apache.commons.lang.math.IntRange) LinkedList(java.util.LinkedList) FieldType(com.linkedin.pinot.common.data.FieldSpec.FieldType) DataGenerator(com.linkedin.pinot.tools.data.generator.DataGenerator) DataType(com.linkedin.pinot.common.data.FieldSpec.DataType) TimeUnit(java.util.concurrent.TimeUnit) File(java.io.File) DataGeneratorSpec(com.linkedin.pinot.tools.data.generator.DataGeneratorSpec)

Aggregations

DataType (com.linkedin.pinot.common.data.FieldSpec.DataType): 16; FieldType (com.linkedin.pinot.common.data.FieldSpec.FieldType): 6; FieldSpec (com.linkedin.pinot.common.data.FieldSpec): 4; TimeUnit (java.util.concurrent.TimeUnit): 4; DataSchema (com.linkedin.pinot.common.utils.DataSchema): 3; File (java.io.File): 3; Nonnull (javax.annotation.Nonnull): 3; Type (org.apache.avro.Schema.Type): 3; IntRange (org.apache.commons.lang.math.IntRange): 3; DimensionFieldSpec (com.linkedin.pinot.common.data.DimensionFieldSpec): 2; MetricFieldSpec (com.linkedin.pinot.common.data.MetricFieldSpec): 2; Schema (com.linkedin.pinot.common.data.Schema): 2; TimeFieldSpec (com.linkedin.pinot.common.data.TimeFieldSpec): 2; DataGenerator (com.linkedin.pinot.tools.data.generator.DataGenerator): 2; DataGeneratorSpec (com.linkedin.pinot.tools.data.generator.DataGeneratorSpec): 2; Serializable (java.io.Serializable): 2; HashMap (java.util.HashMap): 2; Schema (org.apache.avro.Schema): 2; Test (org.testng.annotations.Test): 2; DerivedMetricType (com.linkedin.pinot.common.data.MetricFieldSpec.DerivedMetricType): 1