Example usage of com.linkedin.pinot.common.data.FieldSpec.DataType in the pinot project (by LinkedIn): class ThirdeyeAvroUtils, method constructAvroSchemaFromPinotSchema.
/**
 * Constructs an Avro record schema from a Pinot schema.
 * Each Pinot field becomes a nullable Avro field of the corresponding primitive type.
 *
 * @param schema the Pinot schema to convert
 * @return the equivalent Avro {@link Schema} (record named "record")
 * @throws UnsupportedOperationException if a field's data type has no Avro mapping
 */
public static Schema constructAvroSchemaFromPinotSchema(com.linkedin.pinot.common.data.Schema schema) {
  RecordBuilder<Schema> recordBuilder = SchemaBuilder.record("record");
  FieldAssembler<Schema> fieldAssembler = recordBuilder.fields();
  for (FieldSpec fieldSpec : schema.getAllFieldSpecs()) {
    String fieldName = fieldSpec.getName();
    DataType dataType = fieldSpec.getDataType();
    // All fields are declared nullable so records with missing values still serialize.
    BaseFieldTypeBuilder<Schema> baseFieldTypeBuilder = fieldAssembler.name(fieldName).type().nullable();
    switch (dataType) {
      case BOOLEAN:
        fieldAssembler = baseFieldTypeBuilder.booleanType().noDefault();
        break;
      case DOUBLE:
        fieldAssembler = baseFieldTypeBuilder.doubleType().noDefault();
        break;
      case FLOAT:
        fieldAssembler = baseFieldTypeBuilder.floatType().noDefault();
        break;
      case INT:
        fieldAssembler = baseFieldTypeBuilder.intType().noDefault();
        break;
      case LONG:
        fieldAssembler = baseFieldTypeBuilder.longType().noDefault();
        break;
      case STRING:
        fieldAssembler = baseFieldTypeBuilder.stringType().noDefault();
        break;
      default:
        // Previously this fell through silently, dropping the field and leaving the
        // assembler with a half-built field definition; fail fast instead.
        throw new UnsupportedOperationException(
            "Unsupported data type: " + dataType + " for field: " + fieldName);
    }
  }
  Schema avroSchema = fieldAssembler.endRecord();
  LOGGER.info("Avro Schema {}", avroSchema.toString(true));
  return avroSchema;
}
Example usage of com.linkedin.pinot.common.data.FieldSpec.DataType in the pinot project (by LinkedIn): class PinotSegmentRecordReader, method getSchema.
/**
 * Builds a Pinot {@link Schema} from the segment's column metadata.
 * Dimension, metric and time columns are mapped to their corresponding field specs.
 *
 * @return the reconstructed schema, named after the segment
 * @throws UnsupportedOperationException if a column has an unrecognized field type
 */
@Override
public Schema getSchema() {
  Schema schema = new Schema();
  schema.setSchemaName(segmentMetadata.getName());
  for (String column : columns) {
    ColumnMetadata columnMetadata = segmentMetadata.getColumnMetadataFor(column);
    String columnName = columnMetadata.getColumnName();
    DataType dataType = columnMetadata.getDataType();
    FieldType fieldType = columnMetadata.getFieldType();
    FieldSpec fieldSpec;
    switch (fieldType) {
      case DIMENSION:
        boolean isSingleValue = columnMetadata.isSingleValue();
        fieldSpec = new DimensionFieldSpec(columnName, dataType, isSingleValue);
        break;
      case METRIC:
        fieldSpec = new MetricFieldSpec(columnName, dataType);
        break;
      case TIME:
        TimeUnit timeType = columnMetadata.getTimeUnit();
        TimeGranularitySpec incomingGranularitySpec = new TimeGranularitySpec(dataType, timeType, columnName);
        fieldSpec = new TimeFieldSpec(incomingGranularitySpec);
        break;
      default:
        // Previously fieldSpec stayed null here and was passed to addField(), deferring
        // a confusing NPE; throw immediately with the offending column instead.
        throw new UnsupportedOperationException(
            "Unsupported field type: " + fieldType + " for column: " + columnName);
    }
    schema.addField(fieldSpec);
  }
  return schema;
}
Example usage of com.linkedin.pinot.common.data.FieldSpec.DataType in the pinot project (by LinkedIn): class SegmentTestUtils, method getColumnType.
/**
 * Maps an Avro field's schema to the corresponding Pinot {@link DataType}.
 * Union schemas are unwrapped first; array (multi-value) columns are resolved
 * to their element type, unwrapping a single-field record wrapper if present.
 *
 * @param field the Avro field to inspect
 * @return the Pinot data type for the column
 */
public static DataType getColumnType(Field field) {
  org.apache.avro.Schema columnSchema = extractSchemaFromUnionIfNeeded(field.schema());
  final Type columnType = columnSchema.getType();
  // Single-value column: the type maps directly.
  if (columnType != Type.ARRAY) {
    return DataType.valueOf(columnType);
  }
  // Multi-value column: resolve the element type.
  org.apache.avro.Schema elementSchema = extractSchemaFromUnionIfNeeded(columnSchema.getElementType());
  if (elementSchema.getType() == Type.RECORD) {
    if (elementSchema.getFields().size() != 1) {
      throw new RuntimeException("More than one schema in Multi-value column!");
    }
    // Unwrap the record's single field, then unwrap any union around it.
    elementSchema = extractSchemaFromUnionIfNeeded(elementSchema.getFields().get(0).schema());
  }
  return DataType.valueOf(elementSchema.getType());
}
Example usage of com.linkedin.pinot.common.data.FieldSpec.DataType in the pinot project (by LinkedIn): class DataTableSerDeTest, method testAllDataTypes.
/**
 * Round-trip serde test covering every {@link DataType}: builds a DataTable with one
 * column per data type, fills it with random values (recorded in local arrays),
 * serializes it to bytes, deserializes it, and asserts every cell matches.
 *
 * @throws IOException if DataTable serialization/deserialization fails
 */
@Test
public void testAllDataTypes() throws IOException {
  // One column per enum constant; the column name is the type's own name.
  DataType[] columnTypes = DataType.values();
  int numColumns = columnTypes.length;
  String[] columnNames = new String[numColumns];
  for (int i = 0; i < numColumns; i++) {
    columnNames[i] = columnTypes[i].name();
  }
  DataSchema dataSchema = new DataSchema(columnNames, columnTypes);
  DataTableBuilder dataTableBuilder = new DataTableBuilder(dataSchema);
  // Expected values per row, kept per-type so they can be compared after the round trip.
  boolean[] booleans = new boolean[NUM_ROWS];
  byte[] bytes = new byte[NUM_ROWS];
  char[] chars = new char[NUM_ROWS];
  short[] shorts = new short[NUM_ROWS];
  int[] ints = new int[NUM_ROWS];
  long[] longs = new long[NUM_ROWS];
  float[] floats = new float[NUM_ROWS];
  double[] doubles = new double[NUM_ROWS];
  String[] strings = new String[NUM_ROWS];
  Object[] objects = new Object[NUM_ROWS];
  byte[][] byteArrays = new byte[NUM_ROWS][];
  char[][] charArrays = new char[NUM_ROWS][];
  short[][] shortArrays = new short[NUM_ROWS][];
  int[][] intArrays = new int[NUM_ROWS][];
  long[][] longArrays = new long[NUM_ROWS][];
  float[][] floatArrays = new float[NUM_ROWS][];
  double[][] doubleArrays = new double[NUM_ROWS][];
  String[][] stringArrays = new String[NUM_ROWS][];
  // Phase 1: populate the table, recording every generated value.
  for (int rowId = 0; rowId < NUM_ROWS; rowId++) {
    dataTableBuilder.startRow();
    for (int colId = 0; colId < numColumns; colId++) {
      switch(columnTypes[colId]) {
        case BOOLEAN:
          booleans[rowId] = RANDOM.nextBoolean();
          dataTableBuilder.setColumn(colId, booleans[rowId]);
          break;
        case BYTE:
          bytes[rowId] = (byte) RANDOM.nextInt();
          dataTableBuilder.setColumn(colId, bytes[rowId]);
          break;
        case CHAR:
          chars[rowId] = (char) RANDOM.nextInt();
          dataTableBuilder.setColumn(colId, chars[rowId]);
          break;
        case SHORT:
          shorts[rowId] = (short) RANDOM.nextInt();
          dataTableBuilder.setColumn(colId, shorts[rowId]);
          break;
        case INT:
          ints[rowId] = RANDOM.nextInt();
          dataTableBuilder.setColumn(colId, ints[rowId]);
          break;
        case LONG:
          longs[rowId] = RANDOM.nextLong();
          dataTableBuilder.setColumn(colId, longs[rowId]);
          break;
        case FLOAT:
          floats[rowId] = RANDOM.nextFloat();
          dataTableBuilder.setColumn(colId, floats[rowId]);
          break;
        case DOUBLE:
          doubles[rowId] = RANDOM.nextDouble();
          dataTableBuilder.setColumn(colId, doubles[rowId]);
          break;
        case STRING:
          strings[rowId] = RandomStringUtils.random(RANDOM.nextInt(20));
          dataTableBuilder.setColumn(colId, strings[rowId]);
          break;
        // Just test Double here, all object types will be covered in ObjectCustomSerDeTest.
        case OBJECT:
          objects[rowId] = RANDOM.nextDouble();
          dataTableBuilder.setColumn(colId, objects[rowId]);
          break;
        // Array cases below share the `length` local declared in the first array case;
        // arrays may be empty (length 0), which exercises the empty-array path too.
        case BYTE_ARRAY:
          int length = RANDOM.nextInt(20);
          byte[] byteArray = new byte[length];
          for (int i = 0; i < length; i++) {
            byteArray[i] = (byte) RANDOM.nextInt();
          }
          byteArrays[rowId] = byteArray;
          dataTableBuilder.setColumn(colId, byteArray);
          break;
        case CHAR_ARRAY:
          length = RANDOM.nextInt(20);
          char[] charArray = new char[length];
          for (int i = 0; i < length; i++) {
            charArray[i] = (char) RANDOM.nextInt();
          }
          charArrays[rowId] = charArray;
          dataTableBuilder.setColumn(colId, charArray);
          break;
        case SHORT_ARRAY:
          length = RANDOM.nextInt(20);
          short[] shortArray = new short[length];
          for (int i = 0; i < length; i++) {
            shortArray[i] = (short) RANDOM.nextInt();
          }
          shortArrays[rowId] = shortArray;
          dataTableBuilder.setColumn(colId, shortArray);
          break;
        case INT_ARRAY:
          length = RANDOM.nextInt(20);
          int[] intArray = new int[length];
          for (int i = 0; i < length; i++) {
            intArray[i] = RANDOM.nextInt();
          }
          intArrays[rowId] = intArray;
          dataTableBuilder.setColumn(colId, intArray);
          break;
        case LONG_ARRAY:
          length = RANDOM.nextInt(20);
          long[] longArray = new long[length];
          for (int i = 0; i < length; i++) {
            longArray[i] = RANDOM.nextLong();
          }
          longArrays[rowId] = longArray;
          dataTableBuilder.setColumn(colId, longArray);
          break;
        case FLOAT_ARRAY:
          length = RANDOM.nextInt(20);
          float[] floatArray = new float[length];
          for (int i = 0; i < length; i++) {
            floatArray[i] = RANDOM.nextFloat();
          }
          floatArrays[rowId] = floatArray;
          dataTableBuilder.setColumn(colId, floatArray);
          break;
        case DOUBLE_ARRAY:
          length = RANDOM.nextInt(20);
          double[] doubleArray = new double[length];
          for (int i = 0; i < length; i++) {
            doubleArray[i] = RANDOM.nextDouble();
          }
          doubleArrays[rowId] = doubleArray;
          dataTableBuilder.setColumn(colId, doubleArray);
          break;
        case STRING_ARRAY:
          length = RANDOM.nextInt(20);
          String[] stringArray = new String[length];
          for (int i = 0; i < length; i++) {
            stringArray[i] = RandomStringUtils.random(RANDOM.nextInt(20));
          }
          stringArrays[rowId] = stringArray;
          dataTableBuilder.setColumn(colId, stringArray);
          break;
      }
    }
    dataTableBuilder.finishRow();
  }
  // Phase 2: serialize to bytes and deserialize into a fresh DataTable.
  DataTable dataTable = dataTableBuilder.build();
  DataTable newDataTable = DataTableFactory.getDataTable(dataTable.toBytes());
  Assert.assertEquals(newDataTable.getDataSchema(), dataSchema, ERROR_MESSAGE);
  Assert.assertEquals(newDataTable.getNumberOfRows(), NUM_ROWS, ERROR_MESSAGE);
  // Phase 3: verify every cell of the deserialized table against the recorded values.
  for (int rowId = 0; rowId < NUM_ROWS; rowId++) {
    for (int colId = 0; colId < numColumns; colId++) {
      switch(columnTypes[colId]) {
        case BOOLEAN:
          Assert.assertEquals(newDataTable.getBoolean(rowId, colId), booleans[rowId], ERROR_MESSAGE);
          break;
        case BYTE:
          Assert.assertEquals(newDataTable.getByte(rowId, colId), bytes[rowId], ERROR_MESSAGE);
          break;
        case CHAR:
          Assert.assertEquals(newDataTable.getChar(rowId, colId), chars[rowId], ERROR_MESSAGE);
          break;
        case SHORT:
          Assert.assertEquals(newDataTable.getShort(rowId, colId), shorts[rowId], ERROR_MESSAGE);
          break;
        case INT:
          Assert.assertEquals(newDataTable.getInt(rowId, colId), ints[rowId], ERROR_MESSAGE);
          break;
        case LONG:
          Assert.assertEquals(newDataTable.getLong(rowId, colId), longs[rowId], ERROR_MESSAGE);
          break;
        case FLOAT:
          Assert.assertEquals(newDataTable.getFloat(rowId, colId), floats[rowId], ERROR_MESSAGE);
          break;
        case DOUBLE:
          Assert.assertEquals(newDataTable.getDouble(rowId, colId), doubles[rowId], ERROR_MESSAGE);
          break;
        case STRING:
          Assert.assertEquals(newDataTable.getString(rowId, colId), strings[rowId], ERROR_MESSAGE);
          break;
        case OBJECT:
          Assert.assertEquals(newDataTable.getObject(rowId, colId), objects[rowId], ERROR_MESSAGE);
          break;
        // Array cells are compared element-wise via Arrays.equals.
        case BYTE_ARRAY:
          Assert.assertTrue(Arrays.equals(newDataTable.getByteArray(rowId, colId), byteArrays[rowId]), ERROR_MESSAGE);
          break;
        case CHAR_ARRAY:
          Assert.assertTrue(Arrays.equals(newDataTable.getCharArray(rowId, colId), charArrays[rowId]), ERROR_MESSAGE);
          break;
        case SHORT_ARRAY:
          Assert.assertTrue(Arrays.equals(newDataTable.getShortArray(rowId, colId), shortArrays[rowId]), ERROR_MESSAGE);
          break;
        case INT_ARRAY:
          Assert.assertTrue(Arrays.equals(newDataTable.getIntArray(rowId, colId), intArrays[rowId]), ERROR_MESSAGE);
          break;
        case LONG_ARRAY:
          Assert.assertTrue(Arrays.equals(newDataTable.getLongArray(rowId, colId), longArrays[rowId]), ERROR_MESSAGE);
          break;
        case FLOAT_ARRAY:
          Assert.assertTrue(Arrays.equals(newDataTable.getFloatArray(rowId, colId), floatArrays[rowId]), ERROR_MESSAGE);
          break;
        case DOUBLE_ARRAY:
          Assert.assertTrue(Arrays.equals(newDataTable.getDoubleArray(rowId, colId), doubleArrays[rowId]), ERROR_MESSAGE);
          break;
        case STRING_ARRAY:
          Assert.assertTrue(Arrays.equals(newDataTable.getStringArray(rowId, colId), stringArrays[rowId]), ERROR_MESSAGE);
          break;
      }
    }
  }
}
Example usage of com.linkedin.pinot.common.data.FieldSpec.DataType in the pinot project (by LinkedIn): class GenerateDataCommand, method execute.
/**
 * Generates random data files according to the configured schema and annotation files.
 *
 * @return {@code true} on successful generation
 * @throws IllegalArgumentException if the requested record or file count is negative
 * @throws Exception if schema loading or data generation fails
 */
@Override
public boolean execute() throws Exception {
  // Parameterized logging (consistent with the rest of the codebase) avoids
  // eager string concatenation when INFO is disabled.
  LOGGER.info("Executing command: {}", this);
  if (_numRecords < 0 || _numFiles < 0) {
    // IllegalArgumentException is a RuntimeException, so existing callers catching
    // RuntimeException are unaffected, but the failure reason is now explicit.
    throw new IllegalArgumentException("Cannot generate negative number of records/files.");
  }
  Schema schema = Schema.fromFile(new File(_schemaFile));
  // ArrayList is the idiomatic default List implementation; nothing here needs LinkedList.
  List<String> columns = new ArrayList<String>();
  final HashMap<String, DataType> dataTypes = new HashMap<String, DataType>();
  final HashMap<String, FieldType> fieldTypes = new HashMap<String, FieldType>();
  final HashMap<String, TimeUnit> timeUnits = new HashMap<String, TimeUnit>();
  final HashMap<String, Integer> cardinality = new HashMap<String, Integer>();
  final HashMap<String, IntRange> range = new HashMap<String, IntRange>();
  // cardinality/range are populated from the annotation file; the remaining maps and
  // the column list are filled in while building the generator spec.
  buildCardinalityRangeMaps(_schemaAnnFile, cardinality, range);
  final DataGeneratorSpec spec =
      buildDataGeneratorSpec(schema, columns, dataTypes, fieldTypes, timeUnits, cardinality, range);
  final DataGenerator gen = new DataGenerator();
  gen.init(spec);
  gen.generate(_numRecords, _numFiles);
  return true;
}
Aggregations