Use of com.linkedin.pinot.common.data.FieldSpec in project pinot by linkedin.
The class RealtimeFileBasedReaderTest, method testDataSourceWithoutPredicateForMultiValueDimensionColumns.
private void testDataSourceWithoutPredicateForMultiValueDimensionColumns() {
  for (FieldSpec spec : schema.getAllFieldSpecs()) {
    if (!spec.isSingleValueField()) {
      // Walk the same multi-value column in the offline and realtime segments in lock step.
      DataSource offlineDS = offlineSegment.getDataSource(spec.getName());
      DataSource realtimeDS = realtimeSegment.getDataSource(spec.getName());
      Block offlineBlock = offlineDS.nextBlock();
      Block realtimeBlock = realtimeDS.nextBlock();
      BlockMetadata offlineMetadata = offlineBlock.getMetadata();
      BlockMetadata realtimeMetadata = realtimeBlock.getMetadata();
      BlockMultiValIterator offlineValIterator = (BlockMultiValIterator) offlineBlock.getBlockValueSet().iterator();
      BlockMultiValIterator realtimeValIterator = (BlockMultiValIterator) realtimeBlock.getBlockValueSet().iterator();
      // Both segments must contain the same number of documents.
      Assert.assertEquals(offlineSegment.getSegmentMetadata().getTotalDocs(), realtimeSegment.getAggregateDocumentCount());
      while (realtimeValIterator.hasNext()) {
        int[] offlineIds = new int[offlineMetadata.getMaxNumberOfMultiValues()];
        int[] realtimeIds = new int[realtimeMetadata.getMaxNumberOfMultiValues()];
        int offlineLen = offlineValIterator.nextIntVal(offlineIds);
        int realtimeLen = realtimeValIterator.nextIntVal(realtimeIds);
        Assert.assertEquals(offlineLen, realtimeLen);
        // Dictionary IDs may differ between segments, so compare the dictionary values they resolve to.
        for (int i = 0; i < offlineLen; i++) {
          Assert.assertEquals(offlineMetadata.getDictionary().get(offlineIds[i]), realtimeMetadata.getDictionary().get(realtimeIds[i]));
        }
      }
    }
  }
}
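The loop above only exercises columns whose FieldSpec reports isSingleValueField() as false. For reference, a minimal sketch (not taken from the test; the schema name and column name are made up) of how such a multi-value dimension is declared, reusing only the FieldSpec and Schema calls that appear in the snippets on this page; the imports assume these classes sit alongside FieldSpec in com.linkedin.pinot.common.data.

import com.linkedin.pinot.common.data.DimensionFieldSpec;
import com.linkedin.pinot.common.data.FieldSpec;
import com.linkedin.pinot.common.data.FieldSpec.DataType;
import com.linkedin.pinot.common.data.Schema;

public class MultiValueFieldSpecSketch {
  public static void main(String[] args) {
    Schema schema = new Schema();
    schema.setSchemaName("example");
    // The third constructor argument is isSingleValueField; false marks the dimension as multi-valued.
    FieldSpec tags = new DimensionFieldSpec("tags", DataType.STRING, false);
    schema.addField("tags", tags);
    for (FieldSpec spec : schema.getAllFieldSpecs()) {
      // Prints "tags singleValue=false", so the comparison loop above would pick this column up.
      System.out.println(spec.getName() + " singleValue=" + spec.isSingleValueField());
    }
  }
}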
Use of com.linkedin.pinot.common.data.FieldSpec in project pinot by linkedin.
The class ThirdeyeAvroUtilsTest, method testConstructAvroSchemaFromPinotSchema.
@Test
public void testConstructAvroSchemaFromPinotSchema() throws Exception {
  com.linkedin.pinot.common.data.Schema pinotSchema = new com.linkedin.pinot.common.data.Schema();
  pinotSchema.setSchemaName("test");
  FieldSpec spec = new DimensionFieldSpec("d1", DataType.STRING, true);
  pinotSchema.addField("d1", spec);
  spec = new MetricFieldSpec("m1", DataType.DOUBLE);
  pinotSchema.addField("m1", spec);
  spec = new TimeFieldSpec(new TimeGranularitySpec(DataType.LONG, TimeUnit.HOURS, "t"));
  pinotSchema.addField("t", spec);
  Schema avroSchema = ThirdeyeAvroUtils.constructAvroSchemaFromPinotSchema(pinotSchema);
  String dType = ThirdeyeAvroUtils.getDataTypeForField("d1", avroSchema);
  Assert.assertEquals(dType, "STRING", "Avro schema constructed incorrectly");
  dType = ThirdeyeAvroUtils.getDataTypeForField("m1", avroSchema);
  Assert.assertEquals(dType, "DOUBLE", "Avro schema constructed incorrectly");
  dType = ThirdeyeAvroUtils.getDataTypeForField("t", avroSchema);
  Assert.assertEquals(dType, "LONG", "Avro schema constructed incorrectly");
}
Use of com.linkedin.pinot.common.data.FieldSpec in project pinot by linkedin.
The class ThirdeyeAvroUtils, method constructAvroSchemaFromPinotSchema.
/**
 * Constructs an Avro schema from a Pinot schema.
 * @param schema the Pinot schema to convert
 * @return the equivalent Avro record schema, with every field declared nullable
 */
public static Schema constructAvroSchemaFromPinotSchema(com.linkedin.pinot.common.data.Schema schema) {
  RecordBuilder<Schema> recordBuilder = SchemaBuilder.record("record");
  FieldAssembler<Schema> fieldAssembler = recordBuilder.fields();
  for (FieldSpec fieldSpec : schema.getAllFieldSpecs()) {
    String fieldName = fieldSpec.getName();
    DataType dataType = fieldSpec.getDataType();
    BaseFieldTypeBuilder<Schema> baseFieldTypeBuilder = fieldAssembler.name(fieldName).type().nullable();
    switch (dataType) {
      case BOOLEAN:
        fieldAssembler = baseFieldTypeBuilder.booleanType().noDefault();
        break;
      case DOUBLE:
        fieldAssembler = baseFieldTypeBuilder.doubleType().noDefault();
        break;
      case FLOAT:
        fieldAssembler = baseFieldTypeBuilder.floatType().noDefault();
        break;
      case INT:
        fieldAssembler = baseFieldTypeBuilder.intType().noDefault();
        break;
      case LONG:
        fieldAssembler = baseFieldTypeBuilder.longType().noDefault();
        break;
      case STRING:
        fieldAssembler = baseFieldTypeBuilder.stringType().noDefault();
        break;
      default:
        // Unsupported data types are skipped: the field builder is never completed, so no field is added.
        break;
    }
  }
  Schema avroSchema = fieldAssembler.endRecord();
  LOGGER.info("Avro Schema {}", avroSchema.toString(true));
  return avroSchema;
}
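A minimal usage sketch (not from the project) that feeds the returned schema into Avro's GenericRecord API. The field names mirror the test above, the values are made up, and the import for ThirdeyeAvroUtils is omitted because its package is not shown in this excerpt; the Pinot imports assume the com.linkedin.pinot.common.data package. Since every field is built with nullable(), unset fields may legitimately stay null.

import java.util.concurrent.TimeUnit;

import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericRecord;

import com.linkedin.pinot.common.data.DimensionFieldSpec;
import com.linkedin.pinot.common.data.FieldSpec.DataType;
import com.linkedin.pinot.common.data.MetricFieldSpec;
import com.linkedin.pinot.common.data.TimeFieldSpec;
import com.linkedin.pinot.common.data.TimeGranularitySpec;

public class AvroSchemaUsageSketch {
  public static void main(String[] args) {
    // Build a small Pinot schema, mirroring the test above.
    com.linkedin.pinot.common.data.Schema pinotSchema = new com.linkedin.pinot.common.data.Schema();
    pinotSchema.setSchemaName("test");
    pinotSchema.addField("d1", new DimensionFieldSpec("d1", DataType.STRING, true));
    pinotSchema.addField("m1", new MetricFieldSpec("m1", DataType.DOUBLE));
    pinotSchema.addField("t", new TimeFieldSpec(new TimeGranularitySpec(DataType.LONG, TimeUnit.HOURS, "t")));

    // Convert, then populate an Avro record against the generated schema.
    Schema avroSchema = ThirdeyeAvroUtils.constructAvroSchemaFromPinotSchema(pinotSchema);
    GenericRecord record = new GenericData.Record(avroSchema);
    record.put("d1", "us");     // nullable STRING dimension
    record.put("m1", 1.5d);     // nullable DOUBLE metric
    record.put("t", 420000L);   // nullable LONG time column
    System.out.println(record);
  }
}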
Use of com.linkedin.pinot.common.data.FieldSpec in project pinot by linkedin.
The class ThirdeyePinotSchemaUtils, method createSchema.
/**
 * Transforms the thirdeyeConfig to a Pinot schema.
 * Adds a default __COUNT metric if not already present.
 * Adds an additional column for every dimension that is either specified as topk or whitelist,
 * and hence gets a transformed companion column (the dimension name plus the topk suffix).
 * @param thirdeyeConfig the ThirdEye config to convert
 * @return the generated Pinot schema
 */
public static Schema createSchema(ThirdEyeConfig thirdeyeConfig) {
  Schema schema = new Schema();
  Set<String> transformDimensions = thirdeyeConfig.getTransformDimensions();
  for (DimensionSpec dimensionSpec : thirdeyeConfig.getDimensions()) {
    FieldSpec fieldSpec = new DimensionFieldSpec();
    String dimensionName = dimensionSpec.getName();
    fieldSpec.setName(dimensionName);
    fieldSpec.setDataType(DataType.STRING);
    fieldSpec.setSingleValueField(true);
    schema.addField(dimensionName, fieldSpec);
    // Topk/whitelist dimensions get an additional transformed column.
    if (transformDimensions.contains(dimensionName)) {
      fieldSpec = new DimensionFieldSpec();
      dimensionName = dimensionName + ThirdEyeConstants.TOPK_DIMENSION_SUFFIX;
      fieldSpec.setName(dimensionName);
      fieldSpec.setDataType(DataType.STRING);
      fieldSpec.setSingleValueField(true);
      schema.addField(dimensionName, fieldSpec);
    }
  }
  boolean countIncluded = false;
  for (MetricSpec metricSpec : thirdeyeConfig.getMetrics()) {
    FieldSpec fieldSpec = new MetricFieldSpec();
    String metricName = metricSpec.getName();
    if (metricName.equals(ThirdEyeConstants.AUTO_METRIC_COUNT)) {
      countIncluded = true;
    }
    fieldSpec.setName(metricName);
    fieldSpec.setDataType(DataType.valueOf(metricSpec.getType().toString()));
    fieldSpec.setSingleValueField(true);
    schema.addField(metricName, fieldSpec);
  }
  // Inject the default count metric when the config does not define one.
  if (!countIncluded) {
    FieldSpec fieldSpec = new MetricFieldSpec();
    String metricName = ThirdEyeConstants.AUTO_METRIC_COUNT;
    fieldSpec.setName(metricName);
    fieldSpec.setDataType(DataType.LONG);
    fieldSpec.setDefaultNullValue(1);
    schema.addField(metricName, fieldSpec);
  }
  // Incoming and outgoing time granularity specs are identical.
  TimeGranularitySpec incoming =
      new TimeGranularitySpec(DataType.LONG, thirdeyeConfig.getTime().getTimeGranularity().getSize(),
          thirdeyeConfig.getTime().getTimeGranularity().getUnit(), thirdeyeConfig.getTime().getTimeFormat(),
          thirdeyeConfig.getTime().getColumnName());
  TimeGranularitySpec outgoing =
      new TimeGranularitySpec(DataType.LONG, thirdeyeConfig.getTime().getTimeGranularity().getSize(),
          thirdeyeConfig.getTime().getTimeGranularity().getUnit(), thirdeyeConfig.getTime().getTimeFormat(),
          thirdeyeConfig.getTime().getColumnName());
  schema.addField(thirdeyeConfig.getTime().getColumnName(), new TimeFieldSpec(incoming, outgoing));
  schema.setSchemaName(thirdeyeConfig.getCollection());
  return schema;
}
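For reference, a hand-built sketch of the schema shape createSchema() produces for a hypothetical config with one topk dimension ("country"), one declared metric, and an hourly LONG time column. The "_topk" suffix and "__COUNT" name are assumptions based on the javadoc above, all column names are made up, and the imports assume the com.linkedin.pinot.common.data package.

import java.util.concurrent.TimeUnit;

import com.linkedin.pinot.common.data.DimensionFieldSpec;
import com.linkedin.pinot.common.data.FieldSpec;
import com.linkedin.pinot.common.data.FieldSpec.DataType;
import com.linkedin.pinot.common.data.MetricFieldSpec;
import com.linkedin.pinot.common.data.Schema;
import com.linkedin.pinot.common.data.TimeFieldSpec;
import com.linkedin.pinot.common.data.TimeGranularitySpec;

public class CreateSchemaShapeSketch {
  public static void main(String[] args) {
    Schema schema = new Schema();
    schema.setSchemaName("sampleCollection");

    // Original dimension plus its transformed companion column ("_topk" suffix is an assumption).
    schema.addField("country", new DimensionFieldSpec("country", DataType.STRING, true));
    schema.addField("country_topk", new DimensionFieldSpec("country_topk", DataType.STRING, true));

    // Declared metric, plus the auto-added count metric with a default null value of 1.
    schema.addField("impressions", new MetricFieldSpec("impressions", DataType.LONG));
    FieldSpec count = new MetricFieldSpec();
    count.setName("__COUNT");
    count.setDataType(DataType.LONG);
    count.setDefaultNullValue(1);
    schema.addField("__COUNT", count);

    // Time column with identical incoming and outgoing granularity specs.
    TimeGranularitySpec granularity = new TimeGranularitySpec(DataType.LONG, TimeUnit.HOURS, "hoursSinceEpoch");
    schema.addField("hoursSinceEpoch", new TimeFieldSpec(granularity, granularity));
    System.out.println(schema);
  }
}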
Use of com.linkedin.pinot.common.data.FieldSpec in project pinot by linkedin.
The class RealtimeTableDataManager, method isValid.
/**
 * Validate a schema against the table config for real-time record consumption.
 * Ideally, we should validate these things when the schema is added or the table is created, but either of these
 * may be changed while the table is already provisioned. For the change to take effect, we need to restart the
 * servers, so validation at this place is fine.
 *
 * As of now, the following validations are done:
 * 1. Make sure that the sorted column, if specified, is not multi-valued.
 * 2. Validate the schema itself.
 *
 * We allow the user to specify multiple sorted columns, but only consider the first one for now
 * (secondary sort is not yet implemented).
 *
 * If we add more validations, it may make sense to split this method into multiple validation methods.
 * For now, we try to surface all the invalid cases before returning from this method.
 *
 * @param schema the schema to validate
 * @param indexingConfig the table's indexing config
 * @return true if the schema is valid.
 */
private boolean isValid(Schema schema, IndexingConfig indexingConfig) {
  // 1. Make sure that the sorted column is not a multi-value field.
  List<String> sortedColumns = indexingConfig.getSortedColumn();
  boolean isValid = true;
  if (!sortedColumns.isEmpty()) {
    final String sortedColumn = sortedColumns.get(0);
    if (sortedColumns.size() > 1) {
      LOGGER.warn("More than one sorted column configured. Using {}", sortedColumn);
    }
    // Assumes the configured sorted column is present in the schema.
    FieldSpec fieldSpec = schema.getFieldSpecFor(sortedColumn);
    if (!fieldSpec.isSingleValueField()) {
      LOGGER.error("Cannot configure multi-valued column {} as sorted column", sortedColumn);
      isValid = false;
    }
  }
  // 2. We want to get the schema errors, if any, even if isValid is already false.
  if (!schema.validate(LOGGER)) {
    isValid = false;
  }
  return isValid;
}
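A standalone sketch of the first check in isolation, with the null guards made explicit. The helper name, schema name, and column name are made up for illustration, and the imports again assume the com.linkedin.pinot.common.data package.

import java.util.Collections;
import java.util.List;

import com.linkedin.pinot.common.data.DimensionFieldSpec;
import com.linkedin.pinot.common.data.FieldSpec;
import com.linkedin.pinot.common.data.FieldSpec.DataType;
import com.linkedin.pinot.common.data.Schema;

public class SortedColumnCheckSketch {
  /** Returns false when the first configured sorted column is multi-valued or missing from the schema. */
  static boolean isSortedColumnValid(Schema schema, List<String> sortedColumns) {
    if (sortedColumns == null || sortedColumns.isEmpty()) {
      return true; // nothing to validate
    }
    FieldSpec fieldSpec = schema.getFieldSpecFor(sortedColumns.get(0));
    return fieldSpec != null && fieldSpec.isSingleValueField();
  }

  public static void main(String[] args) {
    Schema schema = new Schema();
    schema.setSchemaName("example");
    // Multi-valued dimension: configuring it as the sorted column should be rejected.
    schema.addField("tags", new DimensionFieldSpec("tags", DataType.STRING, false));
    System.out.println(isSortedColumnValid(schema, Collections.singletonList("tags"))); // prints false
  }
}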