Usage of com.linkedin.pinot.common.data.MetricFieldSpec in project pinot by linkedin.
In class SegmentWithHllIndexCreateHelper, method printSchema:
/**
 * Logs a human-readable summary of the given Pinot schema: schema name,
 * dimension columns, metric columns (multi-value columns are flagged),
 * and the time column.
 *
 * @param schema the Pinot schema to log
 */
private static void printSchema(Schema schema) {
  LOGGER.info("schemaName: {}", schema.getSchemaName());
  LOGGER.info("Dimension columnNames: ");
  int i = 0;
  for (DimensionFieldSpec spec : schema.getDimensionFieldSpecs()) {
    // Parameterized logging avoids eager string concatenation.
    if (spec.isSingleValueField()) {
      LOGGER.info("{} {}", i, spec.getName());
    } else {
      LOGGER.info("{} {} Multi-Value.", i, spec.getName());
    }
    i += 1;
  }
  LOGGER.info("Metric columnNames: ");
  i = 0;
  for (MetricFieldSpec spec : schema.getMetricFieldSpecs()) {
    if (spec.isSingleValueField()) {
      LOGGER.info("{} {}", i, spec.getName());
    } else {
      LOGGER.info("{} {} Multi-Value.", i, spec.getName());
    }
    i += 1;
  }
  LOGGER.info("Time column: {}", schema.getTimeColumnName());
}
Usage of com.linkedin.pinot.common.data.MetricFieldSpec in project pinot by linkedin.
In class ThirdeyeAvroUtilsTest, method testConstructAvroSchemaFromPinotSchema:
@Test
public void testConstructAvroSchemaFromPinotSchema() throws Exception {
  // Build a minimal Pinot schema: one dimension, one metric, one time column.
  com.linkedin.pinot.common.data.Schema pinotSchema = new com.linkedin.pinot.common.data.Schema();
  pinotSchema.setSchemaName("test");
  pinotSchema.addField("d1", new DimensionFieldSpec("d1", DataType.STRING, true));
  pinotSchema.addField("m1", new MetricFieldSpec("m1", DataType.DOUBLE));
  pinotSchema.addField("t", new TimeFieldSpec(new TimeGranularitySpec(DataType.LONG, TimeUnit.HOURS, "t")));

  Schema avroSchema = ThirdeyeAvroUtils.constructAvroSchemaFromPinotSchema(pinotSchema);

  // Every column must keep its Pinot data type in the derived Avro schema.
  Assert.assertEquals(ThirdeyeAvroUtils.getDataTypeForField("d1", avroSchema), "STRING", "Avro schema constructed incorrectly");
  Assert.assertEquals(ThirdeyeAvroUtils.getDataTypeForField("m1", avroSchema), "DOUBLE", "Avro schema constructed incorrectly");
  Assert.assertEquals(ThirdeyeAvroUtils.getDataTypeForField("t", avroSchema), "LONG", "Avro schema constructed incorrectly");
}
Usage of com.linkedin.pinot.common.data.MetricFieldSpec in project pinot by linkedin.
In class ThirdeyePinotSchemaUtils, method createSchema:
/**
 * Transforms the thirdeyeConfig to a Pinot schema.
 * Adds a default __COUNT metric if not already present.
 * Adds additional columns for all dimensions which
 * are either specified as topk or whitelist
 * and hence have a transformed new column_raw.
 * @param thirdeyeConfig config describing dimensions, metrics and the time column
 * @return the generated Pinot schema
 */
public static Schema createSchema(ThirdEyeConfig thirdeyeConfig) {
  Schema schema = new Schema();
  Set<String> transformDimensions = thirdeyeConfig.getTransformDimensions();
  for (DimensionSpec dimensionSpec : thirdeyeConfig.getDimensions()) {
    String dimensionName = dimensionSpec.getName();
    schema.addField(dimensionName, newStringDimensionFieldSpec(dimensionName));
    // Dimensions with a topk/whitelist transform get an extra derived column.
    if (transformDimensions.contains(dimensionName)) {
      String derivedName = dimensionName + ThirdEyeConstants.TOPK_DIMENSION_SUFFIX;
      schema.addField(derivedName, newStringDimensionFieldSpec(derivedName));
    }
  }
  boolean countIncluded = false;
  for (MetricSpec metricSpec : thirdeyeConfig.getMetrics()) {
    FieldSpec fieldSpec = new MetricFieldSpec();
    String metricName = metricSpec.getName();
    if (metricName.equals(ThirdEyeConstants.AUTO_METRIC_COUNT)) {
      countIncluded = true;
    }
    fieldSpec.setName(metricName);
    fieldSpec.setDataType(DataType.valueOf(metricSpec.getType().toString()));
    fieldSpec.setSingleValueField(true);
    schema.addField(metricName, fieldSpec);
  }
  // Guarantee the auto-generated record-count metric; each record counts as 1.
  if (!countIncluded) {
    FieldSpec fieldSpec = new MetricFieldSpec();
    String metricName = ThirdEyeConstants.AUTO_METRIC_COUNT;
    fieldSpec.setName(metricName);
    fieldSpec.setDataType(DataType.LONG);
    fieldSpec.setDefaultNullValue(1);
    schema.addField(metricName, fieldSpec);
  }
  // Incoming and outgoing time specs are identical: time is stored unconverted.
  TimeGranularitySpec incoming = newTimeGranularitySpec(thirdeyeConfig);
  TimeGranularitySpec outgoing = newTimeGranularitySpec(thirdeyeConfig);
  schema.addField(thirdeyeConfig.getTime().getColumnName(), new TimeFieldSpec(incoming, outgoing));
  schema.setSchemaName(thirdeyeConfig.getCollection());
  return schema;
}

/** Creates a single-value STRING dimension field spec with the given name. */
private static FieldSpec newStringDimensionFieldSpec(String name) {
  FieldSpec fieldSpec = new DimensionFieldSpec();
  fieldSpec.setName(name);
  fieldSpec.setDataType(DataType.STRING);
  fieldSpec.setSingleValueField(true);
  return fieldSpec;
}

/** Builds the LONG time granularity spec from the config's time settings. */
private static TimeGranularitySpec newTimeGranularitySpec(ThirdEyeConfig thirdeyeConfig) {
  return new TimeGranularitySpec(DataType.LONG,
      thirdeyeConfig.getTime().getTimeGranularity().getSize(),
      thirdeyeConfig.getTime().getTimeGranularity().getUnit(),
      thirdeyeConfig.getTime().getTimeFormat(),
      thirdeyeConfig.getTime().getColumnName());
}
Usage of com.linkedin.pinot.common.data.MetricFieldSpec in project pinot by linkedin.
In class PlainFieldExtractor, method transform:
/**
 * Transforms a raw input row into a destination row that conforms to the schema:
 * converts the incoming time value to outgoing time, coerces each column's value
 * to its configured {@link PinotDataType}, and substitutes the field's default
 * null value when the value is missing or a conversion fails. Per-column error
 * counters and the row-level null/error/conversion totals are updated as a side
 * effect. Derived metrics are skipped entirely.
 *
 * @param row            source row to read values from
 * @param destinationRow row to populate; also returned
 * @return the populated destination row
 */
@Override
public GenericRow transform(GenericRow row, GenericRow destinationRow) {
  boolean hasError = false;
  boolean hasNull = false;
  boolean hasConversion = false;
  for (String column : _schema.getColumnNames()) {
    FieldSpec fieldSpec = _schema.getFieldSpecFor(column);
    // Ignore transform of DerivedMetric
    if (fieldSpec instanceof MetricFieldSpec && ((MetricFieldSpec) fieldSpec).isDerivedMetric()) {
      continue;
    }
    Object value;
    // Fetch value for this column.
    if (column.equals(_outgoingTimeColumnName) && _timeConverter != null) {
      // Convert incoming time to outgoing time.
      value = row.getValue(_incomingTimeColumnName);
      if (value == null) {
        hasNull = true;
        _totalNullCols++;
      } else {
        try {
          value = _timeConverter.convert(value);
        } catch (Exception e) {
          LOGGER.debug("Caught exception while converting incoming time value: {}", value, e);
          value = null;
          hasError = true;
          _errorCount.put(column, _errorCount.get(column) + 1);
        }
      }
    } else {
      value = row.getValue(column);
      if (value == null) {
        hasNull = true;
        _totalNullCols++;
      }
    }
    // Convert value if necessary.
    PinotDataType dest = _columnType.get(column);
    PinotDataType source = null;
    if (value != null) {
      if (value instanceof Object[]) {
        // Multi-value: infer the source type from the first element.
        Object[] valueArray = (Object[]) value;
        if (valueArray.length > 0) {
          source = MULTI_VALUE_TYPE_MAP.get(valueArray[0].getClass());
          if (source == null) {
            source = PinotDataType.OBJECT_ARRAY;
          }
        } else {
          LOGGER.debug("Got 0 length array.");
          // Use default value for 0 length array.
          value = null;
          hasError = true;
          _errorCount.put(column, _errorCount.get(column) + 1);
        }
      } else {
        // Single-value.
        source = SINGLE_VALUE_TYPE_MAP.get(value.getClass());
        if (source == null) {
          source = PinotDataType.OBJECT;
        }
      }
      if (value != null && source != dest) {
        Object before = value;
        try {
          value = dest.convert(before, source);
          hasConversion = true;
        } catch (Exception e) {
          // Pass the exception so the stack trace is logged (matches the time-conversion path).
          LOGGER.debug("Caught exception while converting value: {} from: {} to: {}", before, source, dest, e);
          value = null;
          hasError = true;
          _errorCount.put(column, _errorCount.get(column) + 1);
        }
      }
      // Allowing this can cause multiple values to map to the same padded value, breaking segment generation.
      if (dest == PinotDataType.STRING) {
        value = StringUtil.trimTrailingNulls((String) value);
      }
    }
    // Assign default value for null value.
    if (value == null) {
      if (fieldSpec.isSingleValueField()) {
        // Single-value field.
        value = fieldSpec.getDefaultNullValue();
      } else {
        // Multi-value field.
        value = new Object[] { fieldSpec.getDefaultNullValue() };
      }
    }
    destinationRow.putField(column, value);
  }
  if (hasError) {
    _totalErrors++;
  }
  if (hasNull) {
    _totalNulls++;
  }
  if (hasConversion) {
    _totalConversions++;
  }
  return destinationRow;
}
Usage of com.linkedin.pinot.common.data.MetricFieldSpec in project pinot by linkedin.
In class PinotSegmentRecordReader, method getSchema:
/**
 * Reconstructs a Pinot {@link Schema} from the segment's column metadata.
 * Each column becomes a dimension, metric, or time field spec according to
 * its recorded field type; columns with an unrecognized field type are
 * skipped instead of being added as null.
 *
 * @return the schema derived from segment metadata
 */
@Override
public Schema getSchema() {
  Schema schema = new Schema();
  schema.setSchemaName(segmentMetadata.getName());
  for (String column : columns) {
    ColumnMetadata columnMetadata = segmentMetadata.getColumnMetadataFor(column);
    String columnName = columnMetadata.getColumnName();
    DataType dataType = columnMetadata.getDataType();
    FieldType fieldType = columnMetadata.getFieldType();
    FieldSpec fieldSpec = null;
    switch(fieldType) {
      case DIMENSION:
        boolean isSingleValue = columnMetadata.isSingleValue();
        fieldSpec = new DimensionFieldSpec(columnName, dataType, isSingleValue);
        break;
      case METRIC:
        fieldSpec = new MetricFieldSpec(columnName, dataType);
        break;
      case TIME:
        TimeUnit timeType = columnMetadata.getTimeUnit();
        TimeGranularitySpec incomingGranularitySpec = new TimeGranularitySpec(dataType, timeType, columnName);
        fieldSpec = new TimeFieldSpec(incomingGranularitySpec);
        break;
      default:
        break;
    }
    // Guard: unknown field types leave fieldSpec null; adding null would fail downstream.
    if (fieldSpec != null) {
      schema.addField(fieldSpec);
    }
  }
  return schema;
}
Aggregations