Use of com.linkedin.pinot.common.data.TimeFieldSpec in project pinot by LinkedIn.
Class AvroUtils, method getPinotSchemaFromAvroSchema.
/**
 * Given an avro schema object along with column field types and time unit, return the equivalent
 * pinot schema object.
 *
 * @param avroSchema Avro schema for which to get the Pinot schema.
 * @param fieldTypes Map containing fieldTypes for each column; must contain an entry for every
 *                   field in {@code avroSchema}.
 * @param timeUnit Time unit to be used for the time column.
 * @return Return the equivalent pinot schema for the given avro schema.
 * @throws NullPointerException if a field in the avro schema has no entry in {@code fieldTypes}.
 * @throws UnsupportedOperationException if a field has an unrecognized field type.
 */
private static Schema getPinotSchemaFromAvroSchema(org.apache.avro.Schema avroSchema,
    Map<String, FieldSpec.FieldType> fieldTypes, TimeUnit timeUnit) {
  Schema pinotSchema = new Schema();
  for (final Field field : avroSchema.getFields()) {
    String fieldName = field.name();
    FieldSpec.DataType dataType;
    try {
      dataType = AvroRecordReader.getColumnType(field);
    } catch (UnsupportedOperationException e) {
      // Fall back to STRING so one exotic Avro type does not abort the whole schema derivation.
      LOGGER.warn("Unsupported field type for field {} schema {}, using String instead.", fieldName, field.schema());
      dataType = FieldSpec.DataType.STRING;
    }
    FieldSpec.FieldType fieldType = fieldTypes.get(fieldName);
    // A missing entry would otherwise NPE on the switch below with no indication of which column.
    Preconditions.checkNotNull(fieldType, "Missing field type for field: %s", fieldName);
    boolean isSingleValueField = AvroRecordReader.isSingleValueField(field);
    switch (fieldType) {
      case DIMENSION:
        pinotSchema.addField(new DimensionFieldSpec(fieldName, dataType, isSingleValueField));
        break;
      case METRIC:
        // Metric columns must be single-valued so they can be aggregated.
        Preconditions.checkState(isSingleValueField, "Unsupported multi-value for metric field.");
        pinotSchema.addField(new MetricFieldSpec(fieldName, dataType));
        break;
      case TIME:
        Preconditions.checkState(isSingleValueField, "Unsupported multi-value for time field.");
        pinotSchema.addField(new TimeFieldSpec(fieldName, dataType, timeUnit));
        break;
      default:
        throw new UnsupportedOperationException("Unsupported field type: " + fieldType + " for field: " + fieldName);
    }
  }
  return pinotSchema;
}
Use of com.linkedin.pinot.common.data.TimeFieldSpec in project pinot by LinkedIn.
Class KafkaJSONMessageDecoder, method decode.
/**
 * Decodes a UTF-8 JSON payload into {@code destination} using the configured Pinot schema.
 * Columns absent from the message are filled with their default null value; an empty JSON
 * array for a multi-value dimension becomes a one-element default array. Returns
 * {@code destination} on success, or {@code null} if the payload could not be decoded
 * (this decoder is a boundary, so all decode failures are logged and swallowed).
 */
@Override
public GenericRow decode(byte[] payload, GenericRow destination) {
  try {
    JSONObject message = new JSONObject(new String(payload, "UTF-8"));
    for (FieldSpec dimensionSpec : schema.getDimensionFieldSpecs()) {
      String column = dimensionSpec.getName();
      Object entry;
      if (!message.has(column)) {
        entry = AvroRecordReader.getDefaultNullValue(dimensionSpec);
      } else if (dimensionSpec.isSingleValueField()) {
        entry = stringToDataType(dimensionSpec, message.getString(column));
      } else {
        JSONArray jsonArray = message.getJSONArray(column);
        Object[] array = new Object[jsonArray.length()];
        for (int i = 0; i < array.length; i++) {
          array[i] = stringToDataType(dimensionSpec, jsonArray.getString(i));
        }
        // Downstream code expects a non-empty multi-value, so an empty array is replaced
        // with a single default element.
        entry = (array.length == 0)
            ? new Object[] { AvroRecordReader.getDefaultNullValue(dimensionSpec) }
            : array;
      }
      destination.putField(column, entry);
    }
    for (FieldSpec metricSpec : schema.getMetricFieldSpecs()) {
      String column = metricSpec.getName();
      Object entry = message.has(column)
          ? stringToDataType(metricSpec, message.getString(column))
          : AvroRecordReader.getDefaultNullValue(metricSpec);
      destination.putField(column, entry);
    }
    TimeFieldSpec timeSpec = schema.getTimeFieldSpec();
    // NOTE(review): guard added — a schema without a time column would previously NPE here.
    if (timeSpec != null) {
      String column = timeSpec.getName();
      Object entry = message.has(column)
          ? stringToDataType(timeSpec, message.getString(column))
          : AvroRecordReader.getDefaultNullValue(timeSpec);
      destination.putField(column, entry);
    }
    return destination;
  } catch (Exception e) {
    LOGGER.error("error decoding , ", e);
  }
  return null;
}
Use of com.linkedin.pinot.common.data.TimeFieldSpec in project pinot by LinkedIn.
Class OffHeapStarTreeBuilder, method init.
/**
 * Initializes the off-heap star-tree builder from the given config: captures schema and
 * split/skip settings, registers every dimension (plus the time column, which is treated as
 * an extra dimension) with a per-column dictionary and star value, sizes the fixed-width
 * dimension/metric byte layout, opens the output data buffer, and creates the root node.
 *
 * @param builderConfig source of schema, split order, thresholds and output directory
 * @throws Exception if the output data file cannot be opened
 */
public void init(StarTreeBuilderConfig builderConfig) throws Exception {
  schema = builderConfig.schema;
  timeColumnName = schema.getTimeColumnName();
  this.dimensionsSplitOrder = builderConfig.dimensionsSplitOrder;
  skipStarNodeCreationForDimensions = builderConfig.getSkipStarNodeCreationForDimensions();
  skipMaterializationForDimensions = builderConfig.getSkipMaterializationForDimensions();
  skipMaterializationCardinalityThreshold = builderConfig.getSkipMaterializationCardinalityThreshold();
  enableOffHeapFormat = builderConfig.isEnableOffHealpFormat();
  this.maxLeafRecords = builderConfig.maxLeafRecords;
  this.outDir = builderConfig.getOutDir();
  if (outDir == null) {
    // No explicit output directory: fall back to a unique temp location.
    outDir = new File(System.getProperty("java.io.tmpdir"), V1Constants.STAR_TREE_INDEX_DIR + "_" + DateTime.now());
  }
  LOG.info("Index output directory:{}", outDir);
  dimensionTypes = new ArrayList<>();
  dimensionNames = new ArrayList<>();
  dimensionNameToIndexMap = HashBiMap.create();
  dimensionNameToStarValueMap = new HashMap<>();
  dictionaryMap = new HashMap<>();
  // READ DIMENSIONS COLUMNS
  List<DimensionFieldSpec> dimensionFieldSpecs = schema.getDimensionFieldSpecs();
  for (int index = 0; index < dimensionFieldSpecs.size(); index++) {
    DimensionFieldSpec spec = dimensionFieldSpecs.get(index);
    String dimensionName = spec.getName();
    dimensionNames.add(dimensionName);
    dimensionNameToIndexMap.put(dimensionName, index);
    Object starValue = getAllStarValue(spec);
    dimensionNameToStarValueMap.put(dimensionName, starValue);
    dimensionTypes.add(spec.getDataType());
    HashBiMap<Object, Integer> dictionary = HashBiMap.create();
    dictionaryMap.put(dimensionName, dictionary);
  }
  // The time column is treated as an additional dimension; it will not be split on
  // unless explicitly specified in the split order.
  if (timeColumnName != null) {
    dimensionNames.add(timeColumnName);
    TimeFieldSpec timeFieldSpec = schema.getTimeFieldSpec();
    dimensionTypes.add(timeFieldSpec.getDataType());
    int index = dimensionNameToIndexMap.size();
    dimensionNameToIndexMap.put(timeColumnName, index);
    Object starValue = getAllStarValue(timeFieldSpec);
    dimensionNameToStarValueMap.put(timeColumnName, starValue);
    HashBiMap<Object, Integer> dictionary = HashBiMap.create();
    dictionaryMap.put(schema.getTimeColumnName(), dictionary);
  }
  dimensionSizeBytes = dimensionNames.size() * Integer.SIZE / 8;
  this.numDimensions = dimensionNames.size();
  // READ METRIC COLUMNS
  this.metricNames = new ArrayList<>();
  this.metricNameToIndexMap = new HashMap<>();
  this.metricSizeBytes = 0;
  List<MetricFieldSpec> metricFieldSpecs = schema.getMetricFieldSpecs();
  for (int index = 0; index < metricFieldSpecs.size(); index++) {
    MetricFieldSpec spec = metricFieldSpecs.get(index);
    String metricName = spec.getName();
    metricNames.add(metricName);
    metricNameToIndexMap.put(metricName, index);
    metricSizeBytes += spec.getFieldSize();
  }
  numMetrics = metricNames.size();
  // BUGFIX: was builderConfig.getOutDir().mkdirs(), which NPEs when the config carried no
  // output directory (the temp-dir fallback above exists precisely for that case) and would
  // create the wrong directory even when it didn't. Always create the resolved outDir.
  outDir.mkdirs();
  dataFile = new File(outDir, "star-tree.buf");
  LOG.info("StarTree output data file: {}", dataFile.getAbsolutePath());
  dataBuffer = new DataOutputStream(new BufferedOutputStream(new FileOutputStream(dataFile)));
  // INITIALIZE THE ROOT NODE
  this.starTreeRootIndexNode = new StarTreeIndexNode();
  this.starTreeRootIndexNode.setDimensionName(StarTreeIndexNodeInterf.ALL);
  this.starTreeRootIndexNode.setDimensionValue(StarTreeIndexNodeInterf.ALL);
  this.starTreeRootIndexNode.setLevel(0);
  LOG.info("dimensionNames:{}", dimensionNames);
  LOG.info("metricNames:{}", metricNames);
}
Use of com.linkedin.pinot.common.data.TimeFieldSpec in project pinot by LinkedIn.
Class PinotSegmentRecordReaderTest, method createPinotSchema.
/**
 * Builds the Pinot schema used by this test: a single-valued and a multi-valued STRING
 * dimension, an INT and a FLOAT metric, and an hourly LONG time column.
 */
private Schema createPinotSchema() {
  Schema testSchema = new Schema();
  testSchema.setSchemaName("schema");
  testSchema.addField(new DimensionFieldSpec(D_SV_1, DataType.STRING, true));
  testSchema.addField(new DimensionFieldSpec(D_MV_1, DataType.STRING, false));
  testSchema.addField(new MetricFieldSpec(M1, DataType.INT));
  testSchema.addField(new MetricFieldSpec(M2, DataType.FLOAT));
  testSchema.addField(new TimeFieldSpec(new TimeGranularitySpec(DataType.LONG, TimeUnit.HOURS, TIME)));
  return testSchema;
}
Use of com.linkedin.pinot.common.data.TimeFieldSpec in project pinot by LinkedIn.
Class ThirdeyePinotSchemaUtils, method createSchema.
/**
 * Transforms the thirdeyeConfig to a Pinot schema.
 * Adds the default __COUNT metric if not already present.
 * Adds additional columns for all dimensions which are either specified as topk or whitelist
 * and hence have a transformed new raw column.
 * @param thirdeyeConfig config describing dimensions, metrics and the time column
 * @return the equivalent Pinot schema
 */
public static Schema createSchema(ThirdEyeConfig thirdeyeConfig) {
  Schema schema = new Schema();
  Set<String> transformDimensions = thirdeyeConfig.getTransformDimensions();
  for (DimensionSpec dimensionSpec : thirdeyeConfig.getDimensions()) {
    String dimensionName = dimensionSpec.getName();
    schema.addField(dimensionName, newStringDimensionSpec(dimensionName));
    if (transformDimensions.contains(dimensionName)) {
      // Dimensions with a topk/whitelist transform get a second, suffixed column.
      String topkDimensionName = dimensionName + ThirdEyeConstants.TOPK_DIMENSION_SUFFIX;
      schema.addField(topkDimensionName, newStringDimensionSpec(topkDimensionName));
    }
  }
  boolean countIncluded = false;
  for (MetricSpec metricSpec : thirdeyeConfig.getMetrics()) {
    String metricName = metricSpec.getName();
    if (metricName.equals(ThirdEyeConstants.AUTO_METRIC_COUNT)) {
      countIncluded = true;
    }
    FieldSpec fieldSpec = new MetricFieldSpec();
    fieldSpec.setName(metricName);
    fieldSpec.setDataType(DataType.valueOf(metricSpec.getType().toString()));
    fieldSpec.setSingleValueField(true);
    schema.addField(metricName, fieldSpec);
  }
  if (!countIncluded) {
    // Every dataset carries the auto count metric (defaulting to 1 per row) so that row
    // counts can always be aggregated even when the source config omits it.
    String metricName = ThirdEyeConstants.AUTO_METRIC_COUNT;
    FieldSpec fieldSpec = new MetricFieldSpec();
    fieldSpec.setName(metricName);
    fieldSpec.setDataType(DataType.LONG);
    fieldSpec.setDefaultNullValue(1);
    schema.addField(metricName, fieldSpec);
  }
  // Incoming and outgoing time granularities are intentionally identical here.
  TimeGranularitySpec incoming = newTimeGranularitySpec(thirdeyeConfig);
  TimeGranularitySpec outgoing = newTimeGranularitySpec(thirdeyeConfig);
  schema.addField(thirdeyeConfig.getTime().getColumnName(), new TimeFieldSpec(incoming, outgoing));
  schema.setSchemaName(thirdeyeConfig.getCollection());
  return schema;
}

/** Builds a single-valued STRING dimension field spec with the given column name. */
private static FieldSpec newStringDimensionSpec(String dimensionName) {
  FieldSpec fieldSpec = new DimensionFieldSpec();
  fieldSpec.setName(dimensionName);
  fieldSpec.setDataType(DataType.STRING);
  fieldSpec.setSingleValueField(true);
  return fieldSpec;
}

/** Builds the LONG time granularity spec from the config's time settings (deduplicates the
 * previously copy-pasted constructor call). */
private static TimeGranularitySpec newTimeGranularitySpec(ThirdEyeConfig thirdeyeConfig) {
  return new TimeGranularitySpec(DataType.LONG,
      thirdeyeConfig.getTime().getTimeGranularity().getSize(),
      thirdeyeConfig.getTime().getTimeGranularity().getUnit(),
      thirdeyeConfig.getTime().getTimeFormat(),
      thirdeyeConfig.getTime().getColumnName());
}
Aggregations