use of com.linkedin.pinot.common.data.MetricFieldSpec in project pinot by linkedin.
the class AvroUtils method getPinotSchemaFromAvroSchema.
/**
 * Given an Avro schema object along with column field types and a time unit, returns the
 * equivalent Pinot schema object.
 *
 * @param avroSchema Avro schema for which to get the Pinot schema.
 * @param fieldTypes Map from column name to Pinot field type.
 * @param timeUnit Time unit to be used for the time column.
 * @return The equivalent Pinot schema for the given Avro schema.
 */
private static Schema getPinotSchemaFromAvroSchema(org.apache.avro.Schema avroSchema,
    Map<String, FieldSpec.FieldType> fieldTypes, TimeUnit timeUnit) {
  Schema pinotSchema = new Schema();
  for (final Field field : avroSchema.getFields()) {
    String fieldName = field.name();
    FieldSpec.DataType dataType;
    try {
      dataType = AvroRecordReader.getColumnType(field);
    } catch (UnsupportedOperationException e) {
      LOGGER.warn("Unsupported field type for field {} schema {}, using String instead.", fieldName,
          field.schema());
      dataType = FieldSpec.DataType.STRING;
    }
    FieldSpec.FieldType fieldType = fieldTypes.get(fieldName);
    // guard against columns missing from the fieldTypes map; otherwise the switch below
    // throws a bare NullPointerException with no field name in the message
    Preconditions.checkNotNull(fieldType, "Missing field type for field: " + fieldName);
    boolean isSingleValueField = AvroRecordReader.isSingleValueField(field);
    switch (fieldType) {
      case DIMENSION:
        pinotSchema.addField(new DimensionFieldSpec(fieldName, dataType, isSingleValueField));
        break;
      case METRIC:
        Preconditions.checkState(isSingleValueField, "Unsupported multi-value for metric field.");
        pinotSchema.addField(new MetricFieldSpec(fieldName, dataType));
        break;
      case TIME:
        Preconditions.checkState(isSingleValueField, "Unsupported multi-value for time field.");
        pinotSchema.addField(new TimeFieldSpec(fieldName, dataType, timeUnit));
        break;
      default:
        throw new UnsupportedOperationException("Unsupported field type: " + fieldType + " for field: " + fieldName);
    }
  }
  return pinotSchema;
}
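The method above is private to AvroUtils, but the schema it produces can be assembled directly with the same public FieldSpec constructors. A minimal standalone sketch of the equivalent output for a record with one dimension, one metric, and a time column (the column names and types here are hypothetical, chosen only for illustration):

import java.util.concurrent.TimeUnit;
import com.linkedin.pinot.common.data.DimensionFieldSpec;
import com.linkedin.pinot.common.data.FieldSpec;
import com.linkedin.pinot.common.data.MetricFieldSpec;
import com.linkedin.pinot.common.data.Schema;
import com.linkedin.pinot.common.data.TimeFieldSpec;

public class AvroToPinotSchemaSketch {
  public static void main(String[] args) {
    // what getPinotSchemaFromAvroSchema would emit for this shape of Avro record
    Schema pinotSchema = new Schema();
    pinotSchema.addField(new DimensionFieldSpec("country", FieldSpec.DataType.STRING, true));
    pinotSchema.addField(new MetricFieldSpec("clicks", FieldSpec.DataType.LONG));
    pinotSchema.addField(new TimeFieldSpec("daysSinceEpoch", FieldSpec.DataType.INT, TimeUnit.DAYS));
    System.out.println(pinotSchema);
  }
}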
use of com.linkedin.pinot.common.data.MetricFieldSpec in project pinot by linkedin.
the class OffHeapStarTreeBuilder method init.
public void init(StarTreeBuilderConfig builderConfig) throws Exception {
  schema = builderConfig.schema;
  timeColumnName = schema.getTimeColumnName();
  this.dimensionsSplitOrder = builderConfig.dimensionsSplitOrder;
  skipStarNodeCreationForDimensions = builderConfig.getSkipStarNodeCreationForDimensions();
  skipMaterializationForDimensions = builderConfig.getSkipMaterializationForDimensions();
  skipMaterializationCardinalityThreshold = builderConfig.getSkipMaterializationCardinalityThreshold();
  // the "Healp" spelling is the accessor's actual name on StarTreeBuilderConfig
  enableOffHeapFormat = builderConfig.isEnableOffHealpFormat();
  this.maxLeafRecords = builderConfig.maxLeafRecords;
  this.outDir = builderConfig.getOutDir();
  if (outDir == null) {
    outDir = new File(System.getProperty("java.io.tmpdir"), V1Constants.STAR_TREE_INDEX_DIR + "_" + DateTime.now());
  }
  LOG.info("Index output directory:{}", outDir);
  dimensionTypes = new ArrayList<>();
  dimensionNames = new ArrayList<>();
  dimensionNameToIndexMap = HashBiMap.create();
  dimensionNameToStarValueMap = new HashMap<>();
  dictionaryMap = new HashMap<>();
  // READ DIMENSION COLUMNS
  List<DimensionFieldSpec> dimensionFieldSpecs = schema.getDimensionFieldSpecs();
  for (int index = 0; index < dimensionFieldSpecs.size(); index++) {
    DimensionFieldSpec spec = dimensionFieldSpecs.get(index);
    String dimensionName = spec.getName();
    dimensionNames.add(dimensionName);
    dimensionNameToIndexMap.put(dimensionName, index);
    Object starValue = getAllStarValue(spec);
    dimensionNameToStarValueMap.put(dimensionName, starValue);
    dimensionTypes.add(spec.getDataType());
    HashBiMap<Object, Integer> dictionary = HashBiMap.create();
    dictionaryMap.put(dimensionName, dictionary);
  }
  // Treat the time column as just another dimension; we never split on
  // this dimension unless explicitly specified in split order
  if (timeColumnName != null) {
    dimensionNames.add(timeColumnName);
    TimeFieldSpec timeFieldSpec = schema.getTimeFieldSpec();
    dimensionTypes.add(timeFieldSpec.getDataType());
    int index = dimensionNameToIndexMap.size();
    dimensionNameToIndexMap.put(timeColumnName, index);
    Object starValue = getAllStarValue(timeFieldSpec);
    dimensionNameToStarValueMap.put(timeColumnName, starValue);
    HashBiMap<Object, Integer> dictionary = HashBiMap.create();
    dictionaryMap.put(timeColumnName, dictionary);
  }
  // each dimension value is stored as a 4-byte dictionary id
  dimensionSizeBytes = dimensionNames.size() * Integer.SIZE / 8;
  this.numDimensions = dimensionNames.size();
  // READ METRIC COLUMNS
  this.metricNames = new ArrayList<>();
  this.metricNameToIndexMap = new HashMap<>();
  this.metricSizeBytes = 0;
  List<MetricFieldSpec> metricFieldSpecs = schema.getMetricFieldSpecs();
  for (int index = 0; index < metricFieldSpecs.size(); index++) {
    MetricFieldSpec spec = metricFieldSpecs.get(index);
    String metricName = spec.getName();
    metricNames.add(metricName);
    metricNameToIndexMap.put(metricName, index);
    metricSizeBytes += spec.getFieldSize();
  }
  numMetrics = metricNames.size();
  // use outDir here (it may have been defaulted above); builderConfig.getOutDir()
  // can still be null at this point
  outDir.mkdirs();
  dataFile = new File(outDir, "star-tree.buf");
  LOG.info("StarTree output data file: {}", dataFile.getAbsolutePath());
  dataBuffer = new DataOutputStream(new BufferedOutputStream(new FileOutputStream(dataFile)));
  // INITIALIZE THE ROOT NODE
  this.starTreeRootIndexNode = new StarTreeIndexNode();
  this.starTreeRootIndexNode.setDimensionName(StarTreeIndexNodeInterf.ALL);
  this.starTreeRootIndexNode.setDimensionValue(StarTreeIndexNodeInterf.ALL);
  this.starTreeRootIndexNode.setLevel(0);
  LOG.info("dimensionNames:{}", dimensionNames);
  LOG.info("metricNames:{}", metricNames);
}
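The two size computations in init() fix the width of every record in the star-tree buffer: four bytes per dimension (a dictionary id) plus each metric's declared field size. A small self-contained sketch of that arithmetic, with a hand-built schema for illustration (note that in the builder the time column, when present, counts as an extra dimension):

import com.linkedin.pinot.common.data.DimensionFieldSpec;
import com.linkedin.pinot.common.data.FieldSpec;
import com.linkedin.pinot.common.data.MetricFieldSpec;
import com.linkedin.pinot.common.data.Schema;

public class RecordSizeSketch {
  public static void main(String[] args) {
    Schema schema = new Schema();
    schema.addField(new DimensionFieldSpec("country", FieldSpec.DataType.STRING, true));
    schema.addField(new MetricFieldSpec("clicks", FieldSpec.DataType.LONG));
    // every dimension value is stored as a 4-byte dictionary id
    int dimensionSizeBytes = schema.getDimensionFieldSpecs().size() * Integer.SIZE / 8;
    // metrics are stored at their declared field size (e.g. 8 bytes for LONG)
    int metricSizeBytes = 0;
    for (MetricFieldSpec spec : schema.getMetricFieldSpecs()) {
      metricSizeBytes += spec.getFieldSize();
    }
    System.out.println("fixed record size = " + (dimensionSizeBytes + metricSizeBytes) + " bytes");
  }
}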
use of com.linkedin.pinot.common.data.MetricFieldSpec in project pinot by linkedin.
the class SegmentTestUtils method extractSchemaFromAvroWithoutTime.
public static Schema extractSchemaFromAvroWithoutTime(File avroFile) throws IOException {
  // try-with-resources ensures the stream is closed even if schema extraction fails
  try (DataFileStream<GenericRecord> dataStream =
      new DataFileStream<GenericRecord>(new FileInputStream(avroFile), new GenericDatumReader<GenericRecord>())) {
    Schema schema = new Schema();
    for (final Field field : dataStream.getSchema().getFields()) {
      final FieldSpec.DataType columnType;
      try {
        columnType = getColumnType(field);
      } catch (Exception e) {
        LOGGER.warn("Caught exception while converting Avro field {} of type {}, field will not be in schema.",
            field.name(), field.schema().getType());
        continue;
      }
      final String columnName = field.name();
      final String pinotType = field.getProp("pinotType");
      final FieldSpec fieldSpec;
      if ("METRIC".equals(pinotType)) {
        fieldSpec = new MetricFieldSpec();
      } else {
        fieldSpec = new DimensionFieldSpec();
      }
      fieldSpec.setName(columnName);
      fieldSpec.setDataType(columnType);
      fieldSpec.setSingleValueField(isSingleValueField(field));
      schema.addField(fieldSpec);
    }
    return schema;
  }
}
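Since extractSchemaFromAvroWithoutTime is public and static, calling it only requires an Avro container file on disk. A minimal usage sketch ("data.avro" is a placeholder path):

import java.io.File;
import com.linkedin.pinot.common.data.DimensionFieldSpec;
import com.linkedin.pinot.common.data.MetricFieldSpec;
import com.linkedin.pinot.common.data.Schema;

public class ExtractSchemaSketch {
  public static void main(String[] args) throws Exception {
    Schema schema = SegmentTestUtils.extractSchemaFromAvroWithoutTime(new File("data.avro"));
    for (DimensionFieldSpec spec : schema.getDimensionFieldSpecs()) {
      System.out.println("dimension: " + spec.getName());
    }
    for (MetricFieldSpec spec : schema.getMetricFieldSpecs()) {
      System.out.println("metric: " + spec.getName());
    }
  }
}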
use of com.linkedin.pinot.common.data.MetricFieldSpec in project pinot by linkedin.
the class DataGenerator method buildSpec.
private FieldSpec buildSpec(DataGeneratorSpec genSpec, String column) {
  DataType dataType = genSpec.getDataTypesMap().get(column);
  FieldType fieldType = genSpec.getFieldTypesMap().get(column);
  FieldSpec spec;
  switch (fieldType) {
    case DIMENSION:
      spec = new DimensionFieldSpec();
      break;
    case METRIC:
      spec = new MetricFieldSpec();
      break;
    case TIME:
      spec = new TimeFieldSpec(column, dataType, genSpec.getTimeUnitMap().get(column));
      break;
    default:
      throw new RuntimeException("Invalid field type: " + fieldType);
  }
  spec.setName(column);
  spec.setDataType(dataType);
  spec.setSingleValueField(true);
  return spec;
}
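buildSpec is private and DataGeneratorSpec's construction is not shown here, but the mapping itself relies only on the public no-arg FieldSpec constructors and the setters used above. A standalone sketch of the METRIC branch (the column name and type are hypothetical):

import com.linkedin.pinot.common.data.FieldSpec;
import com.linkedin.pinot.common.data.MetricFieldSpec;

public class BuildSpecSketch {
  public static void main(String[] args) {
    // same steps buildSpec performs for a single-value METRIC column
    FieldSpec spec = new MetricFieldSpec();
    spec.setName("impressions");
    spec.setDataType(FieldSpec.DataType.LONG);
    spec.setSingleValueField(true);
    System.out.println(spec);
  }
}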
use of com.linkedin.pinot.common.data.MetricFieldSpec in project pinot by linkedin.
the class AutoLoadPinotMetricsService method addNewDataset.
/**
 * Adds a new dataset to the ThirdEye database: persists the dataset config, one metric
 * config per metric field spec, and a default dashboard config.
 * @param dataset name of the dataset to add
 * @param schema Pinot schema whose metric field specs drive the metric configs
 */
private void addNewDataset(String dataset, Schema schema) throws Exception {
  List<MetricFieldSpec> metricSpecs = schema.getMetricFieldSpecs();
  // Create DatasetConfig
  DatasetConfigDTO datasetConfigDTO = ConfigGenerator.generateDatasetConfig(dataset, schema);
  LOG.info("Creating dataset for {}", dataset);
  DAO_REGISTRY.getDatasetConfigDAO().save(datasetConfigDTO);
  // Create one MetricConfig per metric field spec
  for (MetricFieldSpec metricFieldSpec : metricSpecs) {
    MetricConfigDTO metricConfigDTO = ConfigGenerator.generateMetricConfig(metricFieldSpec, dataset);
    LOG.info("Creating metric {} for {}", metricConfigDTO.getName(), dataset);
    DAO_REGISTRY.getMetricConfigDAO().save(metricConfigDTO);
  }
  // Create default DashboardConfig
  List<Long> metricIds = ConfigGenerator.getMetricIdsFromMetricConfigs(DAO_REGISTRY.getMetricConfigDAO().findByDataset(dataset));
  DashboardConfigDTO dashboardConfigDTO = ConfigGenerator.generateDefaultDashboardConfig(dataset, metricIds);
  LOG.info("Creating default dashboard for dataset {}", dataset);
  DAO_REGISTRY.getDashboardConfigDAO().save(dashboardConfigDTO);
}
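The metric configs created above are driven entirely by the schema's metric field specs. A minimal sketch of that input, with a hand-built schema for illustration (ConfigGenerator and the DAOs are not invoked here):

import com.linkedin.pinot.common.data.FieldSpec;
import com.linkedin.pinot.common.data.MetricFieldSpec;
import com.linkedin.pinot.common.data.Schema;

public class AddDatasetSketch {
  public static void main(String[] args) {
    Schema schema = new Schema();
    schema.addField(new MetricFieldSpec("clicks", FieldSpec.DataType.LONG));
    schema.addField(new MetricFieldSpec("impressions", FieldSpec.DataType.LONG));
    for (MetricFieldSpec metricFieldSpec : schema.getMetricFieldSpecs()) {
      // addNewDataset would pass each spec to ConfigGenerator.generateMetricConfig
      System.out.println("metric config to create: " + metricFieldSpec.getName());
    }
  }
}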