use of com.linkedin.pinot.common.data.DimensionFieldSpec in project pinot by linkedin.
the class RawIndexBenchmark method buildSegment.
/**
 * Helper method that builds a segment from the lines of the input data file.
 *
 * <p>The schema contains {@code NUM_COLUMNS} single-value STRING dimension columns named
 * {@code column_0}, {@code column_1}, ... Every line of {@code _dataFile} becomes one row, and the
 * same line value is written into every column of that row. The column named by
 * {@code _rawIndexColumn} is configured for raw index creation (no dictionary); the other columns
 * are left with the generator's default encoding (dictionary encoded, per the original
 * description of this benchmark).
 *
 * <p>Side effects: increments {@code _numRows} once per line read and prints progress to stdout.
 *
 * @return the directory of the generated segment ({@code SEGMENT_DIR_NAME/SEGMENT_NAME})
 * @throws Exception if the data file cannot be read or the segment build fails
 */
private File buildSegment() throws Exception {
  Schema schema = new Schema();
  for (int i = 0; i < NUM_COLUMNS; i++) {
    String column = "column_" + i;
    DimensionFieldSpec dimensionFieldSpec = new DimensionFieldSpec(column, FieldSpec.DataType.STRING, true);
    schema.addField(dimensionFieldSpec);
  }

  SegmentGeneratorConfig config = new SegmentGeneratorConfig(schema);
  config.setRawIndexCreationColumns(Collections.singletonList(_rawIndexColumn));
  config.setOutDir(SEGMENT_DIR_NAME);
  config.setSegmentName(SEGMENT_NAME);

  final List<GenericRow> rows = new ArrayList<>();
  System.out.println("Reading data...");

  // try-with-resources: the reader (and the underlying file handle) is closed even if reading
  // or row construction throws; previously it was never closed.
  try (BufferedReader reader = new BufferedReader(new FileReader(_dataFile))) {
    String value;
    while ((value = reader.readLine()) != null) {
      // Each input line is replicated into every column of the row.
      HashMap<String, Object> map = new HashMap<>();
      for (FieldSpec fieldSpec : schema.getAllFieldSpecs()) {
        map.put(fieldSpec.getName(), value);
      }

      GenericRow genericRow = new GenericRow();
      genericRow.init(map);
      rows.add(genericRow);

      _numRows++;
      if (_numRows % 1000000 == 0) {
        System.out.println("Read rows: " + _numRows);
      }
    }
  }

  System.out.println("Generating segment...");
  SegmentIndexCreationDriverImpl driver = new SegmentIndexCreationDriverImpl();
  RecordReader recordReader = new TestRecordReader(rows, schema);
  driver.init(config, recordReader);
  driver.build();

  return new File(SEGMENT_DIR_NAME, SEGMENT_NAME);
}
use of com.linkedin.pinot.common.data.DimensionFieldSpec in project pinot by linkedin.
the class AutoloadPinotMetricsServiceTest method testRefreshDataset.
/**
 * Verifies that re-adding an existing dataset after its schema has grown refreshes the stored
 * configs instead of creating a second dataset: first a new dimension is added, then a new metric.
 */
@Test(dependsOnMethods = { "testAddNewDataset" })
public void testRefreshDataset() throws Exception {
  // Step 1: extend the schema with a dimension and reload the dataset.
  schema.addField(new DimensionFieldSpec("newDimension", DataType.STRING, true));
  testAutoLoadPinotMetricsService.addPinotDataset(dataset, schema, datasetConfig);

  // Still exactly one dataset config, and its dimensions mirror the schema's dimensions.
  Assert.assertEquals(datasetConfigDAO.findAll().size(), 1);
  DatasetConfigDTO refreshedDatasetConfig = datasetConfigDAO.findByDataset(dataset);
  Assert.assertEquals(refreshedDatasetConfig.getDataset(), dataset);
  Assert.assertEquals(
      Sets.newHashSet(refreshedDatasetConfig.getDimensions()),
      Sets.newHashSet(schema.getDimensionNames()));

  // Step 2: extend the schema with a metric and reload using the refreshed dataset config.
  schema.addField(new MetricFieldSpec("newMetric", DataType.LONG));
  testAutoLoadPinotMetricsService.addPinotDataset(dataset, schema, refreshedDatasetConfig);
  Assert.assertEquals(datasetConfigDAO.findAll().size(), 1);

  // Every stored metric config must correspond to a metric in the schema (and vice versa by count).
  List<MetricConfigDTO> storedMetrics = metricConfigDAO.findByDataset(dataset);
  List<String> expectedMetricNames = schema.getMetricNames();
  Assert.assertEquals(storedMetrics.size(), expectedMetricNames.size());

  List<Long> storedMetricIds = new ArrayList<>();
  for (MetricConfigDTO storedMetric : storedMetrics) {
    Assert.assertTrue(expectedMetricNames.contains(storedMetric.getName()));
    storedMetricIds.add(storedMetric.getId());
  }

  // The default dashboard for the dataset must list exactly those metric ids, in the same order.
  DashboardConfigDTO defaultDashboard =
      dashboardConfigDAO.findByName(DashboardConfigBean.DEFAULT_DASHBOARD_PREFIX + dataset);
  Assert.assertEquals(defaultDashboard.getMetricIds(), storedMetricIds);
}
use of com.linkedin.pinot.common.data.DimensionFieldSpec in project pinot by linkedin.
the class FilterTreeOptimizationTest method buildSchema.
/**
 * Builds a schema with one single-value STRING dimension column per supplied name.
 *
 * @param dimensions Names of the dimension columns to add, in order
 * @return A new schema containing only those dimension columns
 */
private static Schema buildSchema(String[] dimensions) {
  Schema result = new Schema();
  for (int i = 0; i < dimensions.length; i++) {
    // 'true' marks the dimension as single-valued.
    result.addField(new DimensionFieldSpec(dimensions[i], FieldSpec.DataType.STRING, true));
  }
  return result;
}
use of com.linkedin.pinot.common.data.DimensionFieldSpec in project pinot by linkedin.
the class ThirdeyePinotSchemaUtils method createSchema.
/**
 * Transforms a {@link ThirdEyeConfig} into a Pinot {@link Schema}.
 *
 * <ul>
 *   <li>Each configured dimension becomes a single-value STRING dimension column.</li>
 *   <li>Each dimension listed in {@code getTransformDimensions()} (per the original description,
 *       those specified as either topk or whitelist) additionally gets a second STRING column named
 *       {@code <dimension> + ThirdEyeConstants.TOPK_DIMENSION_SUFFIX}.</li>
 *   <li>Each configured metric becomes a single-value metric column whose data type is looked up
 *       by the name of the metric's configured type.</li>
 *   <li>If no metric is named {@code ThirdEyeConstants.AUTO_METRIC_COUNT}, a LONG metric with that
 *       name and a default null value of 1 is added.</li>
 *   <li>A LONG time column is added from the config's time settings, with identical incoming and
 *       outgoing granularity.</li>
 * </ul>
 *
 * @param thirdeyeConfig the ThirdEye configuration to convert
 * @return the schema, named after {@code thirdeyeConfig.getCollection()}
 */
public static Schema createSchema(ThirdEyeConfig thirdeyeConfig) {
  Schema pinotSchema = new Schema();

  // Dimensions (plus the extra suffixed column for transformed dimensions).
  Set<String> transformDimensions = thirdeyeConfig.getTransformDimensions();
  for (DimensionSpec dimensionSpec : thirdeyeConfig.getDimensions()) {
    FieldSpec plainSpec = new DimensionFieldSpec();
    String plainName = dimensionSpec.getName();
    plainSpec.setName(plainName);
    plainSpec.setDataType(DataType.STRING);
    plainSpec.setSingleValueField(true);
    pinotSchema.addField(plainName, plainSpec);

    if (!transformDimensions.contains(plainName)) {
      continue;
    }

    FieldSpec suffixedSpec = new DimensionFieldSpec();
    String suffixedName = plainName + ThirdEyeConstants.TOPK_DIMENSION_SUFFIX;
    suffixedSpec.setName(suffixedName);
    suffixedSpec.setDataType(DataType.STRING);
    suffixedSpec.setSingleValueField(true);
    pinotSchema.addField(suffixedName, suffixedSpec);
  }

  // Metrics; remember whether the config already declares the auto-count metric.
  boolean hasCountMetric = false;
  for (MetricSpec metricSpec : thirdeyeConfig.getMetrics()) {
    FieldSpec metricFieldSpec = new MetricFieldSpec();
    String name = metricSpec.getName();
    if (name.equals(ThirdEyeConstants.AUTO_METRIC_COUNT)) {
      hasCountMetric = true;
    }
    metricFieldSpec.setName(name);
    metricFieldSpec.setDataType(DataType.valueOf(metricSpec.getType().toString()));
    metricFieldSpec.setSingleValueField(true);
    pinotSchema.addField(name, metricFieldSpec);
  }

  // Add the auto-count metric when the config did not declare it.
  if (!hasCountMetric) {
    FieldSpec countFieldSpec = new MetricFieldSpec();
    String countName = ThirdEyeConstants.AUTO_METRIC_COUNT;
    countFieldSpec.setName(countName);
    countFieldSpec.setDataType(DataType.LONG);
    countFieldSpec.setDefaultNullValue(1);
    pinotSchema.addField(countName, countFieldSpec);
  }

  // Time column: incoming and outgoing granularity are built from the same config values.
  TimeGranularitySpec incomingGranularity = new TimeGranularitySpec(
      DataType.LONG,
      thirdeyeConfig.getTime().getTimeGranularity().getSize(),
      thirdeyeConfig.getTime().getTimeGranularity().getUnit(),
      thirdeyeConfig.getTime().getTimeFormat(),
      thirdeyeConfig.getTime().getColumnName());
  TimeGranularitySpec outgoingGranularity = new TimeGranularitySpec(
      DataType.LONG,
      thirdeyeConfig.getTime().getTimeGranularity().getSize(),
      thirdeyeConfig.getTime().getTimeGranularity().getUnit(),
      thirdeyeConfig.getTime().getTimeFormat(),
      thirdeyeConfig.getTime().getColumnName());
  pinotSchema.addField(
      thirdeyeConfig.getTime().getColumnName(),
      new TimeFieldSpec(incomingGranularity, outgoingGranularity));

  pinotSchema.setSchemaName(thirdeyeConfig.getCollection());
  return pinotSchema;
}
use of com.linkedin.pinot.common.data.DimensionFieldSpec in project pinot by linkedin.
the class PinotSegmentRecordReader method getSchema.
/**
 * Reconstructs a schema from the segment metadata: the schema is named after the segment and
 * gets one field spec per entry in {@code columns}, typed according to the column's field type
 * (dimension, metric or time).
 *
 * @return the schema derived from the segment metadata
 */
@Override
public Schema getSchema() {
  Schema segmentSchema = new Schema();
  segmentSchema.setSchemaName(segmentMetadata.getName());

  for (String column : columns) {
    ColumnMetadata metadata = segmentMetadata.getColumnMetadataFor(column);
    String name = metadata.getColumnName();
    DataType type = metadata.getDataType();
    FieldType kind = metadata.getFieldType();

    FieldSpec spec = null;
    switch (kind) {
      case DIMENSION: {
        boolean singleValue = metadata.isSingleValue();
        spec = new DimensionFieldSpec(name, type, singleValue);
        break;
      }
      case METRIC: {
        spec = new MetricFieldSpec(name, type);
        break;
      }
      case TIME: {
        // Only the incoming granularity is supplied, built from the column's time unit.
        TimeUnit unit = metadata.getTimeUnit();
        spec = new TimeFieldSpec(new TimeGranularitySpec(type, unit, name));
        break;
      }
      default:
        // NOTE(review): any other field type leaves spec null, so null is passed to addField
        // below - confirm whether Schema.addField tolerates that or whether this should fail fast.
        break;
    }
    segmentSchema.addField(spec);
  }

  return segmentSchema;
}
Aggregations