use of com.linkedin.pinot.common.data.DimensionFieldSpec in project pinot by linkedin.
the class AvroUtils method getPinotSchemaFromAvroSchema.
/**
 * Given an Avro schema object along with the column field types and time unit, returns the
 * equivalent Pinot schema object.
 *
 * @param avroSchema Avro schema for which to get the Pinot schema.
 * @param fieldTypes Map containing the field type for each column.
 * @param timeUnit Time unit to be used for the time column.
 * @return The equivalent Pinot schema for the given Avro schema.
 */
private static Schema getPinotSchemaFromAvroSchema(org.apache.avro.Schema avroSchema,
    Map<String, FieldSpec.FieldType> fieldTypes, TimeUnit timeUnit) {
  Schema pinotSchema = new Schema();
  for (final Field field : avroSchema.getFields()) {
    String fieldName = field.name();
    FieldSpec.DataType dataType;
    try {
      dataType = AvroRecordReader.getColumnType(field);
    } catch (UnsupportedOperationException e) {
      LOGGER.warn("Unsupported field type for field {} schema {}, using STRING instead.", fieldName, field.schema());
      dataType = FieldSpec.DataType.STRING;
    }
    FieldSpec.FieldType fieldType = fieldTypes.get(fieldName);
    boolean isSingleValueField = AvroRecordReader.isSingleValueField(field);
    switch (fieldType) {
      case DIMENSION:
        pinotSchema.addField(new DimensionFieldSpec(fieldName, dataType, isSingleValueField));
        break;
      case METRIC:
        Preconditions.checkState(isSingleValueField, "Unsupported multi-value for metric field.");
        pinotSchema.addField(new MetricFieldSpec(fieldName, dataType));
        break;
      case TIME:
        Preconditions.checkState(isSingleValueField, "Unsupported multi-value for time field.");
        pinotSchema.addField(new TimeFieldSpec(fieldName, dataType, timeUnit));
        break;
      default:
        throw new UnsupportedOperationException("Unsupported field type: " + fieldType + " for field: " + fieldName);
    }
  }
  return pinotSchema;
}
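For reference, a minimal sketch of building the same kind of Pinot schema by hand, using only the field-spec constructors exercised above; the column names, types, and time unit here are illustrative, not taken from the original:

import java.util.concurrent.TimeUnit;
import com.linkedin.pinot.common.data.DimensionFieldSpec;
import com.linkedin.pinot.common.data.FieldSpec;
import com.linkedin.pinot.common.data.MetricFieldSpec;
import com.linkedin.pinot.common.data.Schema;
import com.linkedin.pinot.common.data.TimeFieldSpec;

public class SchemaSketch {
  public static void main(String[] args) {
    Schema pinotSchema = new Schema();
    // Single-value string dimension (the last argument is isSingleValueField).
    pinotSchema.addField(new DimensionFieldSpec("country", FieldSpec.DataType.STRING, true));
    // Metrics must be single-valued, matching the Preconditions check above.
    pinotSchema.addField(new MetricFieldSpec("clicks", FieldSpec.DataType.LONG));
    // Time column with its unit, mirroring the TIME branch of the switch.
    pinotSchema.addField(new TimeFieldSpec("daysSinceEpoch", FieldSpec.DataType.INT, TimeUnit.DAYS));
  }
}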
use of com.linkedin.pinot.common.data.DimensionFieldSpec in project pinot by linkedin.
the class OffHeapStarTreeBuilder method init.
public void init(StarTreeBuilderConfig builderConfig) throws Exception {
  schema = builderConfig.schema;
  timeColumnName = schema.getTimeColumnName();
  this.dimensionsSplitOrder = builderConfig.dimensionsSplitOrder;
  skipStarNodeCreationForDimensions = builderConfig.getSkipStarNodeCreationForDimensions();
  skipMaterializationForDimensions = builderConfig.getSkipMaterializationForDimensions();
  skipMaterializationCardinalityThreshold = builderConfig.getSkipMaterializationCardinalityThreshold();
  enableOffHeapFormat = builderConfig.isEnableOffHealpFormat();
  this.maxLeafRecords = builderConfig.maxLeafRecords;
  this.outDir = builderConfig.getOutDir();
  if (outDir == null) {
    outDir = new File(System.getProperty("java.io.tmpdir"), V1Constants.STAR_TREE_INDEX_DIR + "_" + DateTime.now());
  }
  LOG.info("Index output directory:{}", outDir);
  dimensionTypes = new ArrayList<>();
  dimensionNames = new ArrayList<>();
  dimensionNameToIndexMap = HashBiMap.create();
  dimensionNameToStarValueMap = new HashMap<>();
  dictionaryMap = new HashMap<>();
  // READ DIMENSION COLUMNS
  List<DimensionFieldSpec> dimensionFieldSpecs = schema.getDimensionFieldSpecs();
  for (int index = 0; index < dimensionFieldSpecs.size(); index++) {
    DimensionFieldSpec spec = dimensionFieldSpecs.get(index);
    String dimensionName = spec.getName();
    dimensionNames.add(dimensionName);
    dimensionNameToIndexMap.put(dimensionName, index);
    Object starValue = getAllStarValue(spec);
    dimensionNameToStarValueMap.put(dimensionName, starValue);
    dimensionTypes.add(spec.getDataType());
    HashBiMap<Object, Integer> dictionary = HashBiMap.create();
    dictionaryMap.put(dimensionName, dictionary);
  }
  // The time column is treated as just another dimension; the tree is never split on
  // this dimension unless explicitly specified in the split order.
  if (timeColumnName != null) {
    dimensionNames.add(timeColumnName);
    TimeFieldSpec timeFieldSpec = schema.getTimeFieldSpec();
    dimensionTypes.add(timeFieldSpec.getDataType());
    int index = dimensionNameToIndexMap.size();
    dimensionNameToIndexMap.put(timeColumnName, index);
    Object starValue = getAllStarValue(timeFieldSpec);
    dimensionNameToStarValueMap.put(timeColumnName, starValue);
    HashBiMap<Object, Integer> dictionary = HashBiMap.create();
    dictionaryMap.put(timeColumnName, dictionary);
  }
  // Each dimension value is stored as a dictionary-encoded int.
  dimensionSizeBytes = dimensionNames.size() * Integer.SIZE / 8;
  this.numDimensions = dimensionNames.size();
  // READ METRIC COLUMNS
  this.metricNames = new ArrayList<>();
  this.metricNameToIndexMap = new HashMap<>();
  this.metricSizeBytes = 0;
  List<MetricFieldSpec> metricFieldSpecs = schema.getMetricFieldSpecs();
  for (int index = 0; index < metricFieldSpecs.size(); index++) {
    MetricFieldSpec spec = metricFieldSpecs.get(index);
    String metricName = spec.getName();
    metricNames.add(metricName);
    metricNameToIndexMap.put(metricName, index);
    metricSizeBytes += spec.getFieldSize();
  }
  numMetrics = metricNames.size();
  // Use the resolved output directory; builderConfig.getOutDir() may be null when the
  // temp-dir default above was taken.
  outDir.mkdirs();
  dataFile = new File(outDir, "star-tree.buf");
  LOG.info("StarTree output data file: {}", dataFile.getAbsolutePath());
  dataBuffer = new DataOutputStream(new BufferedOutputStream(new FileOutputStream(dataFile)));
  // INITIALIZE THE ROOT NODE
  this.starTreeRootIndexNode = new StarTreeIndexNode();
  this.starTreeRootIndexNode.setDimensionName(StarTreeIndexNodeInterf.ALL);
  this.starTreeRootIndexNode.setDimensionValue(StarTreeIndexNodeInterf.ALL);
  this.starTreeRootIndexNode.setLevel(0);
  LOG.info("dimensionNames:{}", dimensionNames);
  LOG.info("metricNames:{}", metricNames);
}
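A hedged sketch of driving this init method. The public fields (schema, dimensionsSplitOrder, maxLeafRecords) are taken directly from the assignments above; the no-arg constructors and setOutDir are assumptions paired with the getOutDir() call in the snippet:

import java.io.File;
import java.util.Arrays;

// Sketch only: constructors and setOutDir are assumed; field names come from init().
StarTreeBuilderConfig builderConfig = new StarTreeBuilderConfig();  // assumed constructor
builderConfig.schema = pinotSchema;                                 // schema built elsewhere
builderConfig.dimensionsSplitOrder = Arrays.asList("country", "browser");
builderConfig.maxLeafRecords = 10000;
builderConfig.setOutDir(new File("/tmp/star-tree"));                // assumed setter
OffHeapStarTreeBuilder builder = new OffHeapStarTreeBuilder();      // assumed constructor
builder.init(builderConfig);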
use of com.linkedin.pinot.common.data.DimensionFieldSpec in project pinot by linkedin.
the class RawIndexCreatorTest method setup.
/**
 * Setup to build a segment with raw indexes (no-dictionary) of various data types.
 *
 * @throws Exception
 */
@BeforeClass
public void setup() throws Exception {
  Schema schema = new Schema();
  schema.addField(new DimensionFieldSpec(INT_COLUMN, FieldSpec.DataType.INT, true));
  schema.addField(new DimensionFieldSpec(LONG_COLUMN, FieldSpec.DataType.LONG, true));
  schema.addField(new DimensionFieldSpec(FLOAT_COLUMN, FieldSpec.DataType.FLOAT, true));
  schema.addField(new DimensionFieldSpec(DOUBLE_COLUMN, FieldSpec.DataType.DOUBLE, true));
  schema.addField(new DimensionFieldSpec(STRING_COLUMN, FieldSpec.DataType.STRING, true));
  _random = new Random(System.nanoTime());
  _recordReader = buildIndex(schema);
}
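The boolean in each DimensionFieldSpec constructor call marks the column as single-valued; a multi-value column simply passes false. A short sketch with illustrative column names:

// Third constructor argument is isSingleValueField.
DimensionFieldSpec singleValue = new DimensionFieldSpec("city", FieldSpec.DataType.STRING, true);
DimensionFieldSpec multiValue = new DimensionFieldSpec("tags", FieldSpec.DataType.STRING, false);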
use of com.linkedin.pinot.common.data.DimensionFieldSpec in project pinot by linkedin.
the class NoDictionaryGroupKeyGeneratorTest method buildSegment.
/**
 * Helper method to build a segment as follows:
 * <ul>
 *   <li> One string column without dictionary. </li>
 *   <li> One integer column with dictionary. </li>
 * </ul>
 *
 * @return Record reader backed by the rows used to build the segment.
 *
 * @throws Exception
 */
private TestRecordReader buildSegment() throws Exception {
  Schema schema = new Schema();
  for (int i = 0; i < COLUMN_NAMES.length; i++) {
    DimensionFieldSpec dimensionFieldSpec = new DimensionFieldSpec(COLUMN_NAMES[i], DATA_TYPES[i], true);
    schema.addField(dimensionFieldSpec);
  }
  SegmentGeneratorConfig config = new SegmentGeneratorConfig(schema);
  config.setRawIndexCreationColumns(Arrays.asList(NO_DICT_COLUMN_NAMES));
  config.setOutDir(SEGMENT_DIR_NAME);
  config.setSegmentName(SEGMENT_NAME);
  Random random = new Random();
  List<GenericRow> rows = new ArrayList<>(NUM_ROWS);
  for (int i = 0; i < NUM_ROWS; i++) {
    Map<String, Object> map = new HashMap<>(NUM_COLUMNS);
    for (FieldSpec fieldSpec : schema.getAllFieldSpecs()) {
      String column = fieldSpec.getName();
      FieldSpec.DataType dataType = fieldSpec.getDataType();
      switch (dataType) {
        case INT:
          map.put(column, random.nextInt());
          break;
        case LONG:
          map.put(column, random.nextLong());
          break;
        case FLOAT:
          map.put(column, random.nextFloat());
          break;
        case DOUBLE:
          map.put(column, random.nextDouble());
          break;
        case STRING:
          map.put(column, "value_" + i);
          break;
        default:
          throw new IllegalArgumentException("Illegal data type specified: " + dataType);
      }
    }
    GenericRow genericRow = new GenericRow();
    genericRow.init(map);
    rows.add(genericRow);
  }
  SegmentIndexCreationDriverImpl driver = new SegmentIndexCreationDriverImpl();
  _recordReader = new TestRecordReader(rows, schema);
  driver.init(config, _recordReader);
  driver.build();
  return _recordReader;
}
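The per-type switch above could be factored into a small helper; a sketch, where generateRandomValue is a hypothetical name introduced here:

import java.util.Random;
import com.linkedin.pinot.common.data.FieldSpec;

// Hypothetical helper mirroring the switch in buildSegment(); rowId feeds the
// deterministic STRING values ("value_<rowId>") used above.
private static Object generateRandomValue(FieldSpec.DataType dataType, Random random, int rowId) {
  switch (dataType) {
    case INT:
      return random.nextInt();
    case LONG:
      return random.nextLong();
    case FLOAT:
      return random.nextFloat();
    case DOUBLE:
      return random.nextDouble();
    case STRING:
      return "value_" + rowId;
    default:
      throw new IllegalArgumentException("Illegal data type specified: " + dataType);
  }
}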
use of com.linkedin.pinot.common.data.DimensionFieldSpec in project pinot by linkedin.
the class DictionariesTest method testIntColumnPreIndexStatsCollector.
@Test
public void testIntColumnPreIndexStatsCollector() throws Exception {
  FieldSpec spec = new DimensionFieldSpec("column1", DataType.INT, true);
  AbstractColumnStatisticsCollector statsCollector = new IntColumnPreIndexStatsCollector(spec);
  // Values arrive in non-decreasing order (1, 2, 3, 4, 4), so the column stays sorted.
  statsCollector.collect(Integer.valueOf(1));
  Assert.assertTrue(statsCollector.isSorted());
  statsCollector.collect(Float.valueOf(2));
  Assert.assertTrue(statsCollector.isSorted());
  statsCollector.collect(Long.valueOf(3));
  Assert.assertTrue(statsCollector.isSorted());
  statsCollector.collect(Double.valueOf(4));
  Assert.assertTrue(statsCollector.isSorted());
  statsCollector.collect(Integer.valueOf(4));
  Assert.assertTrue(statsCollector.isSorted());
  // A smaller value after 4 breaks the sorted order for good.
  statsCollector.collect(Float.valueOf(2));
  Assert.assertFalse(statsCollector.isSorted());
  statsCollector.collect(Double.valueOf(40));
  Assert.assertFalse(statsCollector.isSorted());
  statsCollector.collect(Double.valueOf(20));
  Assert.assertFalse(statsCollector.isSorted());
  statsCollector.seal();
  // Distinct values collected: 1, 2, 3, 4, 40, 20.
  Assert.assertEquals(statsCollector.getCardinality(), 6);
  Assert.assertEquals(((Number) statsCollector.getMinValue()).intValue(), 1);
  Assert.assertEquals(((Number) statsCollector.getMaxValue()).intValue(), 40);
  Assert.assertFalse(statsCollector.isSorted());
}
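The same collect/seal/inspect pattern applies to the other per-type collectors. A sketch for a string column, assuming a StringColumnPreIndexStatsCollector that takes the field spec the same way the int collector does above:

// Assumption: the string collector mirrors the constructor shape shown above.
FieldSpec stringSpec = new DimensionFieldSpec("column2", DataType.STRING, true);
AbstractColumnStatisticsCollector stringStats = new StringColumnPreIndexStatsCollector(stringSpec);
stringStats.collect("a");
stringStats.collect("b");
stringStats.collect("a");  // a repeat after "b" breaks sorted order
stringStats.seal();        // no further collect() calls after sealing
Assert.assertEquals(stringStats.getCardinality(), 2);
Assert.assertFalse(stringStats.isSorted());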