use of com.linkedin.pinot.common.data.FieldSpec in project pinot by linkedin.
the class RealtimeFileBasedReaderTest method testDataSourceWithoutPredicateForSingleValueDimensionColumns.
/**
 * Walks every single-value dimension column of the schema and checks that the offline and
 * realtime segments expose the same values when read through their data sources.
 *
 * Dictionary ids are resolved through each segment's own dictionary and the looked-up values
 * are compared. On a mismatch the column name and both id/value pairs are logged before the
 * assertion error is rethrown.
 */
private void testDataSourceWithoutPredicateForSingleValueDimensionColumns() {
  for (FieldSpec fieldSpec : schema.getAllFieldSpecs()) {
    // Only single-value dimension columns are covered by this check.
    if (!fieldSpec.isSingleValueField() || fieldSpec.getFieldType() != FieldType.DIMENSION) {
      continue;
    }

    DataSource offlineSource = offlineSegment.getDataSource(fieldSpec.getName());
    DataSource realtimeSource = realtimeSegment.getDataSource(fieldSpec.getName());

    Block offlineData = offlineSource.nextBlock();
    Block realtimeData = realtimeSource.nextBlock();

    BlockMetadata offlineMeta = offlineData.getMetadata();
    BlockMetadata realtimeMeta = realtimeData.getMetadata();

    BlockSingleValIterator offlineIds = (BlockSingleValIterator) offlineData.getBlockValueSet().iterator();
    BlockSingleValIterator realtimeIds = (BlockSingleValIterator) realtimeData.getBlockValueSet().iterator();

    Assert.assertEquals(offlineSegment.getSegmentMetadata().getTotalDocs(),
        realtimeSegment.getAggregateDocumentCount());

    // The realtime iterator drives the walk; the offline iterator is advanced in lock step.
    while (realtimeIds.hasNext()) {
      int offlineId = offlineIds.nextIntVal();
      int realtimeId = realtimeIds.nextIntVal();
      try {
        Assert.assertEquals(offlineMeta.getDictionary().get(offlineId),
            realtimeMeta.getDictionary().get(realtimeId));
      } catch (AssertionError mismatch) {
        LOGGER.info("column : {}", fieldSpec.getName());
        LOGGER.info("realtimeDicId : {}, rawValue : {}", realtimeId,
            realtimeMeta.getDictionary().get(realtimeId));
        LOGGER.info("offlineDicId : {}, rawValue : {}", offlineId,
            offlineMeta.getDictionary().get(offlineId));
        throw mismatch;
      }
    }

    // Both iterators must be exhausted at the same point.
    Assert.assertEquals(offlineIds.hasNext(), realtimeIds.hasNext());
  }
}
use of com.linkedin.pinot.common.data.FieldSpec in project pinot by linkedin.
the class NoDictionaryGroupKeyGeneratorTest method buildSegment.
/**
 * Helper method to build a test segment out of randomly generated rows:
 * <ul>
 *   <li> One dimension column is declared per entry of {@code COLUMN_NAMES}, typed by the
 *        matching entry of {@code DATA_TYPES}. </li>
 *   <li> The columns listed in {@code NO_DICT_COLUMN_NAMES} are configured for raw index
 *        creation. </li>
 *   <li> Numeric columns get random values; string columns get {@code "value_" + rowIndex}. </li>
 * </ul>
 *
 * The record reader wrapping the generated rows is also stored in {@code _recordReader}.
 *
 * @return Record reader over the rows that were fed to the segment creation driver.
 *
 * @throws Exception propagated from the segment creation driver.
 * @throws IllegalArgumentException if a column has a data type other than INT, LONG, FLOAT,
 *         DOUBLE or STRING.
 */
private TestRecordReader buildSegment() throws Exception {
  // Declare the columns.
  Schema schema = new Schema();
  for (int col = 0; col < COLUMN_NAMES.length; col++) {
    DimensionFieldSpec columnSpec = new DimensionFieldSpec(COLUMN_NAMES[col], DATA_TYPES[col], true);
    schema.addField(columnSpec);
  }

  // Configure segment generation.
  SegmentGeneratorConfig generatorConfig = new SegmentGeneratorConfig(schema);
  generatorConfig.setRawIndexCreationColumns(Arrays.asList(NO_DICT_COLUMN_NAMES));
  generatorConfig.setOutDir(SEGMENT_DIR_NAME);
  generatorConfig.setSegmentName(SEGMENT_NAME);

  // Generate the rows.
  Random valueGenerator = new Random();
  List<GenericRow> generatedRows = new ArrayList<>(NUM_ROWS);
  for (int rowId = 0; rowId < NUM_ROWS; rowId++) {
    Map<String, Object> columnValues = new HashMap<>(NUM_COLUMNS);

    for (FieldSpec fieldSpec : schema.getAllFieldSpecs()) {
      FieldSpec.DataType dataType = fieldSpec.getDataType();
      Object value;
      switch (dataType) {
        case INT:
          value = valueGenerator.nextInt();
          break;
        case LONG:
          value = valueGenerator.nextLong();
          break;
        case FLOAT:
          value = valueGenerator.nextFloat();
          break;
        case DOUBLE:
          value = valueGenerator.nextDouble();
          break;
        case STRING:
          value = "value_" + rowId;
          break;
        default:
          throw new IllegalArgumentException("Illegal data type specified: " + dataType);
      }
      columnValues.put(fieldSpec.getName(), value);
    }

    GenericRow row = new GenericRow();
    row.init(columnValues);
    generatedRows.add(row);
  }

  // Build the segment from the generated rows.
  SegmentIndexCreationDriverImpl creationDriver = new SegmentIndexCreationDriverImpl();
  _recordReader = new TestRecordReader(generatedRows, schema);
  creationDriver.init(generatorConfig, _recordReader);
  creationDriver.build();
  return _recordReader;
}
use of com.linkedin.pinot.common.data.FieldSpec in project pinot by linkedin.
the class HybridClusterIntegrationTest method makeSortedColumn.
/**
 * Randomly chooses one single-value dimension to serve as the sorted column, or no column at all.
 * @note: Hard-code the return value here (a specific column name or null) to debug failed tests
 *
 * @return name of the chosen sorted column, or null if none is to be used for this run.
 */
protected String makeSortedColumn() {
  List<String> dimensionNames = schema.getDimensionNames();
  final int numDimensions = dimensionNames.size();

  // Return no sorted column 20% of the time
  if (random.nextInt() % 5 == 0) {
    return null;
  }

  // Make as many attempts as there are dimensions. A draw equal to numDimensions is one past
  // the last valid index and simply uses up an attempt.
  for (int attempt = 0; attempt < numDimensions; attempt++) {
    int candidate = random.nextInt(numDimensions + 1);
    if (candidate != numDimensions) {
      String columnName = dimensionNames.get(candidate);
      if (schema.getFieldSpecFor(columnName).isSingleValueField()) {
        return columnName;
      }
    }
  }

  // No single-value dimension was drawn within the allowed attempts.
  return null;
}
use of com.linkedin.pinot.common.data.FieldSpec in project pinot by linkedin.
the class DictionariesTest method testIntColumnPreIndexStatsCollector.
/**
 * Feeds an INT column stats collector a mix of boxed {@code Integer}, {@code Float},
 * {@code Long} and {@code Double} entries and checks the sorted flag after each entry, then the
 * cardinality, minimum and maximum once the collector is sealed.
 *
 * Boxed values are created through the {@code valueOf} factories rather than the deprecated
 * boxed-primitive constructors; the static type passed to {@code collect} is unchanged.
 */
@Test
public void testIntColumnPreIndexStatsCollector() throws Exception {
  FieldSpec spec = new DimensionFieldSpec("column1", DataType.INT, true);
  AbstractColumnStatisticsCollector statsCollector = new IntColumnPreIndexStatsCollector(spec);

  // Entries 1, 2, 3, 4, 4 never decrease, so the collector is expected to stay sorted.
  statsCollector.collect(Integer.valueOf(1));
  Assert.assertTrue(statsCollector.isSorted());
  statsCollector.collect(Float.valueOf(2f));
  Assert.assertTrue(statsCollector.isSorted());
  statsCollector.collect(Long.valueOf(3L));
  Assert.assertTrue(statsCollector.isSorted());
  statsCollector.collect(Double.valueOf(4d));
  Assert.assertTrue(statsCollector.isSorted());
  statsCollector.collect(Integer.valueOf(4));
  Assert.assertTrue(statsCollector.isSorted());

  // 2 after 4 breaks the order; the flag is expected to stay false for all later entries.
  statsCollector.collect(Float.valueOf(2f));
  Assert.assertFalse(statsCollector.isSorted());
  statsCollector.collect(Double.valueOf(40d));
  Assert.assertFalse(statsCollector.isSorted());
  statsCollector.collect(Double.valueOf(20d));
  Assert.assertFalse(statsCollector.isSorted());

  statsCollector.seal();

  // Six distinct numeric values were collected: 1, 2, 3, 4, 20 and 40.
  Assert.assertEquals(statsCollector.getCardinality(), 6);
  Assert.assertEquals(((Number) statsCollector.getMinValue()).intValue(), 1);
  Assert.assertEquals(((Number) statsCollector.getMaxValue()).intValue(), 40);
  Assert.assertFalse(statsCollector.isSorted());
}
use of com.linkedin.pinot.common.data.FieldSpec in project pinot by linkedin.
the class DictionariesTest method testStringColumnPreIndexStatsCollectorForBoolean.
/**
 * Feeds a string stats collector, created for a BOOLEAN column, a sequence of "false"/"true"
 * entries and checks the sorted flag after each entry, then the cardinality, minimum and
 * maximum once the collector is sealed.
 */
@Test
public void testStringColumnPreIndexStatsCollectorForBoolean() throws Exception {
  FieldSpec booleanSpec = new DimensionFieldSpec("column1", DataType.BOOLEAN, true);
  AbstractColumnStatisticsCollector collector = new StringColumnPreIndexStatsCollector(booleanSpec);

  String[] entries = {"false", "false", "false", "true", "true", "false", "false", "true"};
  // Index of the first entry that comes after a larger one ("false" following "true").
  // The collector is expected to report sorted before it and unsorted from it onwards.
  final int firstOutOfOrder = 5;

  for (int i = 0; i < entries.length; i++) {
    collector.collect(entries[i]);
    if (i < firstOutOfOrder) {
      Assert.assertTrue(collector.isSorted());
    } else {
      Assert.assertFalse(collector.isSorted());
    }
  }

  collector.seal();

  Assert.assertEquals(collector.getCardinality(), 2);
  Assert.assertEquals((collector.getMinValue()).toString(), "false");
  Assert.assertEquals((collector.getMaxValue()).toString(), "true");
  Assert.assertFalse(collector.isSorted());
}
Aggregations