use of com.linkedin.pinot.common.data.DimensionFieldSpec in project pinot by linkedin.
the class NoDictionaryGroupKeyGeneratorTest method buildSegment.
/**
 * Helper method that builds a test segment out of randomly generated rows.
 * <ul>
 *   <li> Every entry of COLUMN_NAMES becomes a dimension column typed by the matching DATA_TYPES entry. </li>
 *   <li> Columns listed in NO_DICT_COLUMN_NAMES are registered as raw index creation columns. </li>
 *   <li> NUM_ROWS rows are generated: numeric columns get random values, string columns get
 *        "value_" followed by the row index. </li>
 * </ul>
 *
 * The record reader wrapping the generated rows is also stored in the <code>_recordReader</code> field.
 *
 * @return Record reader over the rows that were fed to the segment creation driver.
 *
 * @throws Exception
 */
private TestRecordReader buildSegment() throws Exception {
  // Declare one dimension field per configured column.
  Schema schema = new Schema();
  for (int columnId = 0; columnId < COLUMN_NAMES.length; columnId++) {
    schema.addField(new DimensionFieldSpec(COLUMN_NAMES[columnId], DATA_TYPES[columnId], true));
  }

  SegmentGeneratorConfig generatorConfig = new SegmentGeneratorConfig(schema);
  generatorConfig.setRawIndexCreationColumns(Arrays.asList(NO_DICT_COLUMN_NAMES));
  generatorConfig.setOutDir(SEGMENT_DIR_NAME);
  generatorConfig.setSegmentName(SEGMENT_NAME);

  Random valueGenerator = new Random();
  List<GenericRow> generatedRows = new ArrayList<>(NUM_ROWS);
  for (int rowId = 0; rowId < NUM_ROWS; rowId++) {
    Map<String, Object> columnValues = new HashMap<>(NUM_COLUMNS);

    for (FieldSpec columnSpec : schema.getAllFieldSpecs()) {
      FieldSpec.DataType columnType = columnSpec.getDataType();

      // Pick the value first, then store it once below.
      Object value;
      switch (columnType) {
        case INT:
          value = valueGenerator.nextInt();
          break;
        case LONG:
          value = valueGenerator.nextLong();
          break;
        case FLOAT:
          value = valueGenerator.nextFloat();
          break;
        case DOUBLE:
          value = valueGenerator.nextDouble();
          break;
        case STRING:
          value = "value_" + rowId;
          break;
        default:
          throw new IllegalArgumentException("Illegal data type specified: " + columnType);
      }
      columnValues.put(columnSpec.getName(), value);
    }

    GenericRow row = new GenericRow();
    row.init(columnValues);
    generatedRows.add(row);
  }

  // Feed the generated rows to the segment creation driver.
  _recordReader = new TestRecordReader(generatedRows, schema);
  SegmentIndexCreationDriverImpl segmentDriver = new SegmentIndexCreationDriverImpl();
  segmentDriver.init(generatorConfig, _recordReader);
  segmentDriver.build();
  return _recordReader;
}
use of com.linkedin.pinot.common.data.DimensionFieldSpec in project pinot by linkedin.
the class DictionariesTest method testIntColumnPreIndexStatsCollector.
/**
 * Feeds a mix of boxed numeric types into an {@code IntColumnPreIndexStatsCollector} and checks
 * the sorted flag after every value, then the cardinality, min and max once the collector is sealed.
 *
 * @throws Exception
 */
@Test
public void testIntColumnPreIndexStatsCollector() throws Exception {
  FieldSpec spec = new DimensionFieldSpec("column1", DataType.INT, true);
  AbstractColumnStatisticsCollector statsCollector = new IntColumnPreIndexStatsCollector(spec);

  // Non-decreasing prefix (1, 2, 3, 4, 4): the collector must keep reporting sorted.
  // Boxed values come from valueOf(): the boxed-primitive constructors are deprecated.
  statsCollector.collect(Integer.valueOf(1));
  Assert.assertTrue(statsCollector.isSorted());
  statsCollector.collect(Float.valueOf(2f));
  Assert.assertTrue(statsCollector.isSorted());
  statsCollector.collect(Long.valueOf(3L));
  Assert.assertTrue(statsCollector.isSorted());
  statsCollector.collect(Double.valueOf(4d));
  Assert.assertTrue(statsCollector.isSorted());
  statsCollector.collect(Integer.valueOf(4));
  Assert.assertTrue(statsCollector.isSorted());

  // 2 after 4 breaks the order; the flag must stay false for all later values.
  statsCollector.collect(Float.valueOf(2f));
  Assert.assertFalse(statsCollector.isSorted());
  statsCollector.collect(Double.valueOf(40d));
  Assert.assertFalse(statsCollector.isSorted());
  statsCollector.collect(Double.valueOf(20d));
  Assert.assertFalse(statsCollector.isSorted());

  statsCollector.seal();
  // Expected distinct values: 1, 2, 3, 4, 20, 40.
  Assert.assertEquals(statsCollector.getCardinality(), 6);
  Assert.assertEquals(((Number) statsCollector.getMinValue()).intValue(), 1);
  Assert.assertEquals(((Number) statsCollector.getMaxValue()).intValue(), 40);
  Assert.assertFalse(statsCollector.isSorted());
}
use of com.linkedin.pinot.common.data.DimensionFieldSpec in project pinot by linkedin.
the class DictionariesTest method testStringColumnPreIndexStatsCollectorForBoolean.
/**
 * Feeds "true"/"false" strings into a {@code StringColumnPreIndexStatsCollector} built for a
 * BOOLEAN column and checks the sorted flag after every value, then the cardinality, min and max
 * once the collector is sealed.
 *
 * @throws Exception
 */
@Test
public void testStringColumnPreIndexStatsCollectorForBoolean() throws Exception {
  FieldSpec booleanSpec = new DimensionFieldSpec("column1", DataType.BOOLEAN, true);
  AbstractColumnStatisticsCollector collector = new StringColumnPreIndexStatsCollector(booleanSpec);

  // Values arriving in non-decreasing order: sorted must hold after each one.
  String[] inOrderValues = { "false", "false", "false", "true", "true" };
  for (String value : inOrderValues) {
    collector.collect(value);
    Assert.assertTrue(collector.isSorted());
  }

  // The first "false" after "true" breaks the order; sorted must stay false afterwards.
  String[] outOfOrderValues = { "false", "false", "true" };
  for (String value : outOfOrderValues) {
    collector.collect(value);
    Assert.assertFalse(collector.isSorted());
  }

  collector.seal();
  Assert.assertEquals(collector.getCardinality(), 2);
  Assert.assertEquals((collector.getMinValue()).toString(), "false");
  Assert.assertEquals((collector.getMaxValue()).toString(), "true");
  Assert.assertFalse(collector.isSorted());
}
use of com.linkedin.pinot.common.data.DimensionFieldSpec in project pinot by linkedin.
the class DictionariesTest method testUTF8Characters.
/**
 * Test for ensuring that Strings with special characters can be handled
 * correctly.
 *
 * The input strings are decoded from raw UTF-8 bytes, sorted, written through a
 * {@code SegmentDictionaryCreator}, and each one is then looked up again via {@code indexOfSV}.
 *
 * @throws Exception
 */
@Test
public void testUTF8Characters() throws Exception {
  File indexDir = new File("/tmp/dict.test");
  indexDir.deleteOnExit();
  try {
    FieldSpec fieldSpec = new DimensionFieldSpec("test", DataType.STRING, true);
    String[] inputStrings = new String[3];
    char paddingChar = '%';

    // The byte arrays below are UTF-8 encodings; decode them explicitly as UTF-8 so the test
    // does not depend on the platform default charset.
    // "Café";
    inputStrings[0] = new String(new byte[] { 67, 97, 102, -61, -87 }, java.nio.charset.StandardCharsets.UTF_8);
    // "François";
    inputStrings[1] = new String(new byte[] { 70, 114, 97, 110, -61, -89, 111, 105, 115 },
        java.nio.charset.StandardCharsets.UTF_8);
    // "Côte d'Ivoire";
    inputStrings[2] = new String(new byte[] { 67, -61, -76, 116, 101, 32, 100, 39, 73, 118, 111, 105, 114, 101 },
        java.nio.charset.StandardCharsets.UTF_8);
    Arrays.sort(inputStrings);

    SegmentDictionaryCreator dictionaryCreator =
        new SegmentDictionaryCreator(false, inputStrings, fieldSpec, indexDir, paddingChar);
    try {
      dictionaryCreator.build(new boolean[] { false });
      for (String inputString : inputStrings) {
        Assert.assertTrue(dictionaryCreator.indexOfSV(inputString) >= 0,
            "Value not found in dictionary " + inputString);
      }
    } finally {
      // Release the creator even when build() or an assertion fails.
      dictionaryCreator.close();
    }
  } finally {
    // Always remove the on-disk index directory, including on test failure.
    FileUtils.deleteQuietly(indexDir);
  }
}
use of com.linkedin.pinot.common.data.DimensionFieldSpec in project pinot by linkedin.
the class DictionariesTest method testStringColumnPreIndexStatsCollectorForRandomString.
/**
 * Feeds single-letter strings into a {@code StringColumnPreIndexStatsCollector} and checks the
 * sorted flag after every value, then the cardinality, min and max once the collector is sealed.
 *
 * @throws Exception
 */
@Test
public void testStringColumnPreIndexStatsCollectorForRandomString() throws Exception {
  FieldSpec stringSpec = new DimensionFieldSpec("column1", DataType.STRING, true);
  AbstractColumnStatisticsCollector collector = new StringColumnPreIndexStatsCollector(stringSpec);

  // Values arriving in non-decreasing order: sorted must hold after each one.
  String[] inOrderValues = { "a", "b", "c", "d", "d" };
  for (String value : inOrderValues) {
    collector.collect(value);
    Assert.assertTrue(collector.isSorted());
  }

  // "b" after "d" breaks the order; sorted must stay false afterwards.
  String[] outOfOrderValues = { "b", "z", "u" };
  for (String value : outOfOrderValues) {
    collector.collect(value);
    Assert.assertFalse(collector.isSorted());
  }

  collector.seal();
  Assert.assertEquals(collector.getCardinality(), 6);
  Assert.assertEquals((collector.getMinValue()).toString(), "a");
  Assert.assertEquals((collector.getMaxValue()).toString(), "z");
  Assert.assertFalse(collector.isSorted());
}
Aggregations