Search in sources :

Example 6 with DimensionFieldSpec

use of com.linkedin.pinot.common.data.DimensionFieldSpec in project pinot by linkedin.

the class NoDictionaryGroupKeyGeneratorTest method buildSegment.

/**
   * Builds a test segment and returns the record reader that fed it.
   *
   * <p>The schema gets one {@link DimensionFieldSpec} per entry of {@code COLUMN_NAMES}, typed by
   * the matching entry of {@code DATA_TYPES}. Columns listed in {@code NO_DICT_COLUMN_NAMES} are
   * registered as raw index creation columns. Per the original author's note, the constants are
   * expected to describe:
   * <ul>
   *   <li> One string column without dictionary. </li>
   *   <li> One integer column with dictionary. </li>
   * </ul>
   * (NOTE(review): the constants are declared outside this method; confirm against them.)
   *
   * <p>{@code NUM_ROWS} rows are generated. Numeric columns receive values from an unseeded
   * {@link Random}; STRING columns receive {@code "value_" + rowIndex}.
   *
   * @return the {@link TestRecordReader} wrapping the generated rows; the same instance is also
   *         stored in {@code _recordReader}.
   *
   * @throws IllegalArgumentException if a column has a data type other than INT, LONG, FLOAT,
   *         DOUBLE or STRING.
   * @throws Exception if initializing or building the segment fails.
   */
private TestRecordReader buildSegment() throws Exception {
    // Schema: one dimension column per configured name/type pair.
    Schema schema = new Schema();
    int columnIndex = 0;
    while (columnIndex < COLUMN_NAMES.length) {
        DimensionFieldSpec columnSpec =
            new DimensionFieldSpec(COLUMN_NAMES[columnIndex], DATA_TYPES[columnIndex], true);
        schema.addField(columnSpec);
        columnIndex++;
    }

    // Generator configuration: output location, segment name and the raw (no-dictionary) columns.
    SegmentGeneratorConfig segmentConfig = new SegmentGeneratorConfig(schema);
    segmentConfig.setRawIndexCreationColumns(Arrays.asList(NO_DICT_COLUMN_NAMES));
    segmentConfig.setOutDir(SEGMENT_DIR_NAME);
    segmentConfig.setSegmentName(SEGMENT_NAME);

    // Row generation.
    Random rng = new Random();
    List<GenericRow> generatedRows = new ArrayList<>(NUM_ROWS);
    for (int rowIndex = 0; rowIndex < NUM_ROWS; rowIndex++) {
        Map<String, Object> columnValues = new HashMap<>(NUM_COLUMNS);
        for (FieldSpec spec : schema.getAllFieldSpecs()) {
            String columnName = spec.getName();
            FieldSpec.DataType columnType = spec.getDataType();

            final Object cellValue;
            switch (columnType) {
                case INT:
                    cellValue = rng.nextInt();
                    break;
                case LONG:
                    cellValue = rng.nextLong();
                    break;
                case FLOAT:
                    cellValue = rng.nextFloat();
                    break;
                case DOUBLE:
                    cellValue = rng.nextDouble();
                    break;
                case STRING:
                    cellValue = "value_" + rowIndex;
                    break;
                default:
                    throw new IllegalArgumentException("Illegal data type specified: " + columnType);
            }
            columnValues.put(columnName, cellValue);
        }

        GenericRow row = new GenericRow();
        row.init(columnValues);
        generatedRows.add(row);
    }

    // Build the segment from the generated rows.
    SegmentIndexCreationDriverImpl segmentDriver = new SegmentIndexCreationDriverImpl();
    _recordReader = new TestRecordReader(generatedRows, schema);
    segmentDriver.init(segmentConfig, _recordReader);
    segmentDriver.build();
    return _recordReader;
}
Also used : TestRecordReader(com.linkedin.pinot.core.data.readers.TestRecordReader) HashMap(java.util.HashMap) Schema(com.linkedin.pinot.common.data.Schema) ArrayList(java.util.ArrayList) FieldSpec(com.linkedin.pinot.common.data.FieldSpec) DimensionFieldSpec(com.linkedin.pinot.common.data.DimensionFieldSpec) SegmentIndexCreationDriverImpl(com.linkedin.pinot.core.segment.creator.impl.SegmentIndexCreationDriverImpl) GenericRow(com.linkedin.pinot.core.data.GenericRow) Random(java.util.Random) SegmentGeneratorConfig(com.linkedin.pinot.core.indexsegment.generator.SegmentGeneratorConfig) DimensionFieldSpec(com.linkedin.pinot.common.data.DimensionFieldSpec)

Example 7 with DimensionFieldSpec

use of com.linkedin.pinot.common.data.DimensionFieldSpec in project pinot by linkedin.

the class DictionariesTest method testIntColumnPreIndexStatsCollector.

/**
 * Feeds an {@link IntColumnPreIndexStatsCollector} a sequence of boxed numbers of mixed types
 * (Integer, Float, Long, Double) and checks its sortedness tracking and sealed statistics.
 *
 * <p>The first five values (1, 2, 3, 4, 4) are non-decreasing, so the collector must report
 * sorted. The sixth value (2) breaks the order, and the collector must report unsorted from
 * then on. After sealing, the test expects 6 distinct values with minimum 1 and maximum 40.
 *
 * @throws Exception if the collector fails.
 */
@Test
public void testIntColumnPreIndexStatsCollector() throws Exception {
    FieldSpec spec = new DimensionFieldSpec("column1", DataType.INT, true);
    AbstractColumnStatisticsCollector statsCollector = new IntColumnPreIndexStatsCollector(spec);

    // Boxed values are created via valueOf(): the boxed-primitive constructors are deprecated
    // (since Java 9). The static types handed to collect() are unchanged.
    statsCollector.collect(Integer.valueOf(1));
    Assert.assertTrue(statsCollector.isSorted());
    statsCollector.collect(Float.valueOf(2f));
    Assert.assertTrue(statsCollector.isSorted());
    statsCollector.collect(Long.valueOf(3L));
    Assert.assertTrue(statsCollector.isSorted());
    statsCollector.collect(Double.valueOf(4d));
    Assert.assertTrue(statsCollector.isSorted());
    // A repeated value keeps the sequence non-decreasing.
    statsCollector.collect(Integer.valueOf(4));
    Assert.assertTrue(statsCollector.isSorted());

    // 2 after 4 breaks the ordering; the collector must stay unsorted afterwards.
    statsCollector.collect(Float.valueOf(2f));
    Assert.assertFalse(statsCollector.isSorted());
    statsCollector.collect(Double.valueOf(40d));
    Assert.assertFalse(statsCollector.isSorted());
    statsCollector.collect(Double.valueOf(20d));
    Assert.assertFalse(statsCollector.isSorted());

    statsCollector.seal();
    // Distinct values collected: 1, 2, 3, 4, 20, 40.
    Assert.assertEquals(statsCollector.getCardinality(), 6);
    Assert.assertEquals(((Number) statsCollector.getMinValue()).intValue(), 1);
    Assert.assertEquals(((Number) statsCollector.getMaxValue()).intValue(), 40);
    Assert.assertFalse(statsCollector.isSorted());
}
Also used : IntColumnPreIndexStatsCollector(com.linkedin.pinot.core.segment.creator.impl.stats.IntColumnPreIndexStatsCollector) AbstractColumnStatisticsCollector(com.linkedin.pinot.core.segment.creator.AbstractColumnStatisticsCollector) FieldSpec(com.linkedin.pinot.common.data.FieldSpec) DimensionFieldSpec(com.linkedin.pinot.common.data.DimensionFieldSpec) DimensionFieldSpec(com.linkedin.pinot.common.data.DimensionFieldSpec) Test(org.testng.annotations.Test)

Example 8 with DimensionFieldSpec

use of com.linkedin.pinot.common.data.DimensionFieldSpec in project pinot by linkedin.

the class DictionariesTest method testStringColumnPreIndexStatsCollectorForBoolean.

/**
 * Checks a {@link StringColumnPreIndexStatsCollector} created for a BOOLEAN column.
 *
 * <p>The strings "false", "false", "false", "true", "true" are collected first and the collector
 * must report sorted after each one. Then "false", "false", "true" are collected and it must
 * report unsorted after each one. After sealing, the test expects cardinality 2, minimum
 * "false" and maximum "true".
 *
 * @throws Exception if the collector fails.
 */
@Test
public void testStringColumnPreIndexStatsCollectorForBoolean() throws Exception {
    FieldSpec booleanSpec = new DimensionFieldSpec("column1", DataType.BOOLEAN, true);
    AbstractColumnStatisticsCollector collector = new StringColumnPreIndexStatsCollector(booleanSpec);

    // Non-decreasing prefix: sortedness must hold after every value.
    String[] orderedValues = { "false", "false", "false", "true", "true" };
    for (String value : orderedValues) {
        collector.collect(value);
        Assert.assertTrue(collector.isSorted());
    }

    // The first "false" after "true" breaks the order; the flag must stay false afterwards.
    String[] unorderedValues = { "false", "false", "true" };
    for (String value : unorderedValues) {
        collector.collect(value);
        Assert.assertFalse(collector.isSorted());
    }

    collector.seal();
    Assert.assertEquals(collector.getCardinality(), 2);
    Assert.assertEquals((collector.getMinValue()).toString(), "false");
    Assert.assertEquals((collector.getMaxValue()).toString(), "true");
    Assert.assertFalse(collector.isSorted());
}
Also used : StringColumnPreIndexStatsCollector(com.linkedin.pinot.core.segment.creator.impl.stats.StringColumnPreIndexStatsCollector) AbstractColumnStatisticsCollector(com.linkedin.pinot.core.segment.creator.AbstractColumnStatisticsCollector) FieldSpec(com.linkedin.pinot.common.data.FieldSpec) DimensionFieldSpec(com.linkedin.pinot.common.data.DimensionFieldSpec) DimensionFieldSpec(com.linkedin.pinot.common.data.DimensionFieldSpec) Test(org.testng.annotations.Test)

Example 9 with DimensionFieldSpec

use of com.linkedin.pinot.common.data.DimensionFieldSpec in project pinot by linkedin.

the class DictionariesTest method testUTF8Characters.

/**
   * Test for ensuring that Strings with special (non-ASCII) characters can be handled
   * correctly by {@link SegmentDictionaryCreator}.
   *
   * <p>The input strings are decoded from raw UTF-8 byte sequences with an explicit UTF-8
   * charset, so the test does not depend on the platform default encoding. The dictionary
   * creator is closed and the index directory removed even when the build or an assertion fails.
   *
   * @throws Exception if building or closing the dictionary fails.
   */
@Test
public void testUTF8Characters() throws Exception {
    File indexDir = new File("/tmp/dict.test");
    indexDir.deleteOnExit();
    FieldSpec fieldSpec = new DimensionFieldSpec("test", DataType.STRING, true);
    char paddingChar = '%';

    // The byte arrays below are UTF-8 encodings; decode them as such rather than relying on the
    // JVM's default charset (which is not guaranteed to be UTF-8 before Java 18).
    String[] inputStrings = new String[3];
    // "Café";
    inputStrings[0] = new String(new byte[] { 67, 97, 102, -61, -87 },
        java.nio.charset.StandardCharsets.UTF_8);
    // "François";
    inputStrings[1] = new String(new byte[] { 70, 114, 97, 110, -61, -89, 111, 105, 115 },
        java.nio.charset.StandardCharsets.UTF_8);
    // "Côte d'Ivoire";
    inputStrings[2] = new String(new byte[] { 67, -61, -76, 116, 101, 32, 100, 39, 73, 118, 111, 105, 114, 101 },
        java.nio.charset.StandardCharsets.UTF_8);
    // The values are sorted before being handed to the dictionary creator.
    Arrays.sort(inputStrings);

    try {
        SegmentDictionaryCreator dictionaryCreator = new SegmentDictionaryCreator(false, inputStrings, fieldSpec, indexDir, paddingChar);
        try {
            dictionaryCreator.build(new boolean[] { false });
            for (String inputString : inputStrings) {
                Assert.assertTrue(dictionaryCreator.indexOfSV(inputString) >= 0, "Value not found in dictionary " + inputString);
            }
        } finally {
            // Release the creator's resources even if build() or an assertion throws.
            dictionaryCreator.close();
        }
    } finally {
        // Always remove the on-disk artifacts so a failed run does not leak into later tests.
        FileUtils.deleteQuietly(indexDir);
    }
}
Also used : SegmentDictionaryCreator(com.linkedin.pinot.core.segment.creator.impl.SegmentDictionaryCreator) File(java.io.File) FieldSpec(com.linkedin.pinot.common.data.FieldSpec) DimensionFieldSpec(com.linkedin.pinot.common.data.DimensionFieldSpec) DimensionFieldSpec(com.linkedin.pinot.common.data.DimensionFieldSpec) Test(org.testng.annotations.Test)

Example 10 with DimensionFieldSpec

use of com.linkedin.pinot.common.data.DimensionFieldSpec in project pinot by linkedin.

the class DictionariesTest method testStringColumnPreIndexStatsCollectorForRandomString.

/**
 * Checks a {@link StringColumnPreIndexStatsCollector} created for a STRING column.
 *
 * <p>The strings "a", "b", "c", "d", "d" are collected first and the collector must report
 * sorted after each one. Then "b", "z", "u" are collected and it must report unsorted after each
 * one. After sealing, the test expects cardinality 6, minimum "a" and maximum "z".
 *
 * @throws Exception if the collector fails.
 */
@Test
public void testStringColumnPreIndexStatsCollectorForRandomString() throws Exception {
    FieldSpec stringSpec = new DimensionFieldSpec("column1", DataType.STRING, true);
    AbstractColumnStatisticsCollector collector = new StringColumnPreIndexStatsCollector(stringSpec);

    // Non-decreasing prefix (the duplicate "d" is allowed): sortedness must hold throughout.
    String[] orderedValues = { "a", "b", "c", "d", "d" };
    for (String value : orderedValues) {
        collector.collect(value);
        Assert.assertTrue(collector.isSorted());
    }

    // "b" after "d" breaks the order; later values must not restore the sorted flag.
    String[] unorderedValues = { "b", "z", "u" };
    for (String value : unorderedValues) {
        collector.collect(value);
        Assert.assertFalse(collector.isSorted());
    }

    collector.seal();
    // Distinct values collected: a, b, c, d, u, z.
    Assert.assertEquals(collector.getCardinality(), 6);
    Assert.assertEquals((collector.getMinValue()).toString(), "a");
    Assert.assertEquals((collector.getMaxValue()).toString(), "z");
    Assert.assertFalse(collector.isSorted());
}
Also used : StringColumnPreIndexStatsCollector(com.linkedin.pinot.core.segment.creator.impl.stats.StringColumnPreIndexStatsCollector) AbstractColumnStatisticsCollector(com.linkedin.pinot.core.segment.creator.AbstractColumnStatisticsCollector) FieldSpec(com.linkedin.pinot.common.data.FieldSpec) DimensionFieldSpec(com.linkedin.pinot.common.data.DimensionFieldSpec) DimensionFieldSpec(com.linkedin.pinot.common.data.DimensionFieldSpec) Test(org.testng.annotations.Test)

Aggregations

DimensionFieldSpec (com.linkedin.pinot.common.data.DimensionFieldSpec): 38; FieldSpec (com.linkedin.pinot.common.data.FieldSpec): 27; Schema (com.linkedin.pinot.common.data.Schema): 18; Test (org.testng.annotations.Test): 17; MetricFieldSpec (com.linkedin.pinot.common.data.MetricFieldSpec): 16; File (java.io.File): 16; TimeFieldSpec (com.linkedin.pinot.common.data.TimeFieldSpec): 13; HashMap (java.util.HashMap): 9; GenericRow (com.linkedin.pinot.core.data.GenericRow): 7; Random (java.util.Random): 7; TimeGranularitySpec (com.linkedin.pinot.common.data.TimeGranularitySpec): 6; AbstractColumnStatisticsCollector (com.linkedin.pinot.core.segment.creator.AbstractColumnStatisticsCollector): 6; SegmentDictionaryCreator (com.linkedin.pinot.core.segment.creator.impl.SegmentDictionaryCreator): 6; SegmentGeneratorConfig (com.linkedin.pinot.core.indexsegment.generator.SegmentGeneratorConfig): 5; SegmentIndexCreationDriverImpl (com.linkedin.pinot.core.segment.creator.impl.SegmentIndexCreationDriverImpl): 5; ArrayList (java.util.ArrayList): 4; FieldType (com.linkedin.pinot.common.data.FieldSpec.FieldType): 3; IndexSegment (com.linkedin.pinot.core.indexsegment.IndexSegment): 3; DataType (com.linkedin.pinot.common.data.FieldSpec.DataType): 2; RecordReader (com.linkedin.pinot.core.data.readers.RecordReader): 2