Use of com.linkedin.pinot.core.segment.creator.AbstractColumnStatisticsCollector in the pinot project by linkedin.
Class SegmentIndexCreationDriverImpl, method handlePostCreation:
/**
 * Finishes segment creation once all records have been indexed.
 *
 * <p>In order: names the segment from the time column's statistics, seals the index creator,
 * replaces any existing output directory of the same name with the temporary index directory,
 * computes the CRC over the resulting files, persists the creation metadata, copies the record
 * reader's per-column null counts into the column statistics, and finally triggers the format
 * conversion step and logs the accumulated timings.
 *
 * @throws Exception if any of the sealing, file-system, CRC or conversion steps fails
 */
private void handlePostCreation() throws Exception {
  final String timeColumnName = config.getTimeColumnName();
  segmentName = config.getSegmentNameGenerator().getSegmentName(segmentStats.getColumnProfileFor(timeColumnName));

  // Flush the index files to disk under the final segment name.
  indexCreator.setSegmentName(segmentName);
  indexCreator.seal();
  LOGGER.info("Finished segment seal!");

  // A directory left over under the same segment name is removed so the move below can succeed.
  final File finalSegmentDir = new File(new File(config.getOutDir()), segmentName);
  if (finalSegmentDir.exists()) {
    FileUtils.deleteDirectory(finalSegmentDir);
  }

  // Promote the temporary index directory to its final location, then clean up any remainder.
  FileUtils.moveDirectory(tempIndexDir, finalSegmentDir);
  FileUtils.deleteQuietly(tempIndexDir);

  // The CRC is computed over the moved files and stored with the creation metadata.
  final long segmentCrc = CrcUtils.forAllFilesInFolder(finalSegmentDir).computeCrc();
  persistCreationMeta(finalSegmentDir, segmentCrc);

  // The record reader may not track null counts at all, in which case it returns null.
  final Map<String, MutableLong> nullCounts = recordReader.getNullCountMap();
  if (nullCounts != null) {
    for (Map.Entry<String, MutableLong> nullCount : nullCounts.entrySet()) {
      segmentStats.getColumnProfileFor(nullCount.getKey())
          .setNumInputNullValues(nullCount.getValue().intValue());
    }
  }

  convertFormatIfNeeded(finalSegmentDir);

  LOGGER.info("Driver, record read time : {}", totalRecordReadTime);
  LOGGER.info("Driver, stats collector time : {}", totalStatsCollectorTime);
  LOGGER.info("Driver, indexing time : {}", totalIndexTime);
}
Use of com.linkedin.pinot.core.segment.creator.AbstractColumnStatisticsCollector in the pinot project by linkedin.
Class DictionariesTest, method testIntColumnPreIndexStatsCollector:
/**
 * Feeds an {@code IntColumnPreIndexStatsCollector} a mix of boxed numeric types and checks the
 * statistics it reports.
 *
 * <p>The collector must report the column as sorted while the values are non-decreasing
 * (1, 2, 3, 4, 4) and as unsorted from the first smaller value (2) onwards, including after
 * sealing. After sealing, six distinct values are expected with minimum 1 and maximum 40.
 *
 * <p>Values are boxed through the {@code valueOf} factories rather than the deprecated
 * boxed-primitive constructors.
 */
@Test
public void testIntColumnPreIndexStatsCollector() throws Exception {
  FieldSpec spec = new DimensionFieldSpec("column1", DataType.INT, true);
  AbstractColumnStatisticsCollector statsCollector = new IntColumnPreIndexStatsCollector(spec);

  // Non-decreasing prefix: the sorted flag must stay set.
  statsCollector.collect(Integer.valueOf(1));
  Assert.assertTrue(statsCollector.isSorted());
  statsCollector.collect(Float.valueOf(2f));
  Assert.assertTrue(statsCollector.isSorted());
  statsCollector.collect(Long.valueOf(3L));
  Assert.assertTrue(statsCollector.isSorted());
  statsCollector.collect(Double.valueOf(4d));
  Assert.assertTrue(statsCollector.isSorted());
  statsCollector.collect(Integer.valueOf(4));
  Assert.assertTrue(statsCollector.isSorted());

  // A smaller value breaks the ordering; the flag must not recover afterwards.
  statsCollector.collect(Float.valueOf(2f));
  Assert.assertFalse(statsCollector.isSorted());
  statsCollector.collect(Double.valueOf(40d));
  Assert.assertFalse(statsCollector.isSorted());
  statsCollector.collect(Double.valueOf(20d));
  Assert.assertFalse(statsCollector.isSorted());

  statsCollector.seal();

  // Distinct values collected: 1, 2, 3, 4, 40, 20.
  Assert.assertEquals(statsCollector.getCardinality(), 6);
  Assert.assertEquals(((Number) statsCollector.getMinValue()).intValue(), 1);
  Assert.assertEquals(((Number) statsCollector.getMaxValue()).intValue(), 40);
  Assert.assertFalse(statsCollector.isSorted());
}
Use of com.linkedin.pinot.core.segment.creator.AbstractColumnStatisticsCollector in the pinot project by linkedin.
Class DictionariesTest, method testStringColumnPreIndexStatsCollectorForBoolean:
/**
 * Checks a {@code StringColumnPreIndexStatsCollector} built for a BOOLEAN field spec when it is
 * fed the strings "false" and "true".
 *
 * <p>The sorted flag must hold for the non-decreasing prefix and be cleared from the first
 * out-of-order value onwards; after sealing, two distinct values are expected with "false" as
 * the minimum and "true" as the maximum.
 */
@Test
public void testStringColumnPreIndexStatsCollectorForBoolean() throws Exception {
  FieldSpec spec = new DimensionFieldSpec("column1", DataType.BOOLEAN, true);
  AbstractColumnStatisticsCollector statsCollector = new StringColumnPreIndexStatsCollector(spec);

  // Values arriving in order: the column still looks sorted after each one.
  final String[] inOrderValues = {"false", "false", "false", "true", "true"};
  for (String value : inOrderValues) {
    statsCollector.collect(value);
    Assert.assertTrue(statsCollector.isSorted());
  }

  // Starting with a "false" after "true", the column must be reported as unsorted for good.
  final String[] outOfOrderValues = {"false", "false", "true"};
  for (String value : outOfOrderValues) {
    statsCollector.collect(value);
    Assert.assertFalse(statsCollector.isSorted());
  }

  statsCollector.seal();

  Assert.assertEquals(statsCollector.getCardinality(), 2);
  Assert.assertEquals((statsCollector.getMinValue()).toString(), "false");
  Assert.assertEquals((statsCollector.getMaxValue()).toString(), "true");
  Assert.assertFalse(statsCollector.isSorted());
}
Use of com.linkedin.pinot.core.segment.creator.AbstractColumnStatisticsCollector in the pinot project by linkedin.
Class DictionariesTest, method testStringColumnPreIndexStatsCollectorForRandomString:
/**
 * Checks a {@code StringColumnPreIndexStatsCollector} built for a STRING field spec against a
 * short sequence of single-letter strings.
 *
 * <p>The sorted flag must hold for the non-decreasing prefix (a, b, c, d, d) and be cleared from
 * the first out-of-order value (b) onwards; after sealing, six distinct values are expected with
 * "a" as the minimum and "z" as the maximum.
 */
@Test
public void testStringColumnPreIndexStatsCollectorForRandomString() throws Exception {
  FieldSpec spec = new DimensionFieldSpec("column1", DataType.STRING, true);
  AbstractColumnStatisticsCollector statsCollector = new StringColumnPreIndexStatsCollector(spec);

  // Values arriving in order: the column still looks sorted after each one.
  final String[] inOrderValues = {"a", "b", "c", "d", "d"};
  for (String value : inOrderValues) {
    statsCollector.collect(value);
    Assert.assertTrue(statsCollector.isSorted());
  }

  // "b" after "d" breaks the ordering; later values must not restore the flag.
  final String[] outOfOrderValues = {"b", "z", "u"};
  for (String value : outOfOrderValues) {
    statsCollector.collect(value);
    Assert.assertFalse(statsCollector.isSorted());
  }

  statsCollector.seal();

  // Distinct values collected: a, b, c, d, z, u.
  Assert.assertEquals(statsCollector.getCardinality(), 6);
  Assert.assertEquals((statsCollector.getMinValue()).toString(), "a");
  Assert.assertEquals((statsCollector.getMaxValue()).toString(), "z");
  Assert.assertFalse(statsCollector.isSorted());
}
Use of com.linkedin.pinot.core.segment.creator.AbstractColumnStatisticsCollector in the pinot project by linkedin.
Class DictionariesTest, method testLongColumnPreIndexStatsCollector:
/**
 * Feeds a {@code LongColumnPreIndexStatsCollector} a mix of boxed numeric types and checks the
 * statistics it reports.
 *
 * <p>The collector must report the column as sorted while the values are non-decreasing
 * (1, 2, 3, 4, 4) and as unsorted from the first smaller value (2) onwards, including after
 * sealing. After sealing, six distinct values are expected with minimum 1 and maximum 40.
 *
 * <p>Values are boxed through the {@code valueOf} factories rather than the deprecated
 * boxed-primitive constructors.
 */
@Test
public void testLongColumnPreIndexStatsCollector() throws Exception {
  FieldSpec spec = new DimensionFieldSpec("column1", DataType.LONG, true);
  AbstractColumnStatisticsCollector statsCollector = new LongColumnPreIndexStatsCollector(spec);

  // Non-decreasing prefix: the sorted flag must stay set.
  statsCollector.collect(Integer.valueOf(1));
  Assert.assertTrue(statsCollector.isSorted());
  statsCollector.collect(Float.valueOf(2f));
  Assert.assertTrue(statsCollector.isSorted());
  statsCollector.collect(Long.valueOf(3L));
  Assert.assertTrue(statsCollector.isSorted());
  statsCollector.collect(Double.valueOf(4d));
  Assert.assertTrue(statsCollector.isSorted());
  statsCollector.collect(Integer.valueOf(4));
  Assert.assertTrue(statsCollector.isSorted());

  // A smaller value breaks the ordering; the flag must not recover afterwards.
  statsCollector.collect(Float.valueOf(2f));
  Assert.assertFalse(statsCollector.isSorted());
  statsCollector.collect(Double.valueOf(40d));
  Assert.assertFalse(statsCollector.isSorted());
  statsCollector.collect(Double.valueOf(20d));
  Assert.assertFalse(statsCollector.isSorted());

  statsCollector.seal();

  // Distinct values collected: 1, 2, 3, 4, 40, 20.
  Assert.assertEquals(statsCollector.getCardinality(), 6);
  Assert.assertEquals(((Number) statsCollector.getMinValue()).intValue(), 1);
  Assert.assertEquals(((Number) statsCollector.getMaxValue()).intValue(), 40);
  Assert.assertFalse(statsCollector.isSorted());
}
Aggregations