Search in sources :

Example 1 with SegmentIndexCreationDriverImpl

use of com.linkedin.pinot.core.segment.creator.impl.SegmentIndexCreationDriverImpl in project pinot by linkedin.

the class StarTreeIndexTestSegmentHelper method buildSegment.

private static Schema buildSegment(String segmentDirName, String segmentName, HllConfig hllConfig, boolean enableOffHeapFormat) throws Exception {
    final int rows = (int) MathUtils.factorial(NUM_DIMENSIONS) * 100;
    Schema schema = new Schema();
    for (int i = 0; i < NUM_DIMENSIONS; i++) {
        String dimName = "d" + (i + 1);
        DimensionFieldSpec dimensionFieldSpec = new DimensionFieldSpec(dimName, FieldSpec.DataType.STRING, true);
        schema.addField(dimName, dimensionFieldSpec);
    }
    schema.setTimeFieldSpec(new TimeFieldSpec(TIME_COLUMN_NAME, FieldSpec.DataType.INT, TimeUnit.DAYS));
    for (int i = 0; i < NUM_METRICS; i++) {
        String metricName = "m" + (i + 1);
        MetricFieldSpec metricFieldSpec = new MetricFieldSpec(metricName, FieldSpec.DataType.INT);
        schema.addField(metricName, metricFieldSpec);
    }
    SegmentGeneratorConfig config = new SegmentGeneratorConfig(schema);
    config.setEnableStarTreeIndex(true);
    config.setOutDir(segmentDirName);
    config.setFormat(FileFormat.AVRO);
    config.setSegmentName(segmentName);
    config.setHllConfig(hllConfig);
    config.setStarTreeIndexSpec(buildStarTreeIndexSpec(enableOffHeapFormat));
    Random random = new Random(RANDOM_SEED);
    final List<GenericRow> data = new ArrayList<>();
    for (int row = 0; row < rows; row++) {
        HashMap<String, Object> map = new HashMap<>();
        // Dim columns.
        for (int i = 0; i < NUM_DIMENSIONS / 2; i++) {
            String dimName = schema.getDimensionFieldSpecs().get(i).getName();
            map.put(dimName, dimName + "-v" + row % (NUM_DIMENSIONS - i));
        }
        // Random values make cardinality of d3, d4 column values larger to better test hll
        for (int i = NUM_DIMENSIONS / 2; i < NUM_DIMENSIONS; i++) {
            String dimName = schema.getDimensionFieldSpecs().get(i).getName();
            map.put(dimName, dimName + "-v" + random.nextInt(i * 100));
        }
        // Metric columns.
        for (int i = 0; i < NUM_METRICS; i++) {
            String metName = schema.getMetricFieldSpecs().get(i).getName();
            map.put(metName, random.nextInt(METRIC_MAX_VALUE));
        }
        // Time column.
        map.put(TIME_COLUMN_NAME, row % 7);
        GenericRow genericRow = new GenericRow();
        genericRow.init(map);
        data.add(genericRow);
    }
    SegmentIndexCreationDriverImpl driver = new SegmentIndexCreationDriverImpl();
    RecordReader reader = new TestUtils.GenericRowRecordReader(schema, data);
    driver.init(config, reader);
    driver.build();
    LOGGER.info("Built segment {} at {}", segmentName, segmentDirName);
    return schema;
}
Also used : RecordReader(com.linkedin.pinot.core.data.readers.RecordReader) SegmentIndexCreationDriverImpl(com.linkedin.pinot.core.segment.creator.impl.SegmentIndexCreationDriverImpl) GenericRow(com.linkedin.pinot.core.data.GenericRow) SegmentGeneratorConfig(com.linkedin.pinot.core.indexsegment.generator.SegmentGeneratorConfig)

Example 2 with SegmentIndexCreationDriverImpl

use of com.linkedin.pinot.core.segment.creator.impl.SegmentIndexCreationDriverImpl in project pinot by linkedin.

the class NoDictionaryGroupKeyGeneratorTest method buildSegment.

/**
   * Helper method to build a segment as follows:
   * <ul>
   *   <li> One string column without dictionary. </li>
   *   <li> One integer column with dictionary. </li>
   * </ul>
   *
   * It also computes the unique group keys while it generates the index.
   *
   * @return Set containing unique group keys from the created segment.
   *
   * @throws Exception
   */
private TestRecordReader buildSegment() throws Exception {
    Schema schema = new Schema();
    for (int i = 0; i < COLUMN_NAMES.length; i++) {
        DimensionFieldSpec dimensionFieldSpec = new DimensionFieldSpec(COLUMN_NAMES[i], DATA_TYPES[i], true);
        schema.addField(dimensionFieldSpec);
    }
    SegmentGeneratorConfig config = new SegmentGeneratorConfig(schema);
    config.setRawIndexCreationColumns(Arrays.asList(NO_DICT_COLUMN_NAMES));
    config.setOutDir(SEGMENT_DIR_NAME);
    config.setSegmentName(SEGMENT_NAME);
    Random random = new Random();
    List<GenericRow> rows = new ArrayList<>(NUM_ROWS);
    for (int i = 0; i < NUM_ROWS; i++) {
        Map<String, Object> map = new HashMap<>(NUM_COLUMNS);
        for (FieldSpec fieldSpec : schema.getAllFieldSpecs()) {
            String column = fieldSpec.getName();
            FieldSpec.DataType dataType = fieldSpec.getDataType();
            switch(dataType) {
                case INT:
                    map.put(column, random.nextInt());
                    break;
                case LONG:
                    map.put(column, random.nextLong());
                    break;
                case FLOAT:
                    map.put(column, random.nextFloat());
                    break;
                case DOUBLE:
                    map.put(column, random.nextDouble());
                    break;
                case STRING:
                    map.put(column, "value_" + i);
                    break;
                default:
                    throw new IllegalArgumentException("Illegal data type specified: " + dataType);
            }
        }
        GenericRow genericRow = new GenericRow();
        genericRow.init(map);
        rows.add(genericRow);
    }
    SegmentIndexCreationDriverImpl driver = new SegmentIndexCreationDriverImpl();
    _recordReader = new TestRecordReader(rows, schema);
    driver.init(config, _recordReader);
    driver.build();
    return _recordReader;
}
Also used : TestRecordReader(com.linkedin.pinot.core.data.readers.TestRecordReader) HashMap(java.util.HashMap) Schema(com.linkedin.pinot.common.data.Schema) ArrayList(java.util.ArrayList) FieldSpec(com.linkedin.pinot.common.data.FieldSpec) DimensionFieldSpec(com.linkedin.pinot.common.data.DimensionFieldSpec) SegmentIndexCreationDriverImpl(com.linkedin.pinot.core.segment.creator.impl.SegmentIndexCreationDriverImpl) GenericRow(com.linkedin.pinot.core.data.GenericRow) Random(java.util.Random) SegmentGeneratorConfig(com.linkedin.pinot.core.indexsegment.generator.SegmentGeneratorConfig) DimensionFieldSpec(com.linkedin.pinot.common.data.DimensionFieldSpec)

Example 3 with SegmentIndexCreationDriverImpl

use of com.linkedin.pinot.core.segment.creator.impl.SegmentIndexCreationDriverImpl in project pinot by linkedin.

the class QueryExceptionTest method setupSegmentFor.

private void setupSegmentFor(String table) throws Exception {
    final String filePath = TestUtils.getFileFromResourceUrl(getClass().getClassLoader().getResource(AVRO_DATA));
    if (INDEX_DIR.exists()) {
        FileUtils.deleteQuietly(INDEX_DIR);
    }
    INDEX_DIR.mkdir();
    final SegmentGeneratorConfig config = SegmentTestUtils.getSegmentGenSpecWithSchemAndProjectedColumns(new File(filePath), new File(INDEX_DIR, "segment"), "daysSinceEpoch", TimeUnit.DAYS, table);
    final SegmentIndexCreationDriver driver = new SegmentIndexCreationDriverImpl();
    driver.init(config);
    driver.build();
//    System.out.println("built at : " + INDEX_DIR.getAbsolutePath());
}
Also used : SegmentIndexCreationDriver(com.linkedin.pinot.core.segment.creator.SegmentIndexCreationDriver) SegmentGeneratorConfig(com.linkedin.pinot.core.indexsegment.generator.SegmentGeneratorConfig) File(java.io.File) SegmentIndexCreationDriverImpl(com.linkedin.pinot.core.segment.creator.impl.SegmentIndexCreationDriverImpl)

Example 4 with SegmentIndexCreationDriverImpl

use of com.linkedin.pinot.core.segment.creator.impl.SegmentIndexCreationDriverImpl in project pinot by linkedin.

the class CreateSegmentCommand method execute.

@Override
public boolean execute() throws Exception {
    LOGGER.info("Executing command: {}", toString());
    // Load generator config if exist.
    final SegmentGeneratorConfig segmentGeneratorConfig;
    if (_generatorConfigFile != null) {
        segmentGeneratorConfig = new ObjectMapper().readValue(new File(_generatorConfigFile), SegmentGeneratorConfig.class);
    } else {
        segmentGeneratorConfig = new SegmentGeneratorConfig();
    }
    // Load config from segment generator config.
    String configDataDir = segmentGeneratorConfig.getDataDir();
    if (_dataDir == null) {
        if (configDataDir == null) {
            throw new RuntimeException("Must specify dataDir.");
        }
        _dataDir = configDataDir;
    } else {
        if (configDataDir != null && !configDataDir.equals(_dataDir)) {
            LOGGER.warn("Find dataDir conflict in command line and config file, use config in command line: {}", _dataDir);
        }
    }
    FileFormat configFormat = segmentGeneratorConfig.getFormat();
    if (_format == null) {
        if (configFormat == null) {
            throw new RuntimeException("Format cannot be null in config file.");
        }
        _format = configFormat;
    } else {
        if (configFormat != _format && configFormat != FileFormat.AVRO) {
            LOGGER.warn("Find format conflict in command line and config file, use config in command line: {}", _format);
        }
    }
    String configOutDir = segmentGeneratorConfig.getOutDir();
    if (_outDir == null) {
        if (configOutDir == null) {
            throw new RuntimeException("Must specify outDir.");
        }
        _outDir = configOutDir;
    } else {
        if (configOutDir != null && !configOutDir.equals(_outDir)) {
            LOGGER.warn("Find outDir conflict in command line and config file, use config in command line: {}", _outDir);
        }
    }
    if (segmentGeneratorConfig.isOverwrite()) {
        _overwrite = true;
    }
    String configTableName = segmentGeneratorConfig.getTableName();
    if (_tableName == null) {
        if (configTableName == null) {
            throw new RuntimeException("Must specify tableName.");
        }
        _tableName = configTableName;
    } else {
        if (configTableName != null && !configTableName.equals(_tableName)) {
            LOGGER.warn("Find tableName conflict in command line and config file, use config in command line: {}", _tableName);
        }
    }
    String configSegmentName = segmentGeneratorConfig.getSegmentName();
    if (_segmentName == null) {
        if (configSegmentName == null) {
            throw new RuntimeException("Must specify segmentName.");
        }
        _segmentName = configSegmentName;
    } else {
        if (configSegmentName != null && !configSegmentName.equals(_segmentName)) {
            LOGGER.warn("Find segmentName conflict in command line and config file, use config in command line: {}", _segmentName);
        }
    }
    // Filter out all input files.
    File dir = new File(_dataDir);
    if (!dir.exists() || !dir.isDirectory()) {
        throw new RuntimeException("Data directory " + _dataDir + " not found.");
    }
    File[] files = dir.listFiles(new FilenameFilter() {

        @Override
        public boolean accept(File dir, String name) {
            return name.toLowerCase().endsWith(_format.toString().toLowerCase());
        }
    });
    if ((files == null) || (files.length == 0)) {
        throw new RuntimeException("Data directory " + _dataDir + " does not contain " + _format.toString().toUpperCase() + " files.");
    }
    // Make sure output directory does not already exist, or can be overwritten.
    File outDir = new File(_outDir);
    if (outDir.exists()) {
        if (!_overwrite) {
            throw new IOException("Output directory " + _outDir + " already exists.");
        } else {
            FileUtils.deleteDirectory(outDir);
        }
    }
    // Set other generator configs from command line.
    segmentGeneratorConfig.setDataDir(_dataDir);
    segmentGeneratorConfig.setFormat(_format);
    segmentGeneratorConfig.setOutDir(_outDir);
    segmentGeneratorConfig.setOverwrite(_overwrite);
    segmentGeneratorConfig.setTableName(_tableName);
    segmentGeneratorConfig.setSegmentName(_segmentName);
    if (_schemaFile != null) {
        if (segmentGeneratorConfig.getSchemaFile() != null && !segmentGeneratorConfig.getSchemaFile().equals(_schemaFile)) {
            LOGGER.warn("Find schemaFile conflict in command line and config file, use config in command line: {}", _schemaFile);
        }
        segmentGeneratorConfig.setSchemaFile(_schemaFile);
    }
    if (_readerConfigFile != null) {
        if (segmentGeneratorConfig.getReaderConfigFile() != null && !segmentGeneratorConfig.getReaderConfigFile().equals(_readerConfigFile)) {
            LOGGER.warn("Find readerConfigFile conflict in command line and config file, use config in command line: {}", _readerConfigFile);
        }
        segmentGeneratorConfig.setReaderConfigFile(_readerConfigFile);
    }
    if (_enableStarTreeIndex) {
        segmentGeneratorConfig.setEnableStarTreeIndex(true);
    }
    if (_starTreeIndexSpecFile != null) {
        if (segmentGeneratorConfig.getStarTreeIndexSpecFile() != null && !segmentGeneratorConfig.getStarTreeIndexSpecFile().equals(_starTreeIndexSpecFile)) {
            LOGGER.warn("Find starTreeIndexSpecFile conflict in command line and config file, use config in command line: {}", _starTreeIndexSpecFile);
        }
        segmentGeneratorConfig.setStarTreeIndexSpecFile(_starTreeIndexSpecFile);
    }
    ExecutorService executor = Executors.newFixedThreadPool(_numThreads);
    int cnt = 0;
    for (final File file : files) {
        final int segCnt = cnt;
        executor.execute(new Runnable() {

            @Override
            public void run() {
                try {
                    SegmentGeneratorConfig config = new SegmentGeneratorConfig(segmentGeneratorConfig);
                    config.setInputFilePath(file.getAbsolutePath());
                    config.setSegmentName(_segmentName + "_" + segCnt);
                    config.loadConfigFiles();
                    final SegmentIndexCreationDriverImpl driver = new SegmentIndexCreationDriverImpl();
                    driver.init(config);
                    driver.build();
                } catch (Exception e) {
                    throw new RuntimeException(e);
                }
            }
        });
        cnt += 1;
    }
    executor.shutdown();
    return executor.awaitTermination(1, TimeUnit.HOURS);
}
Also used : IOException(java.io.IOException) FileFormat(com.linkedin.pinot.core.data.readers.FileFormat) IOException(java.io.IOException) SegmentIndexCreationDriverImpl(com.linkedin.pinot.core.segment.creator.impl.SegmentIndexCreationDriverImpl) FilenameFilter(java.io.FilenameFilter) SegmentGeneratorConfig(com.linkedin.pinot.core.indexsegment.generator.SegmentGeneratorConfig) ExecutorService(java.util.concurrent.ExecutorService) File(java.io.File) ObjectMapper(org.codehaus.jackson.map.ObjectMapper)

Example 5 with SegmentIndexCreationDriverImpl

use of com.linkedin.pinot.core.segment.creator.impl.SegmentIndexCreationDriverImpl in project pinot by linkedin.

the class ColumnarToStarTreeConverter method convertSegment.

/**
   * Helper method to perform the conversion.
   * @param columnarSegment Columnar segment directory to convert
   * @throws Exception
   */
private void convertSegment(File columnarSegment) throws Exception {
    PinotSegmentRecordReader pinotSegmentRecordReader = new PinotSegmentRecordReader(columnarSegment);
    SegmentGeneratorConfig config = new SegmentGeneratorConfig(pinotSegmentRecordReader.getSchema());
    config.setDataDir(_inputDirName);
    config.setInputFilePath(columnarSegment.getAbsolutePath());
    config.setFormat(FileFormat.PINOT);
    config.setEnableStarTreeIndex(true);
    config.setOutDir(_outputDirName);
    config.setStarTreeIndexSpecFile(_starTreeConfigFileName);
    config.setOverwrite(_overwrite);
    config.setSegmentName(columnarSegment.getName());
    SegmentIndexCreationDriver indexCreator = new SegmentIndexCreationDriverImpl();
    indexCreator.init(config);
    indexCreator.build();
}
Also used : SegmentIndexCreationDriver(com.linkedin.pinot.core.segment.creator.SegmentIndexCreationDriver) SegmentGeneratorConfig(com.linkedin.pinot.core.indexsegment.generator.SegmentGeneratorConfig) PinotSegmentRecordReader(com.linkedin.pinot.core.data.readers.PinotSegmentRecordReader) SegmentIndexCreationDriverImpl(com.linkedin.pinot.core.segment.creator.impl.SegmentIndexCreationDriverImpl)

Aggregations

SegmentGeneratorConfig (com.linkedin.pinot.core.indexsegment.generator.SegmentGeneratorConfig)19 SegmentIndexCreationDriverImpl (com.linkedin.pinot.core.segment.creator.impl.SegmentIndexCreationDriverImpl)19 GenericRow (com.linkedin.pinot.core.data.GenericRow)11 HashMap (java.util.HashMap)10 RecordReader (com.linkedin.pinot.core.data.readers.RecordReader)9 Schema (com.linkedin.pinot.common.data.Schema)8 File (java.io.File)8 ArrayList (java.util.ArrayList)8 DimensionFieldSpec (com.linkedin.pinot.common.data.DimensionFieldSpec)6 SegmentIndexCreationDriver (com.linkedin.pinot.core.segment.creator.SegmentIndexCreationDriver)5 FieldSpec (com.linkedin.pinot.common.data.FieldSpec)4 Random (java.util.Random)4 TestRecordReader (com.linkedin.pinot.core.data.readers.TestRecordReader)3 IndexSegment (com.linkedin.pinot.core.indexsegment.IndexSegment)2 BaseOperator (com.linkedin.pinot.core.operator.BaseOperator)2 TestDataRecordReader (com.linkedin.pinot.util.TestDataRecordReader)2 URL (java.net.URL)2 BeforeClass (org.testng.annotations.BeforeClass)2 BeforeTest (org.testng.annotations.BeforeTest)2 MetricFieldSpec (com.linkedin.pinot.common.data.MetricFieldSpec)1