Search in sources :

Example 1 with FileFormat

use of com.linkedin.pinot.core.data.readers.FileFormat in project pinot by linkedin.

the class CreateSegmentCommand method execute.

@Override
public boolean execute() throws Exception {
    LOGGER.info("Executing command: {}", toString());
    // Load generator config if exist.
    final SegmentGeneratorConfig segmentGeneratorConfig;
    if (_generatorConfigFile != null) {
        segmentGeneratorConfig = new ObjectMapper().readValue(new File(_generatorConfigFile), SegmentGeneratorConfig.class);
    } else {
        segmentGeneratorConfig = new SegmentGeneratorConfig();
    }
    // Load config from segment generator config.
    String configDataDir = segmentGeneratorConfig.getDataDir();
    if (_dataDir == null) {
        if (configDataDir == null) {
            throw new RuntimeException("Must specify dataDir.");
        }
        _dataDir = configDataDir;
    } else {
        if (configDataDir != null && !configDataDir.equals(_dataDir)) {
            LOGGER.warn("Find dataDir conflict in command line and config file, use config in command line: {}", _dataDir);
        }
    }
    FileFormat configFormat = segmentGeneratorConfig.getFormat();
    if (_format == null) {
        if (configFormat == null) {
            throw new RuntimeException("Format cannot be null in config file.");
        }
        _format = configFormat;
    } else {
        if (configFormat != _format && configFormat != FileFormat.AVRO) {
            LOGGER.warn("Find format conflict in command line and config file, use config in command line: {}", _format);
        }
    }
    String configOutDir = segmentGeneratorConfig.getOutDir();
    if (_outDir == null) {
        if (configOutDir == null) {
            throw new RuntimeException("Must specify outDir.");
        }
        _outDir = configOutDir;
    } else {
        if (configOutDir != null && !configOutDir.equals(_outDir)) {
            LOGGER.warn("Find outDir conflict in command line and config file, use config in command line: {}", _outDir);
        }
    }
    if (segmentGeneratorConfig.isOverwrite()) {
        _overwrite = true;
    }
    String configTableName = segmentGeneratorConfig.getTableName();
    if (_tableName == null) {
        if (configTableName == null) {
            throw new RuntimeException("Must specify tableName.");
        }
        _tableName = configTableName;
    } else {
        if (configTableName != null && !configTableName.equals(_tableName)) {
            LOGGER.warn("Find tableName conflict in command line and config file, use config in command line: {}", _tableName);
        }
    }
    String configSegmentName = segmentGeneratorConfig.getSegmentName();
    if (_segmentName == null) {
        if (configSegmentName == null) {
            throw new RuntimeException("Must specify segmentName.");
        }
        _segmentName = configSegmentName;
    } else {
        if (configSegmentName != null && !configSegmentName.equals(_segmentName)) {
            LOGGER.warn("Find segmentName conflict in command line and config file, use config in command line: {}", _segmentName);
        }
    }
    // Filter out all input files.
    File dir = new File(_dataDir);
    if (!dir.exists() || !dir.isDirectory()) {
        throw new RuntimeException("Data directory " + _dataDir + " not found.");
    }
    File[] files = dir.listFiles(new FilenameFilter() {

        @Override
        public boolean accept(File dir, String name) {
            return name.toLowerCase().endsWith(_format.toString().toLowerCase());
        }
    });
    if ((files == null) || (files.length == 0)) {
        throw new RuntimeException("Data directory " + _dataDir + " does not contain " + _format.toString().toUpperCase() + " files.");
    }
    // Make sure output directory does not already exist, or can be overwritten.
    File outDir = new File(_outDir);
    if (outDir.exists()) {
        if (!_overwrite) {
            throw new IOException("Output directory " + _outDir + " already exists.");
        } else {
            FileUtils.deleteDirectory(outDir);
        }
    }
    // Set other generator configs from command line.
    segmentGeneratorConfig.setDataDir(_dataDir);
    segmentGeneratorConfig.setFormat(_format);
    segmentGeneratorConfig.setOutDir(_outDir);
    segmentGeneratorConfig.setOverwrite(_overwrite);
    segmentGeneratorConfig.setTableName(_tableName);
    segmentGeneratorConfig.setSegmentName(_segmentName);
    if (_schemaFile != null) {
        if (segmentGeneratorConfig.getSchemaFile() != null && !segmentGeneratorConfig.getSchemaFile().equals(_schemaFile)) {
            LOGGER.warn("Find schemaFile conflict in command line and config file, use config in command line: {}", _schemaFile);
        }
        segmentGeneratorConfig.setSchemaFile(_schemaFile);
    }
    if (_readerConfigFile != null) {
        if (segmentGeneratorConfig.getReaderConfigFile() != null && !segmentGeneratorConfig.getReaderConfigFile().equals(_readerConfigFile)) {
            LOGGER.warn("Find readerConfigFile conflict in command line and config file, use config in command line: {}", _readerConfigFile);
        }
        segmentGeneratorConfig.setReaderConfigFile(_readerConfigFile);
    }
    if (_enableStarTreeIndex) {
        segmentGeneratorConfig.setEnableStarTreeIndex(true);
    }
    if (_starTreeIndexSpecFile != null) {
        if (segmentGeneratorConfig.getStarTreeIndexSpecFile() != null && !segmentGeneratorConfig.getStarTreeIndexSpecFile().equals(_starTreeIndexSpecFile)) {
            LOGGER.warn("Find starTreeIndexSpecFile conflict in command line and config file, use config in command line: {}", _starTreeIndexSpecFile);
        }
        segmentGeneratorConfig.setStarTreeIndexSpecFile(_starTreeIndexSpecFile);
    }
    ExecutorService executor = Executors.newFixedThreadPool(_numThreads);
    int cnt = 0;
    for (final File file : files) {
        final int segCnt = cnt;
        executor.execute(new Runnable() {

            @Override
            public void run() {
                try {
                    SegmentGeneratorConfig config = new SegmentGeneratorConfig(segmentGeneratorConfig);
                    config.setInputFilePath(file.getAbsolutePath());
                    config.setSegmentName(_segmentName + "_" + segCnt);
                    config.loadConfigFiles();
                    final SegmentIndexCreationDriverImpl driver = new SegmentIndexCreationDriverImpl();
                    driver.init(config);
                    driver.build();
                } catch (Exception e) {
                    throw new RuntimeException(e);
                }
            }
        });
        cnt += 1;
    }
    executor.shutdown();
    return executor.awaitTermination(1, TimeUnit.HOURS);
}
Also used : IOException(java.io.IOException) FileFormat(com.linkedin.pinot.core.data.readers.FileFormat) IOException(java.io.IOException) SegmentIndexCreationDriverImpl(com.linkedin.pinot.core.segment.creator.impl.SegmentIndexCreationDriverImpl) FilenameFilter(java.io.FilenameFilter) SegmentGeneratorConfig(com.linkedin.pinot.core.indexsegment.generator.SegmentGeneratorConfig) ExecutorService(java.util.concurrent.ExecutorService) File(java.io.File) ObjectMapper(org.codehaus.jackson.map.ObjectMapper)

Aggregations

FileFormat (com.linkedin.pinot.core.data.readers.FileFormat)1 SegmentGeneratorConfig (com.linkedin.pinot.core.indexsegment.generator.SegmentGeneratorConfig)1 SegmentIndexCreationDriverImpl (com.linkedin.pinot.core.segment.creator.impl.SegmentIndexCreationDriverImpl)1 File (java.io.File)1 FilenameFilter (java.io.FilenameFilter)1 IOException (java.io.IOException)1 ExecutorService (java.util.concurrent.ExecutorService)1 ObjectMapper (org.codehaus.jackson.map.ObjectMapper)1