use of com.linkedin.pinot.core.data.readers.FileFormat in project pinot by linkedin.
the class CreateSegmentCommand method execute.
@Override
public boolean execute() throws Exception {
LOGGER.info("Executing command: {}", toString());
// Load generator config if exist.
final SegmentGeneratorConfig segmentGeneratorConfig;
if (_generatorConfigFile != null) {
segmentGeneratorConfig = new ObjectMapper().readValue(new File(_generatorConfigFile), SegmentGeneratorConfig.class);
} else {
segmentGeneratorConfig = new SegmentGeneratorConfig();
}
// Load config from segment generator config.
String configDataDir = segmentGeneratorConfig.getDataDir();
if (_dataDir == null) {
if (configDataDir == null) {
throw new RuntimeException("Must specify dataDir.");
}
_dataDir = configDataDir;
} else {
if (configDataDir != null && !configDataDir.equals(_dataDir)) {
LOGGER.warn("Find dataDir conflict in command line and config file, use config in command line: {}", _dataDir);
}
}
FileFormat configFormat = segmentGeneratorConfig.getFormat();
if (_format == null) {
if (configFormat == null) {
throw new RuntimeException("Format cannot be null in config file.");
}
_format = configFormat;
} else {
if (configFormat != _format && configFormat != FileFormat.AVRO) {
LOGGER.warn("Find format conflict in command line and config file, use config in command line: {}", _format);
}
}
String configOutDir = segmentGeneratorConfig.getOutDir();
if (_outDir == null) {
if (configOutDir == null) {
throw new RuntimeException("Must specify outDir.");
}
_outDir = configOutDir;
} else {
if (configOutDir != null && !configOutDir.equals(_outDir)) {
LOGGER.warn("Find outDir conflict in command line and config file, use config in command line: {}", _outDir);
}
}
if (segmentGeneratorConfig.isOverwrite()) {
_overwrite = true;
}
String configTableName = segmentGeneratorConfig.getTableName();
if (_tableName == null) {
if (configTableName == null) {
throw new RuntimeException("Must specify tableName.");
}
_tableName = configTableName;
} else {
if (configTableName != null && !configTableName.equals(_tableName)) {
LOGGER.warn("Find tableName conflict in command line and config file, use config in command line: {}", _tableName);
}
}
String configSegmentName = segmentGeneratorConfig.getSegmentName();
if (_segmentName == null) {
if (configSegmentName == null) {
throw new RuntimeException("Must specify segmentName.");
}
_segmentName = configSegmentName;
} else {
if (configSegmentName != null && !configSegmentName.equals(_segmentName)) {
LOGGER.warn("Find segmentName conflict in command line and config file, use config in command line: {}", _segmentName);
}
}
// Filter out all input files.
File dir = new File(_dataDir);
if (!dir.exists() || !dir.isDirectory()) {
throw new RuntimeException("Data directory " + _dataDir + " not found.");
}
File[] files = dir.listFiles(new FilenameFilter() {
@Override
public boolean accept(File dir, String name) {
return name.toLowerCase().endsWith(_format.toString().toLowerCase());
}
});
if ((files == null) || (files.length == 0)) {
throw new RuntimeException("Data directory " + _dataDir + " does not contain " + _format.toString().toUpperCase() + " files.");
}
// Make sure output directory does not already exist, or can be overwritten.
File outDir = new File(_outDir);
if (outDir.exists()) {
if (!_overwrite) {
throw new IOException("Output directory " + _outDir + " already exists.");
} else {
FileUtils.deleteDirectory(outDir);
}
}
// Set other generator configs from command line.
segmentGeneratorConfig.setDataDir(_dataDir);
segmentGeneratorConfig.setFormat(_format);
segmentGeneratorConfig.setOutDir(_outDir);
segmentGeneratorConfig.setOverwrite(_overwrite);
segmentGeneratorConfig.setTableName(_tableName);
segmentGeneratorConfig.setSegmentName(_segmentName);
if (_schemaFile != null) {
if (segmentGeneratorConfig.getSchemaFile() != null && !segmentGeneratorConfig.getSchemaFile().equals(_schemaFile)) {
LOGGER.warn("Find schemaFile conflict in command line and config file, use config in command line: {}", _schemaFile);
}
segmentGeneratorConfig.setSchemaFile(_schemaFile);
}
if (_readerConfigFile != null) {
if (segmentGeneratorConfig.getReaderConfigFile() != null && !segmentGeneratorConfig.getReaderConfigFile().equals(_readerConfigFile)) {
LOGGER.warn("Find readerConfigFile conflict in command line and config file, use config in command line: {}", _readerConfigFile);
}
segmentGeneratorConfig.setReaderConfigFile(_readerConfigFile);
}
if (_enableStarTreeIndex) {
segmentGeneratorConfig.setEnableStarTreeIndex(true);
}
if (_starTreeIndexSpecFile != null) {
if (segmentGeneratorConfig.getStarTreeIndexSpecFile() != null && !segmentGeneratorConfig.getStarTreeIndexSpecFile().equals(_starTreeIndexSpecFile)) {
LOGGER.warn("Find starTreeIndexSpecFile conflict in command line and config file, use config in command line: {}", _starTreeIndexSpecFile);
}
segmentGeneratorConfig.setStarTreeIndexSpecFile(_starTreeIndexSpecFile);
}
ExecutorService executor = Executors.newFixedThreadPool(_numThreads);
int cnt = 0;
for (final File file : files) {
final int segCnt = cnt;
executor.execute(new Runnable() {
@Override
public void run() {
try {
SegmentGeneratorConfig config = new SegmentGeneratorConfig(segmentGeneratorConfig);
config.setInputFilePath(file.getAbsolutePath());
config.setSegmentName(_segmentName + "_" + segCnt);
config.loadConfigFiles();
final SegmentIndexCreationDriverImpl driver = new SegmentIndexCreationDriverImpl();
driver.init(config);
driver.build();
} catch (Exception e) {
throw new RuntimeException(e);
}
}
});
cnt += 1;
}
executor.shutdown();
return executor.awaitTermination(1, TimeUnit.HOURS);
}
Aggregations