use of com.linkedin.pinot.core.indexsegment.generator.SegmentGeneratorConfig in project pinot by linkedin.
the class FileBasedSentineTest method setup.
/**
 * Prepares the test fixture: generates Avro data, builds one segment per
 * (Avro file, table) pair into the server bootstrap directory, starts the
 * file-based server and broker, and issues one selection query.
 */
@BeforeClass
public void setup() throws Exception {
  url = new URL("http://localhost:" + FileBasedServerBrokerStarters.BROKER_CLIENT_PORT + "/query");

  // Step 1: describe the columns of the generated data set.
  final String[] columnNames = { "dimention1", "dimention2", "dimention3", "dimention4", "metric1", "daysSinceEpoch" };
  final Map<String, DataType> typeByColumn = new HashMap<String, FieldSpec.DataType>();
  final Map<String, FieldType> fieldTypeByColumn = new HashMap<String, FieldType>();
  final Map<String, Integer> cardinalityByColumn = new HashMap<String, Integer>();
  // DataGeneratorSpec requires time-unit and range maps; they stay empty because
  // every column in this test is declared as a dimension.
  final Map<String, TimeUnit> timeUnitByColumn = new HashMap<String, TimeUnit>();
  final Map<String, IntRange> rangeByColumn = new HashMap<String, IntRange>();

  for (final String column : columnNames) {
    // Only the first dimension is a string column; everything else is an int.
    final DataType type = "dimention1".equals(column) ? DataType.STRING : DataType.INT;
    typeByColumn.put(column, type);
    cardinalityByColumn.put(column, 1000);
    fieldTypeByColumn.put(column, FieldType.DIMENSION);
  }

  // Step 2: generate fresh Avro data, discarding output of any previous run.
  if (avroDataDir.exists()) {
    FileUtils.deleteDirectory(avroDataDir);
  }
  final DataGeneratorSpec generatorSpec = new DataGeneratorSpec(Arrays.asList(columnNames), cardinalityByColumn,
      rangeByColumn, typeByColumn, fieldTypeByColumn, timeUnitByColumn, FileFormat.AVRO,
      avroDataDir.getAbsolutePath(), true);
  generator = new DataGenerator();
  generator.init(generatorSpec);
  generator.generate(100000L, 2);

  // Step 3: build segments into a clean bootstrap directory.
  final File segmentRoot = new File(FileBasedServerBrokerStarters.SERVER_BOOTSTRAP_DIR);
  if (segmentRoot.exists()) {
    FileUtils.deleteDirectory(segmentRoot);
  }
  segmentRoot.mkdir();

  int segmentIndex = 0;
  for (final File avroFile : avroDataDir.listFiles()) {
    for (final String tableName : FileBasedServerBrokerStarters.TABLE_NAMES) {
      // Each segment gets its own numbered output directory.
      final File segmentDir = new File(segmentRoot, "segment-" + segmentIndex);
      final SegmentGeneratorConfig segmentConfig = SegmentTestUtils.getSegmentGenSpecWithSchemAndProjectedColumns(
          avroFile, segmentDir, "daysSinceEpoch", TimeUnit.DAYS, tableName);
      final SegmentIndexCreationDriver segmentDriver = SegmentCreationDriverFactory.get(null);
      segmentDriver.init(segmentConfig);
      segmentDriver.build();
      segmentIndex++;
    }
  }

  // Step 4: start the server and the broker.
  starter = new FileBasedServerBrokerStarters();
  starter.startAll();

  // Issue one selection query; the response is not inspected here. Capture it
  // if sample values are needed for writing filter queries.
  postQuery("select * from 'table1' limit 100", "http://localhost:" + FileBasedServerBrokerStarters.BROKER_CLIENT_PORT);
}
use of com.linkedin.pinot.core.indexsegment.generator.SegmentGeneratorConfig in project pinot by linkedin.
the class SegmentTestUtils method getSegmentGenSpecWithSchemAndProjectedColumns.
/**
 * Creates a segment generator config for an Avro input file.
 *
 * <p>The schema is obtained via {@code extractSchemaFromAvroWithoutTime(inputAvro)}.
 * The config uses the AVRO format, segment version v1, and requests inverted
 * indexes for all columns.
 *
 * @param inputAvro  Avro file to read records from
 * @param outputDir  directory the segment is written to
 * @param timeColumn name of the time column
 * @param timeUnit   time unit of the segment
 * @param tableName  table the segment belongs to
 * @return the populated generator config
 * @throws FileNotFoundException declared by the signature
 * @throws IOException declared by the signature
 */
public static SegmentGeneratorConfig getSegmentGenSpecWithSchemAndProjectedColumns(File inputAvro, File outputDir, String timeColumn, TimeUnit timeUnit, String tableName) throws FileNotFoundException, IOException {
  final SegmentGeneratorConfig generatorConfig =
      new SegmentGeneratorConfig(extractSchemaFromAvroWithoutTime(inputAvro));

  final String avroPath = inputAvro.getAbsolutePath();
  generatorConfig.setInputFilePath(avroPath);

  // Time column settings.
  generatorConfig.setTimeColumnName(timeColumn);
  generatorConfig.setSegmentTimeUnit(timeUnit);

  // Input format and on-disk segment version.
  generatorConfig.setFormat(FileFormat.AVRO);
  generatorConfig.setSegmentVersion(SegmentVersion.v1);

  generatorConfig.setTableName(tableName);

  final String segmentOutputPath = outputDir.getAbsolutePath();
  generatorConfig.setOutDir(segmentOutputPath);

  generatorConfig.createInvertedIndexForAllColumns();
  return generatorConfig;
}
use of com.linkedin.pinot.core.indexsegment.generator.SegmentGeneratorConfig in project pinot by linkedin.
the class ResourceTestHelper method setupSegment.
/**
 * Builds a segment from an Avro resource, loads it memory-mapped, and adds it
 * to the server instance's data manager.
 *
 * @param tableName          table the segment belongs to
 * @param avroDataFilePath   classpath-relative path of the Avro data resource
 * @param segmentNamePostfix postfix set on the generated segment's name
 * @return the loaded segment
 * @throws Exception if building, loading, or adding the segment fails
 */
public IndexSegment setupSegment(String tableName, String avroDataFilePath, String segmentNamePostfix) throws Exception {
  final String avroFilePath = TestUtils.getFileFromResourceUrl(
      SegmentV1V2ToV3FormatConverter.class.getClassLoader().getResource(avroDataFilePath));
  final File avroFile = new File(avroFilePath);

  // HOURS is used on purpose so the time unit is a non-default value for testing.
  final SegmentGeneratorConfig generatorConfig = SegmentTestUtils.getSegmentGenSpecWithSchemAndProjectedColumns(
      avroFile, INDEX_DIR, "daysSinceEpoch", TimeUnit.HOURS, tableName);
  generatorConfig.setSegmentNamePostfix(segmentNamePostfix);
  generatorConfig.setTimeColumnName("daysSinceEpoch");

  final SegmentIndexCreationDriver creationDriver = SegmentCreationDriverFactory.get(null);
  creationDriver.init(generatorConfig);
  creationDriver.build();

  // The driver reports the final segment name, which is also its directory name under INDEX_DIR.
  final File builtSegmentDir = new File(INDEX_DIR, creationDriver.getSegmentName());
  final IndexSegment loadedSegment = ColumnarSegmentLoader.load(builtSegmentDir, ReadMode.mmap);
  serverInstance.getInstanceDataManager().addSegment(loadedSegment.getSegmentMetadata(), null, null);
  return loadedSegment;
}
use of com.linkedin.pinot.core.indexsegment.generator.SegmentGeneratorConfig in project pinot by linkedin.
the class CreateSegmentCommand method execute.
/**
 * Creates one segment per matching input file in the data directory.
 *
 * <p>Settings are resolved from the command line and, if given, from the
 * generator config file. A command-line value wins over the config file value,
 * and a warning is logged when the two disagree. The overwrite flag is enabled
 * if either source enables it. Segments are built in parallel on a fixed-size
 * thread pool of {@code _numThreads} threads; each segment is named
 * {@code <segmentName>_<index>}.
 *
 * @return {@code true} only if all segment builds finished within one hour and
 *         none of them failed; {@code false} on timeout or if any build threw
 * @throws RuntimeException if dataDir, format, outDir, tableName or segmentName
 *         is missing from both sources, or the data directory is missing or
 *         holds no file of the requested format
 * @throws IOException if the output directory exists and overwrite is off
 * @throws Exception for other failures, e.g. reading the config file or being
 *         interrupted while waiting for the builds
 */
@Override
public boolean execute() throws Exception {
  LOGGER.info("Executing command: {}", toString());

  // Load generator config if exist.
  final SegmentGeneratorConfig segmentGeneratorConfig;
  if (_generatorConfigFile != null) {
    segmentGeneratorConfig = new ObjectMapper().readValue(new File(_generatorConfigFile), SegmentGeneratorConfig.class);
  } else {
    segmentGeneratorConfig = new SegmentGeneratorConfig();
  }

  // Load config from segment generator config.
  String configDataDir = segmentGeneratorConfig.getDataDir();
  if (_dataDir == null) {
    if (configDataDir == null) {
      throw new RuntimeException("Must specify dataDir.");
    }
    _dataDir = configDataDir;
  } else {
    if (configDataDir != null && !configDataDir.equals(_dataDir)) {
      LOGGER.warn("Find dataDir conflict in command line and config file, use config in command line: {}", _dataDir);
    }
  }

  FileFormat configFormat = segmentGeneratorConfig.getFormat();
  if (_format == null) {
    if (configFormat == null) {
      throw new RuntimeException("Format cannot be null in config file.");
    }
    _format = configFormat;
  } else {
    // NOTE(review): a config format of AVRO is never reported as a conflict,
    // presumably because AVRO is the config's default value - confirm.
    if (configFormat != _format && configFormat != FileFormat.AVRO) {
      LOGGER.warn("Find format conflict in command line and config file, use config in command line: {}", _format);
    }
  }

  String configOutDir = segmentGeneratorConfig.getOutDir();
  if (_outDir == null) {
    if (configOutDir == null) {
      throw new RuntimeException("Must specify outDir.");
    }
    _outDir = configOutDir;
  } else {
    if (configOutDir != null && !configOutDir.equals(_outDir)) {
      LOGGER.warn("Find outDir conflict in command line and config file, use config in command line: {}", _outDir);
    }
  }

  // Overwrite is enabled when either the command line or the config file asks for it.
  if (segmentGeneratorConfig.isOverwrite()) {
    _overwrite = true;
  }

  String configTableName = segmentGeneratorConfig.getTableName();
  if (_tableName == null) {
    if (configTableName == null) {
      throw new RuntimeException("Must specify tableName.");
    }
    _tableName = configTableName;
  } else {
    if (configTableName != null && !configTableName.equals(_tableName)) {
      LOGGER.warn("Find tableName conflict in command line and config file, use config in command line: {}", _tableName);
    }
  }

  String configSegmentName = segmentGeneratorConfig.getSegmentName();
  if (_segmentName == null) {
    if (configSegmentName == null) {
      throw new RuntimeException("Must specify segmentName.");
    }
    _segmentName = configSegmentName;
  } else {
    if (configSegmentName != null && !configSegmentName.equals(_segmentName)) {
      LOGGER.warn("Find segmentName conflict in command line and config file, use config in command line: {}", _segmentName);
    }
  }

  // Filter out all input files.
  File dir = new File(_dataDir);
  if (!dir.exists() || !dir.isDirectory()) {
    throw new RuntimeException("Data directory " + _dataDir + " not found.");
  }

  File[] files = dir.listFiles(new FilenameFilter() {
    @Override
    public boolean accept(File dir, String name) {
      // Locale.ROOT keeps the case folding independent of the JVM default locale
      // (e.g. under a Turkish locale "PINOT" would not lower-case to "pinot").
      return name.toLowerCase(java.util.Locale.ROOT).endsWith(_format.toString().toLowerCase(java.util.Locale.ROOT));
    }
  });

  if ((files == null) || (files.length == 0)) {
    throw new RuntimeException("Data directory " + _dataDir + " does not contain " + _format.toString().toUpperCase() + " files.");
  }

  // Make sure output directory does not already exist, or can be overwritten.
  File outDir = new File(_outDir);
  if (outDir.exists()) {
    if (!_overwrite) {
      throw new IOException("Output directory " + _outDir + " already exists.");
    } else {
      FileUtils.deleteDirectory(outDir);
    }
  }

  // Set other generator configs from command line.
  segmentGeneratorConfig.setDataDir(_dataDir);
  segmentGeneratorConfig.setFormat(_format);
  segmentGeneratorConfig.setOutDir(_outDir);
  segmentGeneratorConfig.setOverwrite(_overwrite);
  segmentGeneratorConfig.setTableName(_tableName);
  segmentGeneratorConfig.setSegmentName(_segmentName);
  if (_schemaFile != null) {
    if (segmentGeneratorConfig.getSchemaFile() != null && !segmentGeneratorConfig.getSchemaFile().equals(_schemaFile)) {
      LOGGER.warn("Find schemaFile conflict in command line and config file, use config in command line: {}", _schemaFile);
    }
    segmentGeneratorConfig.setSchemaFile(_schemaFile);
  }
  if (_readerConfigFile != null) {
    if (segmentGeneratorConfig.getReaderConfigFile() != null && !segmentGeneratorConfig.getReaderConfigFile().equals(_readerConfigFile)) {
      LOGGER.warn("Find readerConfigFile conflict in command line and config file, use config in command line: {}", _readerConfigFile);
    }
    segmentGeneratorConfig.setReaderConfigFile(_readerConfigFile);
  }
  if (_enableStarTreeIndex) {
    segmentGeneratorConfig.setEnableStarTreeIndex(true);
  }
  if (_starTreeIndexSpecFile != null) {
    if (segmentGeneratorConfig.getStarTreeIndexSpecFile() != null && !segmentGeneratorConfig.getStarTreeIndexSpecFile().equals(_starTreeIndexSpecFile)) {
      LOGGER.warn("Find starTreeIndexSpecFile conflict in command line and config file, use config in command line: {}", _starTreeIndexSpecFile);
    }
    segmentGeneratorConfig.setStarTreeIndexSpecFile(_starTreeIndexSpecFile);
  }

  // Records whether any worker failed. An exception thrown from a Runnable passed to
  // execute() never reaches this thread, so without the flag a failed build would still
  // be reported as success.
  final java.util.concurrent.atomic.AtomicBoolean buildFailed = new java.util.concurrent.atomic.AtomicBoolean(false);

  ExecutorService executor = Executors.newFixedThreadPool(_numThreads);
  try {
    int cnt = 0;
    for (final File file : files) {
      final int segCnt = cnt;

      executor.execute(new Runnable() {
        @Override
        public void run() {
          try {
            // Each task works on its own copy of the shared config.
            SegmentGeneratorConfig config = new SegmentGeneratorConfig(segmentGeneratorConfig);
            config.setInputFilePath(file.getAbsolutePath());
            config.setSegmentName(_segmentName + "_" + segCnt);
            config.loadConfigFiles();

            final SegmentIndexCreationDriverImpl driver = new SegmentIndexCreationDriverImpl();
            driver.init(config);
            driver.build();
          } catch (Exception e) {
            buildFailed.set(true);
            // Rethrow so the failure is still surfaced by the worker thread as before.
            throw new RuntimeException(e);
          }
        }
      });
      cnt += 1;
    }
  } finally {
    // Always stop accepting tasks so the pool threads can exit, even if submission failed.
    executor.shutdown();
  }

  boolean finishedInTime = executor.awaitTermination(1, TimeUnit.HOURS);
  return finishedInTime && !buildFailed.get();
}
use of com.linkedin.pinot.core.indexsegment.generator.SegmentGeneratorConfig in project pinot by linkedin.
the class ColumnarToStarTreeConverter method convertSegment.
/**
 * Rebuilds a single columnar segment with the star-tree index enabled.
 *
 * <p>The schema is read from the existing segment, and the segment itself is
 * used as the input (format {@code PINOT}). The output keeps the input
 * segment's directory name as its segment name.
 *
 * @param columnarSegment Columnar segment directory to convert
 * @throws Exception if reading the segment or building the new one fails
 */
private void convertSegment(File columnarSegment) throws Exception {
  // The reader is used here only to obtain the schema of the existing segment.
  PinotSegmentRecordReader segmentReader = new PinotSegmentRecordReader(columnarSegment);
  SegmentGeneratorConfig starTreeConfig = new SegmentGeneratorConfig(segmentReader.getSchema());

  // Input: the existing segment, read in PINOT format.
  starTreeConfig.setDataDir(_inputDirName);
  final String segmentPath = columnarSegment.getAbsolutePath();
  starTreeConfig.setInputFilePath(segmentPath);
  starTreeConfig.setFormat(FileFormat.PINOT);

  // Output: star-tree enabled segment with the same name.
  starTreeConfig.setEnableStarTreeIndex(true);
  starTreeConfig.setOutDir(_outputDirName);
  starTreeConfig.setStarTreeIndexSpecFile(_starTreeConfigFileName);
  starTreeConfig.setOverwrite(_overwrite);
  final String segmentName = columnarSegment.getName();
  starTreeConfig.setSegmentName(segmentName);

  SegmentIndexCreationDriver creationDriver = new SegmentIndexCreationDriverImpl();
  creationDriver.init(starTreeConfig);
  creationDriver.build();
}
Aggregations