Search in sources :

Example 21 with SegmentGeneratorConfig

use of com.linkedin.pinot.core.indexsegment.generator.SegmentGeneratorConfig in project pinot by linkedin.

the class FileBasedSentineTest method setup.

@BeforeClass
public void setup() throws Exception {
    url = new URL("http://localhost:" + FileBasedServerBrokerStarters.BROKER_CLIENT_PORT + "/query");
    // lets generate data
    final String[] columns = { "dimention1", "dimention2", "dimention3", "dimention4", "metric1", "daysSinceEpoch" };
    final Map<String, DataType> dataTypes = new HashMap<String, FieldSpec.DataType>();
    final Map<String, FieldType> fieldTypes = new HashMap<String, FieldType>();
    final Map<String, TimeUnit> timeUnits = new HashMap<String, TimeUnit>();
    final Map<String, Integer> cardinality = new HashMap<String, Integer>();
    // Crate empty range map as the signature of DataGeneratorSpec has changed, and this test does not
    // use metric/time as fieldType.
    final Map<String, IntRange> range = new HashMap<String, IntRange>();
    for (final String col : columns) {
        if (col.equals("dimention1")) {
            dataTypes.put(col, DataType.STRING);
            cardinality.put(col, 1000);
        } else {
            dataTypes.put(col, DataType.INT);
            cardinality.put(col, 1000);
        }
        fieldTypes.put(col, FieldType.DIMENSION);
    }
    if (avroDataDir.exists()) {
        FileUtils.deleteDirectory(avroDataDir);
    }
    final DataGeneratorSpec spec = new DataGeneratorSpec(Arrays.asList(columns), cardinality, range, dataTypes, fieldTypes, timeUnits, FileFormat.AVRO, avroDataDir.getAbsolutePath(), true);
    generator = new DataGenerator();
    generator.init(spec);
    generator.generate(100000L, 2);
    // lets make segments now
    final File bootstrapDir = new File(FileBasedServerBrokerStarters.SERVER_BOOTSTRAP_DIR);
    if (bootstrapDir.exists()) {
        FileUtils.deleteDirectory(bootstrapDir);
    }
    bootstrapDir.mkdir();
    int counter = 0;
    for (final File avro : avroDataDir.listFiles()) {
        for (final String table : FileBasedServerBrokerStarters.TABLE_NAMES) {
            final SegmentGeneratorConfig genConfig = SegmentTestUtils.getSegmentGenSpecWithSchemAndProjectedColumns(avro, new File(bootstrapDir, "segment-" + counter), "daysSinceEpoch", TimeUnit.DAYS, table);
            final SegmentIndexCreationDriver driver = SegmentCreationDriverFactory.get(null);
            driver.init(genConfig);
            driver.build();
            counter++;
        }
    }
    // lets start the server and the broker now
    starter = new FileBasedServerBrokerStarters();
    starter.startAll();
    // pick some values from here if you need to use it for running filter queries
    final JSONObject selectionRequestResponse = postQuery("select * from 'table1' limit 100", "http://localhost:" + FileBasedServerBrokerStarters.BROKER_CLIENT_PORT);
//    System.out.println(selectionRequestResponse.toString(1));
}
Also used : SegmentIndexCreationDriver(com.linkedin.pinot.core.segment.creator.SegmentIndexCreationDriver) HashMap(java.util.HashMap) IntRange(org.apache.commons.lang.math.IntRange) URL(java.net.URL) FieldSpec(com.linkedin.pinot.common.data.FieldSpec) FieldType(com.linkedin.pinot.common.data.FieldSpec.FieldType) JSONObject(org.json.JSONObject) DataGenerator(com.linkedin.pinot.tools.data.generator.DataGenerator) SegmentGeneratorConfig(com.linkedin.pinot.core.indexsegment.generator.SegmentGeneratorConfig) DataType(com.linkedin.pinot.common.data.FieldSpec.DataType) TimeUnit(java.util.concurrent.TimeUnit) File(java.io.File) DataGeneratorSpec(com.linkedin.pinot.tools.data.generator.DataGeneratorSpec) BeforeClass(org.testng.annotations.BeforeClass)

Example 22 with SegmentGeneratorConfig

use of com.linkedin.pinot.core.indexsegment.generator.SegmentGeneratorConfig in project pinot by linkedin.

the class SegmentTestUtils method getSegmentGenSpecWithSchemAndProjectedColumns.

public static SegmentGeneratorConfig getSegmentGenSpecWithSchemAndProjectedColumns(File inputAvro, File outputDir, String timeColumn, TimeUnit timeUnit, String tableName) throws FileNotFoundException, IOException {
    final SegmentGeneratorConfig segmentGenSpec = new SegmentGeneratorConfig(extractSchemaFromAvroWithoutTime(inputAvro));
    segmentGenSpec.setInputFilePath(inputAvro.getAbsolutePath());
    segmentGenSpec.setTimeColumnName(timeColumn);
    segmentGenSpec.setSegmentTimeUnit(timeUnit);
    segmentGenSpec.setFormat(FileFormat.AVRO);
    segmentGenSpec.setSegmentVersion(SegmentVersion.v1);
    segmentGenSpec.setTableName(tableName);
    segmentGenSpec.setOutDir(outputDir.getAbsolutePath());
    segmentGenSpec.createInvertedIndexForAllColumns();
    return segmentGenSpec;
}
Also used : SegmentGeneratorConfig(com.linkedin.pinot.core.indexsegment.generator.SegmentGeneratorConfig)

Example 23 with SegmentGeneratorConfig

use of com.linkedin.pinot.core.indexsegment.generator.SegmentGeneratorConfig in project pinot by linkedin.

the class ResourceTestHelper method setupSegment.

public IndexSegment setupSegment(String tableName, String avroDataFilePath, String segmentNamePostfix) throws Exception {
    final String filePath = TestUtils.getFileFromResourceUrl(SegmentV1V2ToV3FormatConverter.class.getClassLoader().getResource(avroDataFilePath));
    // intentionally changed this to TimeUnit.Hours to make it non-default for testing
    final SegmentGeneratorConfig config = SegmentTestUtils.getSegmentGenSpecWithSchemAndProjectedColumns(new File(filePath), INDEX_DIR, "daysSinceEpoch", TimeUnit.HOURS, tableName);
    config.setSegmentNamePostfix(segmentNamePostfix);
    config.setTimeColumnName("daysSinceEpoch");
    final SegmentIndexCreationDriver driver = SegmentCreationDriverFactory.get(null);
    driver.init(config);
    driver.build();
    File segmentDirectory = new File(INDEX_DIR, driver.getSegmentName());
    IndexSegment segment = ColumnarSegmentLoader.load(segmentDirectory, ReadMode.mmap);
    serverInstance.getInstanceDataManager().addSegment(segment.getSegmentMetadata(), null, null);
    return segment;
}
Also used : SegmentIndexCreationDriver(com.linkedin.pinot.core.segment.creator.SegmentIndexCreationDriver) IndexSegment(com.linkedin.pinot.core.indexsegment.IndexSegment) SegmentGeneratorConfig(com.linkedin.pinot.core.indexsegment.generator.SegmentGeneratorConfig) File(java.io.File)

Example 24 with SegmentGeneratorConfig

use of com.linkedin.pinot.core.indexsegment.generator.SegmentGeneratorConfig in project pinot by linkedin.

the class CreateSegmentCommand method execute.

@Override
public boolean execute() throws Exception {
    LOGGER.info("Executing command: {}", toString());
    // Load generator config if exist.
    final SegmentGeneratorConfig segmentGeneratorConfig;
    if (_generatorConfigFile != null) {
        segmentGeneratorConfig = new ObjectMapper().readValue(new File(_generatorConfigFile), SegmentGeneratorConfig.class);
    } else {
        segmentGeneratorConfig = new SegmentGeneratorConfig();
    }
    // Load config from segment generator config.
    String configDataDir = segmentGeneratorConfig.getDataDir();
    if (_dataDir == null) {
        if (configDataDir == null) {
            throw new RuntimeException("Must specify dataDir.");
        }
        _dataDir = configDataDir;
    } else {
        if (configDataDir != null && !configDataDir.equals(_dataDir)) {
            LOGGER.warn("Find dataDir conflict in command line and config file, use config in command line: {}", _dataDir);
        }
    }
    FileFormat configFormat = segmentGeneratorConfig.getFormat();
    if (_format == null) {
        if (configFormat == null) {
            throw new RuntimeException("Format cannot be null in config file.");
        }
        _format = configFormat;
    } else {
        if (configFormat != _format && configFormat != FileFormat.AVRO) {
            LOGGER.warn("Find format conflict in command line and config file, use config in command line: {}", _format);
        }
    }
    String configOutDir = segmentGeneratorConfig.getOutDir();
    if (_outDir == null) {
        if (configOutDir == null) {
            throw new RuntimeException("Must specify outDir.");
        }
        _outDir = configOutDir;
    } else {
        if (configOutDir != null && !configOutDir.equals(_outDir)) {
            LOGGER.warn("Find outDir conflict in command line and config file, use config in command line: {}", _outDir);
        }
    }
    if (segmentGeneratorConfig.isOverwrite()) {
        _overwrite = true;
    }
    String configTableName = segmentGeneratorConfig.getTableName();
    if (_tableName == null) {
        if (configTableName == null) {
            throw new RuntimeException("Must specify tableName.");
        }
        _tableName = configTableName;
    } else {
        if (configTableName != null && !configTableName.equals(_tableName)) {
            LOGGER.warn("Find tableName conflict in command line and config file, use config in command line: {}", _tableName);
        }
    }
    String configSegmentName = segmentGeneratorConfig.getSegmentName();
    if (_segmentName == null) {
        if (configSegmentName == null) {
            throw new RuntimeException("Must specify segmentName.");
        }
        _segmentName = configSegmentName;
    } else {
        if (configSegmentName != null && !configSegmentName.equals(_segmentName)) {
            LOGGER.warn("Find segmentName conflict in command line and config file, use config in command line: {}", _segmentName);
        }
    }
    // Filter out all input files.
    File dir = new File(_dataDir);
    if (!dir.exists() || !dir.isDirectory()) {
        throw new RuntimeException("Data directory " + _dataDir + " not found.");
    }
    File[] files = dir.listFiles(new FilenameFilter() {

        @Override
        public boolean accept(File dir, String name) {
            return name.toLowerCase().endsWith(_format.toString().toLowerCase());
        }
    });
    if ((files == null) || (files.length == 0)) {
        throw new RuntimeException("Data directory " + _dataDir + " does not contain " + _format.toString().toUpperCase() + " files.");
    }
    // Make sure output directory does not already exist, or can be overwritten.
    File outDir = new File(_outDir);
    if (outDir.exists()) {
        if (!_overwrite) {
            throw new IOException("Output directory " + _outDir + " already exists.");
        } else {
            FileUtils.deleteDirectory(outDir);
        }
    }
    // Set other generator configs from command line.
    segmentGeneratorConfig.setDataDir(_dataDir);
    segmentGeneratorConfig.setFormat(_format);
    segmentGeneratorConfig.setOutDir(_outDir);
    segmentGeneratorConfig.setOverwrite(_overwrite);
    segmentGeneratorConfig.setTableName(_tableName);
    segmentGeneratorConfig.setSegmentName(_segmentName);
    if (_schemaFile != null) {
        if (segmentGeneratorConfig.getSchemaFile() != null && !segmentGeneratorConfig.getSchemaFile().equals(_schemaFile)) {
            LOGGER.warn("Find schemaFile conflict in command line and config file, use config in command line: {}", _schemaFile);
        }
        segmentGeneratorConfig.setSchemaFile(_schemaFile);
    }
    if (_readerConfigFile != null) {
        if (segmentGeneratorConfig.getReaderConfigFile() != null && !segmentGeneratorConfig.getReaderConfigFile().equals(_readerConfigFile)) {
            LOGGER.warn("Find readerConfigFile conflict in command line and config file, use config in command line: {}", _readerConfigFile);
        }
        segmentGeneratorConfig.setReaderConfigFile(_readerConfigFile);
    }
    if (_enableStarTreeIndex) {
        segmentGeneratorConfig.setEnableStarTreeIndex(true);
    }
    if (_starTreeIndexSpecFile != null) {
        if (segmentGeneratorConfig.getStarTreeIndexSpecFile() != null && !segmentGeneratorConfig.getStarTreeIndexSpecFile().equals(_starTreeIndexSpecFile)) {
            LOGGER.warn("Find starTreeIndexSpecFile conflict in command line and config file, use config in command line: {}", _starTreeIndexSpecFile);
        }
        segmentGeneratorConfig.setStarTreeIndexSpecFile(_starTreeIndexSpecFile);
    }
    ExecutorService executor = Executors.newFixedThreadPool(_numThreads);
    int cnt = 0;
    for (final File file : files) {
        final int segCnt = cnt;
        executor.execute(new Runnable() {

            @Override
            public void run() {
                try {
                    SegmentGeneratorConfig config = new SegmentGeneratorConfig(segmentGeneratorConfig);
                    config.setInputFilePath(file.getAbsolutePath());
                    config.setSegmentName(_segmentName + "_" + segCnt);
                    config.loadConfigFiles();
                    final SegmentIndexCreationDriverImpl driver = new SegmentIndexCreationDriverImpl();
                    driver.init(config);
                    driver.build();
                } catch (Exception e) {
                    throw new RuntimeException(e);
                }
            }
        });
        cnt += 1;
    }
    executor.shutdown();
    return executor.awaitTermination(1, TimeUnit.HOURS);
}
Also used : IOException(java.io.IOException) FileFormat(com.linkedin.pinot.core.data.readers.FileFormat) IOException(java.io.IOException) SegmentIndexCreationDriverImpl(com.linkedin.pinot.core.segment.creator.impl.SegmentIndexCreationDriverImpl) FilenameFilter(java.io.FilenameFilter) SegmentGeneratorConfig(com.linkedin.pinot.core.indexsegment.generator.SegmentGeneratorConfig) ExecutorService(java.util.concurrent.ExecutorService) File(java.io.File) ObjectMapper(org.codehaus.jackson.map.ObjectMapper)

Example 25 with SegmentGeneratorConfig

use of com.linkedin.pinot.core.indexsegment.generator.SegmentGeneratorConfig in project pinot by linkedin.

the class ColumnarToStarTreeConverter method convertSegment.

/**
   * Helper method to perform the conversion.
   * @param columnarSegment Columnar segment directory to convert
   * @throws Exception
   */
private void convertSegment(File columnarSegment) throws Exception {
    PinotSegmentRecordReader pinotSegmentRecordReader = new PinotSegmentRecordReader(columnarSegment);
    SegmentGeneratorConfig config = new SegmentGeneratorConfig(pinotSegmentRecordReader.getSchema());
    config.setDataDir(_inputDirName);
    config.setInputFilePath(columnarSegment.getAbsolutePath());
    config.setFormat(FileFormat.PINOT);
    config.setEnableStarTreeIndex(true);
    config.setOutDir(_outputDirName);
    config.setStarTreeIndexSpecFile(_starTreeConfigFileName);
    config.setOverwrite(_overwrite);
    config.setSegmentName(columnarSegment.getName());
    SegmentIndexCreationDriver indexCreator = new SegmentIndexCreationDriverImpl();
    indexCreator.init(config);
    indexCreator.build();
}
Also used : SegmentIndexCreationDriver(com.linkedin.pinot.core.segment.creator.SegmentIndexCreationDriver) SegmentGeneratorConfig(com.linkedin.pinot.core.indexsegment.generator.SegmentGeneratorConfig) PinotSegmentRecordReader(com.linkedin.pinot.core.data.readers.PinotSegmentRecordReader) SegmentIndexCreationDriverImpl(com.linkedin.pinot.core.segment.creator.impl.SegmentIndexCreationDriverImpl)

Aggregations

SegmentGeneratorConfig (com.linkedin.pinot.core.indexsegment.generator.SegmentGeneratorConfig)57 SegmentIndexCreationDriver (com.linkedin.pinot.core.segment.creator.SegmentIndexCreationDriver)34 File (java.io.File)31 SegmentIndexCreationDriverImpl (com.linkedin.pinot.core.segment.creator.impl.SegmentIndexCreationDriverImpl)19 GenericRow (com.linkedin.pinot.core.data.GenericRow)14 Test (org.testng.annotations.Test)13 Schema (com.linkedin.pinot.common.data.Schema)12 HashMap (java.util.HashMap)12 ArrayList (java.util.ArrayList)10 RecordReader (com.linkedin.pinot.core.data.readers.RecordReader)9 BeforeClass (org.testng.annotations.BeforeClass)8 ColumnMetadataTest (com.linkedin.pinot.core.segment.index.ColumnMetadataTest)7 DimensionFieldSpec (com.linkedin.pinot.common.data.DimensionFieldSpec)6 IndexSegment (com.linkedin.pinot.core.indexsegment.IndexSegment)6 FieldSpec (com.linkedin.pinot.common.data.FieldSpec)5 URL (java.net.URL)5 Random (java.util.Random)4 AvroRecordReader (com.linkedin.pinot.core.data.readers.AvroRecordReader)3 TestRecordReader (com.linkedin.pinot.core.data.readers.TestRecordReader)3 JSONObject (org.json.JSONObject)3