Search in sources :

Example 1 with DataGeneratorSpec

use of com.linkedin.pinot.tools.data.generator.DataGeneratorSpec in project pinot by linkedin.

the class FileBasedSentineTest method setup.

/**
 * Prepares the fixture for this test class: generates Avro input data, builds one segment per
 * (Avro file, table) pair under the server bootstrap directory, starts the file-based server
 * and broker, and finally issues one selection query against the broker.
 *
 * @throws Exception if data generation, segment creation, directory handling or cluster
 *         start-up fails
 */
@BeforeClass
public void setup() throws Exception {
    url = new URL("http://localhost:" + FileBasedServerBrokerStarters.BROKER_CLIENT_PORT + "/query");

    // Generate the Avro input data.
    final String[] columns = { "dimention1", "dimention2", "dimention3", "dimention4", "metric1", "daysSinceEpoch" };
    final Map<String, DataType> dataTypes = new HashMap<String, FieldSpec.DataType>();
    final Map<String, FieldType> fieldTypes = new HashMap<String, FieldType>();
    final Map<String, TimeUnit> timeUnits = new HashMap<String, TimeUnit>();
    final Map<String, Integer> cardinality = new HashMap<String, Integer>();
    // Create an empty range map: the DataGeneratorSpec signature requires one, but this test
    // does not use metric/time as fieldType, so it never needs any entries.
    final Map<String, IntRange> range = new HashMap<String, IntRange>();
    for (final String col : columns) {
        // Only "dimention1" is a string column; every other column is an int.
        dataTypes.put(col, col.equals("dimention1") ? DataType.STRING : DataType.INT);
        cardinality.put(col, 1000);
        // Every column, including the metric and time ones, is declared as a dimension here.
        fieldTypes.put(col, FieldType.DIMENSION);
    }

    // Start from a clean output directory so stale files are not picked up below.
    if (avroDataDir.exists()) {
        FileUtils.deleteDirectory(avroDataDir);
    }
    final DataGeneratorSpec spec = new DataGeneratorSpec(Arrays.asList(columns), cardinality, range, dataTypes, fieldTypes, timeUnits, FileFormat.AVRO, avroDataDir.getAbsolutePath(), true);
    generator = new DataGenerator();
    generator.init(spec);
    generator.generate(100000L, 2);

    // Build the segments.
    final File bootstrapDir = new File(FileBasedServerBrokerStarters.SERVER_BOOTSTRAP_DIR);
    if (bootstrapDir.exists()) {
        FileUtils.deleteDirectory(bootstrapDir);
    }
    // forceMkdir throws if the directory cannot be created, instead of silently returning false.
    FileUtils.forceMkdir(bootstrapDir);

    // listFiles() returns null when the path is not a directory or an I/O error occurs.
    final File[] avroFiles = avroDataDir.listFiles();
    if (avroFiles == null) {
        throw new IllegalStateException("Cannot list generated Avro files in " + avroDataDir.getAbsolutePath());
    }
    int counter = 0;
    for (final File avro : avroFiles) {
        for (final String table : FileBasedServerBrokerStarters.TABLE_NAMES) {
            // Each (file, table) pair gets its own uniquely numbered segment directory.
            final SegmentGeneratorConfig genConfig = SegmentTestUtils.getSegmentGenSpecWithSchemAndProjectedColumns(avro, new File(bootstrapDir, "segment-" + counter), "daysSinceEpoch", TimeUnit.DAYS, table);
            final SegmentIndexCreationDriver driver = SegmentCreationDriverFactory.get(null);
            driver.init(genConfig);
            driver.build();
            counter++;
        }
    }

    // Start the server and the broker.
    starter = new FileBasedServerBrokerStarters();
    starter.startAll();

    // Issue one selection query now that everything is up. The response is not inspected;
    // capture the return value if sample values are needed for writing filter queries.
    postQuery("select * from 'table1' limit 100", "http://localhost:" + FileBasedServerBrokerStarters.BROKER_CLIENT_PORT);
}
Also used : SegmentIndexCreationDriver(com.linkedin.pinot.core.segment.creator.SegmentIndexCreationDriver) HashMap(java.util.HashMap) IntRange(org.apache.commons.lang.math.IntRange) URL(java.net.URL) FieldSpec(com.linkedin.pinot.common.data.FieldSpec) FieldType(com.linkedin.pinot.common.data.FieldSpec.FieldType) JSONObject(org.json.JSONObject) DataGenerator(com.linkedin.pinot.tools.data.generator.DataGenerator) SegmentGeneratorConfig(com.linkedin.pinot.core.indexsegment.generator.SegmentGeneratorConfig) DataType(com.linkedin.pinot.common.data.FieldSpec.DataType) TimeUnit(java.util.concurrent.TimeUnit) File(java.io.File) DataGeneratorSpec(com.linkedin.pinot.tools.data.generator.DataGeneratorSpec) BeforeClass(org.testng.annotations.BeforeClass)

Example 2 with DataGeneratorSpec

use of com.linkedin.pinot.tools.data.generator.DataGeneratorSpec in project pinot by linkedin.

the class GenerateDataCommand method buildDataGeneratorSpec.

/**
 * Fills the supplied collections from the schema and wraps them in a {@link DataGeneratorSpec}.
 *
 * <p>For every field spec the column name, data type and field type are recorded. Entries
 * already present in {@code cardinality} and {@code range} are kept; missing ones receive a
 * default (cardinality 1000 for dimensions, range 1..1000 for metrics and time columns).
 * Time columns additionally get their incoming granularity time type stored in
 * {@code timeUnits}.
 *
 * @param schema      schema whose field specs are iterated
 * @param columns     receives each column name, in iteration order
 * @param dataTypes   receives the data type per column
 * @param fieldTypes  receives the field type per column
 * @param timeUnits   receives the time unit per time column
 * @param cardinality pre-populated cardinalities; defaults are added for dimensions
 * @param range       pre-populated ranges; defaults are added for metric and time columns
 * @return a spec built from the populated collections, targeting Avro output in
 *         {@code _outDir} with the {@code _overwrite} flag
 * @throws RuntimeException if a field has a type other than dimension, metric or time
 */
private DataGeneratorSpec buildDataGeneratorSpec(Schema schema, List<String> columns, HashMap<String, DataType> dataTypes, HashMap<String, FieldType> fieldTypes, HashMap<String, TimeUnit> timeUnits, HashMap<String, Integer> cardinality, HashMap<String, IntRange> range) {
    for (final FieldSpec fieldSpec : schema.getAllFieldSpecs()) {
        final String name = fieldSpec.getName();
        columns.add(name);
        dataTypes.put(name, fieldSpec.getDataType());

        final FieldType kind = fieldSpec.getFieldType();
        fieldTypes.put(name, kind);

        switch (kind) {
            case DIMENSION:
                // A missing key and a key mapped to null are both treated as "no cardinality".
                if (cardinality.get(name) == null) {
                    cardinality.put(name, 1000);
                }
                break;
            case METRIC:
                applyDefaultRangeIfMissing(range, name);
                break;
            case TIME:
                applyDefaultRangeIfMissing(range, name);
                timeUnits.put(name, ((TimeFieldSpec) fieldSpec).getIncomingGranularitySpec().getTimeType());
                break;
            default:
                throw new RuntimeException("Invalid field type.");
        }
    }

    return new DataGeneratorSpec(columns, cardinality, range, dataTypes, fieldTypes, timeUnits, FileFormat.AVRO, _outDir, _overwrite);
}

/** Registers the default range 1..1000 for the column unless a range entry already exists. */
private void applyDefaultRangeIfMissing(HashMap<String, IntRange> range, String column) {
    if (!range.containsKey(column)) {
        range.put(column, new IntRange(1, 1000));
    }
}
Also used : TimeFieldSpec(com.linkedin.pinot.common.data.TimeFieldSpec) IntRange(org.apache.commons.lang.math.IntRange) FieldSpec(com.linkedin.pinot.common.data.FieldSpec) DataGeneratorSpec(com.linkedin.pinot.tools.data.generator.DataGeneratorSpec)

Example 3 with DataGeneratorSpec

use of com.linkedin.pinot.tools.data.generator.DataGeneratorSpec in project pinot by linkedin.

the class GenerateDataCommand method execute.

/**
 * Runs the data-generation command: loads the schema from {@code _schemaFile}, builds the
 * cardinality and range maps from {@code _schemaAnnFile}, assembles a generator spec and
 * asks the generator for {@code _numRecords} records across {@code _numFiles} files.
 *
 * @return {@code true} once generation has completed
 * @throws RuntimeException if the requested number of records or files is negative
 * @throws Exception if schema loading, spec building or data generation fails
 */
@Override
public boolean execute() throws Exception {
    LOGGER.info("Executing command: " + toString());

    // Reject nonsensical requests before touching the file system.
    if (_numRecords < 0 || _numFiles < 0) {
        throw new RuntimeException("Cannot generate negative number of records/files.");
    }

    final Schema parsedSchema = Schema.fromFile(new File(_schemaFile));

    // Output collections, filled in by the two helper calls below.
    final List<String> columnNames = new LinkedList<String>();
    final HashMap<String, DataType> typeByColumn = new HashMap<String, DataType>();
    final HashMap<String, FieldType> fieldTypeByColumn = new HashMap<String, FieldType>();
    final HashMap<String, TimeUnit> timeUnitByColumn = new HashMap<String, TimeUnit>();
    final HashMap<String, Integer> cardinalityByColumn = new HashMap<String, Integer>();
    final HashMap<String, IntRange> rangeByColumn = new HashMap<String, IntRange>();

    // The cardinality and range maps are filled first, then passed on to the spec builder.
    buildCardinalityRangeMaps(_schemaAnnFile, cardinalityByColumn, rangeByColumn);
    final DataGeneratorSpec generatorSpec = buildDataGeneratorSpec(parsedSchema, columnNames, typeByColumn, fieldTypeByColumn, timeUnitByColumn, cardinalityByColumn, rangeByColumn);

    final DataGenerator dataGenerator = new DataGenerator();
    dataGenerator.init(generatorSpec);
    dataGenerator.generate(_numRecords, _numFiles);
    return true;
}
Also used : HashMap(java.util.HashMap) Schema(com.linkedin.pinot.common.data.Schema) IntRange(org.apache.commons.lang.math.IntRange) LinkedList(java.util.LinkedList) FieldType(com.linkedin.pinot.common.data.FieldSpec.FieldType) DataGenerator(com.linkedin.pinot.tools.data.generator.DataGenerator) DataType(com.linkedin.pinot.common.data.FieldSpec.DataType) TimeUnit(java.util.concurrent.TimeUnit) File(java.io.File) DataGeneratorSpec(com.linkedin.pinot.tools.data.generator.DataGeneratorSpec)

Aggregations

DataGeneratorSpec (com.linkedin.pinot.tools.data.generator.DataGeneratorSpec)3 IntRange (org.apache.commons.lang.math.IntRange)3 FieldSpec (com.linkedin.pinot.common.data.FieldSpec)2 DataType (com.linkedin.pinot.common.data.FieldSpec.DataType)2 FieldType (com.linkedin.pinot.common.data.FieldSpec.FieldType)2 DataGenerator (com.linkedin.pinot.tools.data.generator.DataGenerator)2 File (java.io.File)2 HashMap (java.util.HashMap)2 TimeUnit (java.util.concurrent.TimeUnit)2 Schema (com.linkedin.pinot.common.data.Schema)1 TimeFieldSpec (com.linkedin.pinot.common.data.TimeFieldSpec)1 SegmentGeneratorConfig (com.linkedin.pinot.core.indexsegment.generator.SegmentGeneratorConfig)1 SegmentIndexCreationDriver (com.linkedin.pinot.core.segment.creator.SegmentIndexCreationDriver)1 URL (java.net.URL)1 LinkedList (java.util.LinkedList)1 JSONObject (org.json.JSONObject)1 BeforeClass (org.testng.annotations.BeforeClass)1