Search in sources :

Example 26 with GenericRow

use of com.linkedin.pinot.core.data.GenericRow in project pinot by linkedin.

In class TestOffheapStarTreeBuilder, method testRandom:

/**
 * Builds an off-heap star tree from randomly generated rows and passes as long as
 * appending the rows and building the tree does not throw.
 *
 * <p>The builder output directory is deleted in a {@code finally} block so that a
 * failing build does not leave stale files behind in the fixed
 * {@code /tmp/startree} location.
 *
 * @throws Exception if the builder fails or the output directory cannot be deleted
 */
@Test
public void testRandom() throws Exception {
    int numRows = 100;
    int numDimensions = 6;
    int numMetrics = 6;
    StarTreeBuilderConfig builderConfig = new StarTreeBuilderConfig();
    Schema schema = new Schema();
    builderConfig.dimensionsSplitOrder = new ArrayList<>();
    // Dimensions d1..dN, all of which take part in the split order.
    for (int i = 0; i < numDimensions; i++) {
        String dimName = "d" + (i + 1);
        DimensionFieldSpec dimensionFieldSpec = new DimensionFieldSpec(dimName, DataType.INT, true);
        schema.addField(dimensionFieldSpec);
        builderConfig.dimensionsSplitOrder.add(dimName);
    }
    schema.setTimeFieldSpec(new TimeFieldSpec("daysSinceEpoch", DataType.INT, TimeUnit.DAYS));
    // Metrics n1..nM.
    for (int i = 0; i < numMetrics; i++) {
        String metricName = "n" + (i + 1);
        MetricFieldSpec metricFieldSpec = new MetricFieldSpec(metricName, DataType.INT);
        schema.addField(metricFieldSpec);
    }
    builderConfig.maxLeafRecords = 10;
    builderConfig.schema = schema;
    builderConfig.outDir = new File("/tmp/startree");
    try {
        OffHeapStarTreeBuilder builder = new OffHeapStarTreeBuilder();
        builder.init(builderConfig);
        Random r = new Random();
        // The same map instance is refilled for every row; each key is overwritten per row.
        HashMap<String, Object> map = new HashMap<>();
        for (int row = 0; row < numRows; row++) {
            for (int i = 0; i < numDimensions; i++) {
                String dimName = schema.getDimensionFieldSpecs().get(i).getName();
                // NOTE(review): dimensions are declared DataType.INT above but receive
                // String values here -- confirm this is intended.
                map.put(dimName, dimName + "-v" + r.nextInt((numDimensions - i + 2)));
            }
            //time
            map.put("daysSinceEpoch", r.nextInt(1000));
            for (int i = 0; i < numMetrics; i++) {
                String metName = schema.getMetricFieldSpecs().get(i).getName();
                // NOTE(review): the bound is derived from numDimensions, not numMetrics;
                // the two are equal in this test -- confirm which was meant.
                map.put(metName, r.nextInt((numDimensions - i + 2)));
            }
            GenericRow genericRow = new GenericRow();
            genericRow.init(map);
            builder.append(genericRow);
        }
        builder.build();
    } finally {
        // Clean up even when init/append/build throws.
        FileUtils.deleteDirectory(builderConfig.outDir);
    }
}
Also used : HashMap(java.util.HashMap) Schema(com.linkedin.pinot.common.data.Schema) TimeFieldSpec(com.linkedin.pinot.common.data.TimeFieldSpec) MetricFieldSpec(com.linkedin.pinot.common.data.MetricFieldSpec) GenericRow(com.linkedin.pinot.core.data.GenericRow) Random(java.util.Random) File(java.io.File) DimensionFieldSpec(com.linkedin.pinot.common.data.DimensionFieldSpec) Test(org.testng.annotations.Test)

Example 27 with GenericRow

use of com.linkedin.pinot.core.data.GenericRow in project pinot by linkedin.

In class TestOffheapStarTreeBuilder, method testSimpleCore:

/**
 * Builds an off-heap star tree over deterministic rows and verifies that every
 * document after the raw documents carries the value {@code "ALL"} for each
 * dimension that was marked as skip-materialization.
 *
 * <p>The number of generated rows is {@code numDimensions!}. The last
 * {@code numSkipMaterializationDimensions} dimensions are registered as
 * skip-materialization dimensions; the remaining ones form the split order.
 *
 * <p>The check throws {@link AssertionError} explicitly instead of using the
 * {@code assert} keyword, so it is enforced even when the JVM runs without
 * {@code -ea}. The output directory is removed in a {@code finally} block so a
 * failure does not leave stale files in {@code /tmp/startree}.
 *
 * @param numDimensions number of dimensions (d1..dN) to put in the schema
 * @param numMetrics number of metrics (m1..mM) to put in the schema
 * @param numSkipMaterializationDimensions how many trailing dimensions skip materialization
 * @throws Exception if the builder fails or the output directory cannot be deleted
 */
private void testSimpleCore(int numDimensions, int numMetrics, int numSkipMaterializationDimensions) throws Exception {
    int numRows = (int) MathUtils.factorial(numDimensions);
    StarTreeBuilderConfig builderConfig = new StarTreeBuilderConfig();
    Schema schema = new Schema();
    builderConfig.dimensionsSplitOrder = new ArrayList<>();
    builderConfig.setSkipMaterializationForDimensions(new HashSet<String>());
    Set<String> skipMaterializationForDimensions = builderConfig.getSkipMaterializationForDimensions();
    for (int i = 0; i < numDimensions; i++) {
        String dimName = "d" + (i + 1);
        DimensionFieldSpec dimensionFieldSpec = new DimensionFieldSpec(dimName, DataType.STRING, true);
        schema.addField(dimensionFieldSpec);
        if (i < (numDimensions - numSkipMaterializationDimensions)) {
            builderConfig.dimensionsSplitOrder.add(dimName);
        } else {
            builderConfig.getSkipMaterializationForDimensions().add(dimName);
        }
    }
    schema.setTimeFieldSpec(new TimeFieldSpec("daysSinceEpoch", DataType.INT, TimeUnit.DAYS));
    for (int i = 0; i < numMetrics; i++) {
        String metricName = "m" + (i + 1);
        MetricFieldSpec metricFieldSpec = new MetricFieldSpec(metricName, DataType.INT);
        schema.addField(metricFieldSpec);
    }
    builderConfig.maxLeafRecords = 10;
    builderConfig.schema = schema;
    builderConfig.outDir = new File("/tmp/startree");
    try {
        OffHeapStarTreeBuilder builder = new OffHeapStarTreeBuilder();
        builder.init(builderConfig);
        // The same map instance is refilled for every row; each key is overwritten per row.
        HashMap<String, Object> map = new HashMap<>();
        for (int row = 0; row < numRows; row++) {
            for (int i = 0; i < numDimensions; i++) {
                String dimName = schema.getDimensionFieldSpecs().get(i).getName();
                // Dimension i cycles through (numDimensions - i) distinct values.
                map.put(dimName, dimName + "-v" + row % (numDimensions - i));
            }
            //time
            map.put("daysSinceEpoch", 1);
            for (int i = 0; i < numMetrics; i++) {
                String metName = schema.getMetricFieldSpecs().get(i).getName();
                map.put(metName, 1);
            }
            GenericRow genericRow = new GenericRow();
            genericRow.init(map);
            builder.append(genericRow);
        }
        builder.build();
        int totalDocs = builder.getTotalRawDocumentCount() + builder.getTotalAggregateDocumentCount();
        // Walk every document once; this only checks that iteration itself does not throw.
        Iterator<GenericRow> iterator = builder.iterator(0, totalDocs);
        while (iterator.hasNext()) {
            iterator.next();
        }
        // Documents after the raw ones must report "ALL" for skipped dimensions.
        iterator = builder.iterator(builder.getTotalRawDocumentCount(), totalDocs);
        while (iterator.hasNext()) {
            GenericRow row = iterator.next();
            for (String skipDimension : skipMaterializationForDimensions) {
                String rowValue = (String) row.getValue(skipDimension);
                if (!"ALL".equals(rowValue)) {
                    throw new AssertionError(
                        "Expected ALL for skip-materialization dimension " + skipDimension + " but was " + rowValue);
                }
            }
        }
    } finally {
        // Clean up even when the build or a check fails.
        FileUtils.deleteDirectory(builderConfig.outDir);
    }
}
Also used : HashMap(java.util.HashMap) Schema(com.linkedin.pinot.common.data.Schema) TimeFieldSpec(com.linkedin.pinot.common.data.TimeFieldSpec) MetricFieldSpec(com.linkedin.pinot.common.data.MetricFieldSpec) GenericRow(com.linkedin.pinot.core.data.GenericRow) File(java.io.File) DimensionFieldSpec(com.linkedin.pinot.common.data.DimensionFieldSpec)

Example 28 with GenericRow

use of com.linkedin.pinot.core.data.GenericRow in project pinot by linkedin.

In class RealtimeSegmentImplTest, method testDropInvalidRows:

/**
 * Indexes three versions of one row into a realtime segment and checks the raw
 * document count after each attempt: a valid row is counted, a row whose metric
 * is {@code null} leaves the count unchanged, and a subsequent valid row is
 * counted again.
 *
 * @throws Exception if segment creation or indexing fails
 */
@Test
public void testDropInvalidRows() throws Exception {
    Schema schema = new Schema.SchemaBuilder()
        .setSchemaName("potato")
        .addSingleValueDimension("dimension", FieldSpec.DataType.STRING)
        .addMetric("metric", FieldSpec.DataType.LONG)
        .addTime("time", TimeUnit.SECONDS, FieldSpec.DataType.LONG)
        .build();
    String schemaName = schema.getSchemaName();
    ServerMetrics serverMetrics = new ServerMetrics(new MetricsRegistry());
    RealtimeSegmentImpl segment =
        createRealtimeSegmentImpl(schema, 100, "noTable", "noSegment", schemaName, serverMetrics);

    // Nothing has been indexed yet.
    Assert.assertEquals(segment.getRawDocumentCount(), 0);

    // The row is initialised from this map once; later puts on the map are expected to be
    // visible through the row (presumably GenericRow keeps a reference -- verify).
    Map<String, Object> fieldValues = new HashMap<>();
    fieldValues.put("dimension", "potato");
    fieldValues.put("metric", 1234L);
    fieldValues.put("time", 4567L);
    GenericRow sharedRow = new GenericRow();
    sharedRow.init(fieldValues);

    // Valid row: document count goes to 1.
    boolean hasCapacity = segment.index(sharedRow);
    Assert.assertEquals(hasCapacity, true);
    Assert.assertEquals(segment.getRawDocumentCount(), 1);

    // Invalid row (null metric): document count stays at 1.
    fieldValues.put("metric", null);
    hasCapacity = segment.index(sharedRow);
    Assert.assertEquals(hasCapacity, true);
    Assert.assertEquals(segment.getRawDocumentCount(), 1);

    // Valid row again: document count goes to 2.
    fieldValues.put("metric", 2222L);
    hasCapacity = segment.index(sharedRow);
    Assert.assertEquals(hasCapacity, true);
    Assert.assertEquals(segment.getRawDocumentCount(), 2);
}
Also used : GenericRow(com.linkedin.pinot.core.data.GenericRow) MetricsRegistry(com.yammer.metrics.core.MetricsRegistry) HashMap(java.util.HashMap) Schema(com.linkedin.pinot.common.data.Schema) ServerMetrics(com.linkedin.pinot.common.metrics.ServerMetrics) RealtimeSegmentImpl(com.linkedin.pinot.core.realtime.impl.RealtimeSegmentImpl) Test(org.testng.annotations.Test)

Example 29 with GenericRow

use of com.linkedin.pinot.core.data.GenericRow in project pinot by linkedin.

In class FilterTreeOptimizationTest, method buildSegment:

/**
   * Helper method that generates {@code NUM_ROWS} in-memory rows and builds a segment from them.
   *
   * Each row holds one value per entry of {@code DIMENSIONS}, of the form
   * {@code <dimensionName>_<row % MAX_DIMENSION_VALUES>}.
   *
   * @param segmentDirName Name of segment directory (used as the generator's output directory)
   * @param segmentName Name of segment
   * @param schema Schema for segment
   * @return The record reader over the generated rows that was handed to the segment driver
   * @throws Exception
   */
private RecordReader buildSegment(String segmentDirName, String segmentName, Schema schema) throws Exception {
    // Generate the input rows first; they do not depend on the generator configuration.
    final List<GenericRow> rows = new ArrayList<>();
    for (int rowId = 0; rowId < NUM_ROWS; rowId++) {
        // The suffix is the same for every dimension within a row.
        String valueSuffix = "_" + (rowId % MAX_DIMENSION_VALUES);
        HashMap<String, Object> fields = new HashMap<>();
        for (String dimensionName : DIMENSIONS) {
            fields.put(dimensionName, dimensionName + valueSuffix);
        }
        GenericRow rowRecord = new GenericRow();
        rowRecord.init(fields);
        rows.add(rowRecord);
    }

    SegmentGeneratorConfig generatorConfig = new SegmentGeneratorConfig(schema);
    generatorConfig.setOutDir(segmentDirName);
    generatorConfig.setFormat(FileFormat.AVRO);
    generatorConfig.setTableName(TABLE_NAME);
    generatorConfig.setSegmentName(segmentName);

    RecordReader rowReader = new TestUtils.GenericRowRecordReader(schema, rows);
    SegmentIndexCreationDriverImpl segmentDriver = new SegmentIndexCreationDriverImpl();
    segmentDriver.init(generatorConfig, rowReader);
    segmentDriver.build();
    LOGGER.info("Built segment {} at {}", segmentName, segmentDirName);
    return rowReader;
}
Also used : GenericRow(com.linkedin.pinot.core.data.GenericRow) HashMap(java.util.HashMap) RecordReader(com.linkedin.pinot.core.data.readers.RecordReader) SegmentGeneratorConfig(com.linkedin.pinot.core.indexsegment.generator.SegmentGeneratorConfig) ArrayList(java.util.ArrayList) SegmentIndexCreationDriverImpl(com.linkedin.pinot.core.segment.creator.impl.SegmentIndexCreationDriverImpl)

Example 30 with GenericRow

use of com.linkedin.pinot.core.data.GenericRow in project pinot by linkedin.

In class AvroDataPublisherTest, method TestReadPartialAvro:

/**
 * Reads the Avro test data through an {@code AvroRecordReader} whose schema contains
 * only {@code column3} and {@code column2}, and compares the rows against the JSON
 * test data line by line.
 *
 * <p>For every JSON line for which the reader still has a row, the row must expose
 * exactly two fields; from the third line onwards ({@code cnt > 1}) each field's string
 * value must equal the value of the same key in the JSON object. Finally the number of
 * JSON lines must be 10001.
 *
 * @throws Exception if the test resources cannot be read or parsed
 */
@Test
public void TestReadPartialAvro() throws Exception {
    final String filePath = TestUtils.getFileFromResourceUrl(getClass().getClassLoader().getResource(AVRO_DATA));
    final String jsonPath = TestUtils.getFileFromResourceUrl(getClass().getClassLoader().getResource(JSON_DATA));
    // Only these two columns are part of the schema, so only they should be read from Avro.
    Schema schema = new Schema.SchemaBuilder().addSingleValueDimension("column3", DataType.STRING).addSingleValueDimension("column2", DataType.STRING).build();
    final SegmentGeneratorConfig config = new SegmentGeneratorConfig(schema);
    config.setFormat(FileFormat.AVRO);
    config.setInputFilePath(filePath);
    config.setSegmentVersion(SegmentVersion.v1);
    final AvroRecordReader avroDataPublisher = new AvroRecordReader(FieldExtractorFactory.getPlainFieldExtractor(config), config.getInputFilePath());
    // NOTE(review): the first Avro record is read and discarded before the comparison
    // loop, and the first two JSON lines are not compared below -- confirm the intended
    // alignment between the two files.
    avroDataPublisher.next();
    int cnt = 0;
    for (final String line : FileUtils.readLines(new File(jsonPath))) {
        final JSONObject obj = new JSONObject(line);
        if (avroDataPublisher.hasNext()) {
            final GenericRow recordRow = avroDataPublisher.next();
            Assert.assertEquals(recordRow.getFieldNames().length, 2);
            for (final String column : recordRow.getFieldNames()) {
                final String valueFromJson = obj.get(column).toString();
                final String valueFromAvro = recordRow.getValue(column).toString();
                if (cnt > 1) {
                    Assert.assertEquals(valueFromAvro, valueFromJson);
                }
            }
        }
        cnt++;
    }
    // TestNG order is (actual, expected); keeps the failure message the right way round.
    Assert.assertEquals(cnt, 10001);
}
Also used : GenericRow(com.linkedin.pinot.core.data.GenericRow) JSONObject(org.json.JSONObject) AvroRecordReader(com.linkedin.pinot.core.data.readers.AvroRecordReader) Schema(com.linkedin.pinot.common.data.Schema) ArrayList(java.util.ArrayList) SegmentGeneratorConfig(com.linkedin.pinot.core.indexsegment.generator.SegmentGeneratorConfig) File(java.io.File) Test(org.testng.annotations.Test)

Aggregations

GenericRow (com.linkedin.pinot.core.data.GenericRow): 45; HashMap (java.util.HashMap): 24; File (java.io.File): 17; Test (org.testng.annotations.Test): 15; Schema (com.linkedin.pinot.common.data.Schema): 14; SegmentGeneratorConfig (com.linkedin.pinot.core.indexsegment.generator.SegmentGeneratorConfig): 14; ArrayList (java.util.ArrayList): 13; SegmentIndexCreationDriverImpl (com.linkedin.pinot.core.segment.creator.impl.SegmentIndexCreationDriverImpl): 11; DimensionFieldSpec (com.linkedin.pinot.common.data.DimensionFieldSpec): 8; RecordReader (com.linkedin.pinot.core.data.readers.RecordReader): 8; Random (java.util.Random): 6; JSONObject (org.json.JSONObject): 5; FieldSpec (com.linkedin.pinot.common.data.FieldSpec): 4; ServerMetrics (com.linkedin.pinot.common.metrics.ServerMetrics): 4; MetricFieldSpec (com.linkedin.pinot.common.data.MetricFieldSpec): 3; AvroRecordReader (com.linkedin.pinot.core.data.readers.AvroRecordReader): 3; PinotSegmentRecordReader (com.linkedin.pinot.core.data.readers.PinotSegmentRecordReader): 3; TestRecordReader (com.linkedin.pinot.core.data.readers.TestRecordReader): 3; MetricsRegistry (com.yammer.metrics.core.MetricsRegistry): 3; BeforeClass (org.testng.annotations.BeforeClass): 3