Search in sources :

Example 6 with GenericRow

use of com.linkedin.pinot.core.data.GenericRow in project pinot by linkedin.

the class PlainFieldExtractorTest method timeSpecStringTest.

/**
 * Runs a row holding a string time value through a {@code PlainFieldExtractor} built on a schema
 * whose time column is declared as STRING, then checks that the column still holds the same
 * {@code String} value after the transform.
 */
@Test
public void timeSpecStringTest() {
    final String timeColumn = "timeString";
    final String timeValue = "2016-01-01";

    // Schema with a single time column of type STRING.
    Schema schema = new Schema.SchemaBuilder()
        .setSchemaName("testSchema")
        .addTime(timeColumn, TimeUnit.DAYS, DataType.STRING)
        .build();
    PlainFieldExtractor extractor = new PlainFieldExtractor(schema);

    Map<String, Object> values = new HashMap<>();
    values.put(timeColumn, timeValue);
    GenericRow inputRow = new GenericRow();
    inputRow.init(values);

    extractor.transform(inputRow);

    Object extracted = inputRow.getValue(timeColumn);
    Assert.assertTrue(extracted instanceof String);
    Assert.assertEquals(extracted, timeValue);
}
Also used : GenericRow(com.linkedin.pinot.core.data.GenericRow) HashMap(java.util.HashMap) Schema(com.linkedin.pinot.common.data.Schema) Test(org.testng.annotations.Test)

Example 7 with GenericRow

use of com.linkedin.pinot.core.data.GenericRow in project pinot by linkedin.

the class PlainFieldExtractorTest method nullValueTest.

/**
 * For each of the first {@code NUMBER_OF_TYPES} schemas in {@code ALL_TYPE_SCHEMAS}, transforms a
 * row initialised from a field map this test never adds entries to, and expects the extractor to
 * report exactly one null, no errors and no conversions.
 */
@Test
public void nullValueTest() {
    // The same map and row instances are deliberately shared across all iterations.
    Map<String, Object> sharedFields = new HashMap<>();
    GenericRow sharedRow = new GenericRow();

    int typeIndex = 0;
    while (typeIndex < NUMBER_OF_TYPES) {
        PlainFieldExtractor extractor = new PlainFieldExtractor(ALL_TYPE_SCHEMAS[typeIndex]);
        sharedRow.init(sharedFields);
        extractor.transform(sharedRow);

        Assert.assertEquals(extractor.getTotalErrors(), 0);
        Assert.assertEquals(extractor.getTotalNulls(), 1);
        Assert.assertEquals(extractor.getTotalConversions(), 0);
        typeIndex++;
    }
}
Also used : GenericRow(com.linkedin.pinot.core.data.GenericRow) HashMap(java.util.HashMap) Test(org.testng.annotations.Test)

Example 8 with GenericRow

use of com.linkedin.pinot.core.data.GenericRow in project pinot by linkedin.

the class PlainFieldExtractorTest method classWithToStringTest.

/**
 * For each of the first {@code NUMBER_OF_TYPES} schemas, transforms one row holding a single
 * {@code AnyClassWithToString} and one row holding an array of two of them, then checks the
 * extractor's counters: two conversions for schema indexes at or above
 * {@code INDEX_OF_STRING_TYPE}, two errors otherwise, and never any nulls.
 */
@Test
public void classWithToStringTest() {
    // The same map and row instances are deliberately shared across all iterations.
    Map<String, Object> sharedFields = new HashMap<>();
    GenericRow sharedRow = new GenericRow();

    int typeIndex = 0;
    while (typeIndex < NUMBER_OF_TYPES) {
        PlainFieldExtractor extractor = new PlainFieldExtractor(ALL_TYPE_SCHEMAS[typeIndex]);

        // First pass: single value.
        sharedFields.put(TEST_COLUMN, new AnyClassWithToString());
        sharedRow.init(sharedFields);
        extractor.transform(sharedRow);

        // Second pass: array of values.
        Object[] arrayValue = { new AnyClassWithToString(), new AnyClassWithToString() };
        sharedFields.put(TEST_COLUMN, arrayValue);
        sharedRow.init(sharedFields);
        extractor.transform(sharedRow);

        // AnyClassWithToString only works with String (array).
        boolean stringCompatible = typeIndex >= INDEX_OF_STRING_TYPE;
        int expectedErrors = stringCompatible ? 0 : 2;
        int expectedConversions = stringCompatible ? 2 : 0;

        Assert.assertEquals(extractor.getTotalErrors(), expectedErrors);
        Assert.assertEquals(extractor.getTotalNulls(), 0);
        Assert.assertEquals(extractor.getTotalConversions(), expectedConversions);
        typeIndex++;
    }
}
Also used : GenericRow(com.linkedin.pinot.core.data.GenericRow) HashMap(java.util.HashMap) Test(org.testng.annotations.Test)

Example 9 with GenericRow

use of com.linkedin.pinot.core.data.GenericRow in project pinot by linkedin.

the class RealtimeSegmentTest method before.

/**
 * One-time fixture setup: derives a schema from the {@code AVRO_DATA} resource, creates two
 * realtime segments (one built with an inverted-index column list containing "count", one
 * without), and indexes every row returned by a file-based stream provider into both.
 *
 * <p>The provider is shut down in a {@code finally} block so it is released even when segment
 * creation or indexing fails.
 *
 * @throws Exception if schema extraction, provider initialisation or indexing fails
 */
@BeforeClass
public static void before() throws Exception {
    filePath = RealtimeFileBasedReaderTest.class.getClassLoader().getResource(AVRO_DATA).getFile();

    fieldTypeMap = new HashMap<String, FieldSpec.FieldType>();
    // column1 .. column10 are all plain dimensions.
    for (int columnNumber = 1; columnNumber <= 10; columnNumber++) {
        fieldTypeMap.put("column" + columnNumber, FieldType.DIMENSION);
    }
    fieldTypeMap.put("weeksSinceEpochSunday", FieldType.DIMENSION);
    fieldTypeMap.put("daysSinceEpoch", FieldType.DIMENSION);
    fieldTypeMap.put("column13", FieldType.TIME);
    fieldTypeMap.put("count", FieldType.METRIC);
    schema = SegmentTestUtils.extractSchemaFromAvro(new File(filePath), fieldTypeMap, TimeUnit.MINUTES);

    StreamProviderConfig config = new FileBasedStreamProviderConfig(FileFormat.AVRO, filePath, schema);
    StreamProvider provider = new FileBasedStreamProviderImpl();
    final String tableName = RealtimeSegmentTest.class.getSimpleName() + ".noTable";
    provider.init(config, tableName, new ServerMetrics(new MetricsRegistry()));
    try {
        List<String> invertedIdxCols = new ArrayList<>();
        invertedIdxCols.add("count");
        segmentWithInvIdx = new RealtimeSegmentImpl(schema, 100000, tableName, "noSegment", AVRO_DATA, new ServerMetrics(new MetricsRegistry()), invertedIdxCols, 2);
        segmentWithoutInvIdx = RealtimeSegmentImplTest.createRealtimeSegmentImpl(schema, 100000, tableName, "noSegment", AVRO_DATA, new ServerMetrics(new MetricsRegistry()));

        // Drain the provider; a null row ends the loop.
        GenericRow row = provider.next(new GenericRow());
        while (row != null) {
            segmentWithInvIdx.index(row);
            segmentWithoutInvIdx.index(row);
            row = GenericRow.createOrReuseRow(row);
            row = provider.next(row);
        }
    } finally {
        // Always release the provider, including when indexing throws.
        provider.shutdown();
    }
}
Also used : FileBasedStreamProviderImpl(com.linkedin.pinot.core.realtime.impl.FileBasedStreamProviderImpl) MetricsRegistry(com.yammer.metrics.core.MetricsRegistry) FileBasedStreamProviderConfig(com.linkedin.pinot.core.realtime.impl.FileBasedStreamProviderConfig) ArrayList(java.util.ArrayList) RealtimeSegmentImpl(com.linkedin.pinot.core.realtime.impl.RealtimeSegmentImpl) FieldType(com.linkedin.pinot.common.data.FieldSpec.FieldType) GenericRow(com.linkedin.pinot.core.data.GenericRow) ServerMetrics(com.linkedin.pinot.common.metrics.ServerMetrics) File(java.io.File) BeforeClass(org.testng.annotations.BeforeClass)

Example 10 with GenericRow

use of com.linkedin.pinot.core.data.GenericRow in project pinot by linkedin.

the class OffheapStarTreeBuilderWithHllFieldTest method testSimpleCore.

/**
 * Builds an off-heap star tree over generated rows that carry an HLL-derived metric and checks
 * the aggregated output.
 *
 * <p>Steps: assemble a schema (member-id dimension, {@code numDimensions - 1} string dimensions,
 * a "daysSinceEpoch" time column, {@code numMetrics - 1} int metrics and one HLL-derived metric),
 * append one row per entry of {@code memberIdColumnValues}, build the tree, log every document,
 * then verify on the aggregate documents that each skip-materialization dimension holds "ALL" and
 * that the HLL cardinality of the last aggregate document approximates {@code preciseCardinality}.
 *
 * @param numDimensions total number of dimensions, including the member-id dimension
 * @param numMetrics total number of metrics, including the HLL-derived metric
 * @param numSkipMaterializationDimensions how many of the trailing dimensions are placed in the
 *     skip-materialization set instead of the split order
 * @param memberIdColumnValues member-id value for each generated row
 * @param preciseCardinality expected cardinality passed to {@code assertApproximation} together
 *     with 0.1 (presumably a tolerance -- confirm against that helper)
 * @throws Exception if building the tree or cleaning up the output directory fails
 */
private void testSimpleCore(int numDimensions, int numMetrics, int numSkipMaterializationDimensions, int[] memberIdColumnValues, long preciseCardinality) throws Exception {
    StarTreeBuilderConfig builderConfig = null;
    try {
        builderConfig = new StarTreeBuilderConfig();
        Schema schema = new Schema();
        builderConfig.dimensionsSplitOrder = new ArrayList<>();
        builderConfig.setSkipMaterializationForDimensions(new HashSet<String>());
        Set<String> skipMaterializationForDimensions = builderConfig.getSkipMaterializationForDimensions();
        // add member id dimension spec
        String dimName = memberIdFieldName;
        DimensionFieldSpec dimensionFieldSpec = new DimensionFieldSpec(dimName, DataType.INT, true);
        schema.addField(dimensionFieldSpec);
        // add other dimension specs
        for (int i = 1; i < numDimensions; i++) {
            dimName = "d" + (i + 1);
            dimensionFieldSpec = new DimensionFieldSpec(dimName, DataType.STRING, true);
            schema.addField(dimensionFieldSpec);
            if (i < (numDimensions - numSkipMaterializationDimensions)) {
                builderConfig.dimensionsSplitOrder.add(dimName);
            } else {
                builderConfig.getSkipMaterializationForDimensions().add(dimName);
            }
        }
        schema.setTimeFieldSpec(new TimeFieldSpec("daysSinceEpoch", DataType.INT, TimeUnit.DAYS));
        // add other metric specs
        for (int i = 0; i < numMetrics - 1; i++) {
            String metricName = "m" + (i + 1);
            MetricFieldSpec metricFieldSpec = new MetricFieldSpec(metricName, DataType.INT);
            schema.addField(metricFieldSpec);
        }
        // add hll metric
        String hllMetricName = memberIdFieldName + hllDeriveFieldSuffix;
        MetricFieldSpec hllDerivedFieldSpec = new MetricFieldSpec(hllMetricName, FieldSpec.DataType.STRING, HllUtil.getHllFieldSizeFromLog2m(log2m), MetricFieldSpec.DerivedMetricType.HLL);
        schema.addField(hllDerivedFieldSpec);
        // builder configuration
        builderConfig.maxLeafRecords = 10;
        builderConfig.schema = schema;
        builderConfig.setOutDir(new File("/tmp/startree"));
        // create and initialise the builder
        OffHeapStarTreeBuilder builder = new OffHeapStarTreeBuilder();
        builder.init(builderConfig);
        // fill values
        HashMap<String, Object> map = new HashMap<>();
        for (int row = 0; row < memberIdColumnValues.length; row++) {
            // add member id column
            dimName = memberIdFieldName;
            map.put(dimName, memberIdColumnValues[row]);
            // add other dimensions
            for (int i = 1; i < numDimensions; i++) {
                dimName = schema.getDimensionFieldSpecs().get(i).getName();
                map.put(dimName, dimName + "-v" + row % (numDimensions - i));
            }
            // add time column
            map.put("daysSinceEpoch", 1);
            // add other metrics
            for (int i = 0; i < numMetrics - 1; i++) {
                String metName = schema.getMetricFieldSpecs().get(i).getName();
                map.put(metName, 1);
            }
            // add hll column value
            map.put(hllMetricName, HllUtil.singleValueHllAsString(log2m, memberIdColumnValues[row]));
            // append the row to the builder
            GenericRow genericRow = new GenericRow();
            genericRow.init(map);
            builder.append(genericRow);
        }
        builder.build();
        int totalDocs = builder.getTotalRawDocumentCount() + builder.getTotalAggregateDocumentCount();
        // log every document (raw and aggregate) for inspection
        Iterator<GenericRow> iterator = builder.iterator(0, totalDocs);
        while (iterator.hasNext()) {
            GenericRow row = iterator.next();
            LOGGER.info(HllUtil.inspectGenericRow(row, hllDeriveFieldSuffix));
        }
        // walk only the aggregate documents, which start right after the raw ones
        iterator = builder.iterator(builder.getTotalRawDocumentCount(), totalDocs);
        GenericRow lastRow = null;
        while (iterator.hasNext()) {
            GenericRow row = iterator.next();
            for (String skipDimension : skipMaterializationForDimensions) {
                String rowValue = (String) row.getValue(skipDimension);
                // Explicit check instead of the `assert` keyword, which is skipped when the JVM
                // runs without -ea; the comparison is also null-safe.
                if (!"ALL".equals(rowValue)) {
                    throw new AssertionError("Expected skip-materialization dimension " + skipDimension + " to be \"ALL\" but was " + rowValue);
                }
            }
            lastRow = row;
        }
        // Fail with a clear message rather than a NullPointerException when nothing was aggregated.
        if (lastRow == null) {
            throw new AssertionError("Star tree builder produced no aggregate documents");
        }
        assertApproximation(HllUtil.convertStringToHll((String) lastRow.getValue(hllMetricName)).cardinality(), preciseCardinality, 0.1);
    } finally {
        if (builderConfig != null) {
            FileUtils.deleteDirectory(builderConfig.getOutDir());
        }
    }
}
Also used : GenericRow(com.linkedin.pinot.core.data.GenericRow) File(java.io.File) StarTreeBuilderConfig(com.linkedin.pinot.core.startree.StarTreeBuilderConfig) OffHeapStarTreeBuilder(com.linkedin.pinot.core.startree.OffHeapStarTreeBuilder)

Aggregations

GenericRow (com.linkedin.pinot.core.data.GenericRow): 45, HashMap (java.util.HashMap): 24, File (java.io.File): 17, Test (org.testng.annotations.Test): 15, Schema (com.linkedin.pinot.common.data.Schema): 14, SegmentGeneratorConfig (com.linkedin.pinot.core.indexsegment.generator.SegmentGeneratorConfig): 14, ArrayList (java.util.ArrayList): 13, SegmentIndexCreationDriverImpl (com.linkedin.pinot.core.segment.creator.impl.SegmentIndexCreationDriverImpl): 11, DimensionFieldSpec (com.linkedin.pinot.common.data.DimensionFieldSpec): 8, RecordReader (com.linkedin.pinot.core.data.readers.RecordReader): 8, Random (java.util.Random): 6, JSONObject (org.json.JSONObject): 5, FieldSpec (com.linkedin.pinot.common.data.FieldSpec): 4, ServerMetrics (com.linkedin.pinot.common.metrics.ServerMetrics): 4, MetricFieldSpec (com.linkedin.pinot.common.data.MetricFieldSpec): 3, AvroRecordReader (com.linkedin.pinot.core.data.readers.AvroRecordReader): 3, PinotSegmentRecordReader (com.linkedin.pinot.core.data.readers.PinotSegmentRecordReader): 3, TestRecordReader (com.linkedin.pinot.core.data.readers.TestRecordReader): 3, MetricsRegistry (com.yammer.metrics.core.MetricsRegistry): 3, BeforeClass (org.testng.annotations.BeforeClass): 3