Use of com.linkedin.pinot.core.data.GenericRow in project pinot by LinkedIn.
From the class TestOffheapStarTreeBuilder, method testRandom:
@Test
public void testRandom() throws Exception {
  int ROWS = 100;
  int numDimensions = 6;
  int numMetrics = 6;
  StarTreeBuilderConfig builderConfig = new StarTreeBuilderConfig();
  Schema schema = new Schema();
  builderConfig.dimensionsSplitOrder = new ArrayList<>();
  for (int i = 0; i < numDimensions; i++) {
    String dimName = "d" + (i + 1);
    DimensionFieldSpec dimensionFieldSpec = new DimensionFieldSpec(dimName, DataType.INT, true);
    schema.addField(dimensionFieldSpec);
    builderConfig.dimensionsSplitOrder.add(dimName);
  }
  schema.setTimeFieldSpec(new TimeFieldSpec("daysSinceEpoch", DataType.INT, TimeUnit.DAYS));
  for (int i = 0; i < numMetrics; i++) {
    String metricName = "n" + (i + 1);
    MetricFieldSpec metricFieldSpec = new MetricFieldSpec(metricName, DataType.INT);
    schema.addField(metricFieldSpec);
  }
  builderConfig.maxLeafRecords = 10;
  builderConfig.schema = schema;
  builderConfig.outDir = new File("/tmp/startree");
  OffHeapStarTreeBuilder builder = new OffHeapStarTreeBuilder();
  builder.init(builderConfig);
  Random r = new Random();
  HashMap<String, Object> map = new HashMap<>();
  for (int row = 0; row < ROWS; row++) {
    for (int i = 0; i < numDimensions; i++) {
      String dimName = schema.getDimensionFieldSpecs().get(i).getName();
      map.put(dimName, dimName + "-v" + r.nextInt((numDimensions - i + 2)));
    }
    // time
    map.put("daysSinceEpoch", r.nextInt(1000));
    for (int i = 0; i < numMetrics; i++) {
      String metName = schema.getMetricFieldSpecs().get(i).getName();
      map.put(metName, r.nextInt((numDimensions - i + 2)));
    }
    GenericRow genericRow = new GenericRow();
    genericRow.init(map);
    builder.append(genericRow);
  }
  builder.build();
  FileUtils.deleteDirectory(builderConfig.outDir);
}
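Everything above funnels into the same three-step GenericRow pattern: populate a plain map keyed by column name, hand it to init(), and pass the row along. A minimal, self-contained sketch of just that pattern (the column names and values below are illustrative, not taken from the test):

import java.util.HashMap;
import java.util.Map;

import com.linkedin.pinot.core.data.GenericRow;

public class GenericRowSketch {

  public static void main(String[] args) {
    // Hypothetical columns: one dimension, one metric, one time column.
    Map<String, Object> fields = new HashMap<>();
    fields.put("d1", "d1-v0");
    fields.put("n1", 42);
    fields.put("daysSinceEpoch", 16000);

    // Populate the row from the map, as the test above does for each input record.
    GenericRow row = new GenericRow();
    row.init(fields);

    // Values are read back by column name.
    for (String column : row.getFieldNames()) {
      System.out.println(column + " = " + row.getValue(column));
    }
  }
}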
Use of com.linkedin.pinot.core.data.GenericRow in project pinot by LinkedIn.
From the class TestOffheapStarTreeBuilder, method testSimpleCore:
private void testSimpleCore(int numDimensions, int numMetrics, int numSkipMaterializationDimensions) throws Exception {
  int ROWS = (int) MathUtils.factorial(numDimensions);
  StarTreeBuilderConfig builderConfig = new StarTreeBuilderConfig();
  Schema schema = new Schema();
  builderConfig.dimensionsSplitOrder = new ArrayList<>();
  builderConfig.setSkipMaterializationForDimensions(new HashSet<String>());
  Set<String> skipMaterializationForDimensions = builderConfig.getSkipMaterializationForDimensions();
  for (int i = 0; i < numDimensions; i++) {
    String dimName = "d" + (i + 1);
    DimensionFieldSpec dimensionFieldSpec = new DimensionFieldSpec(dimName, DataType.STRING, true);
    schema.addField(dimensionFieldSpec);
    if (i < (numDimensions - numSkipMaterializationDimensions)) {
      builderConfig.dimensionsSplitOrder.add(dimName);
    } else {
      builderConfig.getSkipMaterializationForDimensions().add(dimName);
    }
  }
  schema.setTimeFieldSpec(new TimeFieldSpec("daysSinceEpoch", DataType.INT, TimeUnit.DAYS));
  for (int i = 0; i < numMetrics; i++) {
    String metricName = "m" + (i + 1);
    MetricFieldSpec metricFieldSpec = new MetricFieldSpec(metricName, DataType.INT);
    schema.addField(metricFieldSpec);
  }
  builderConfig.maxLeafRecords = 10;
  builderConfig.schema = schema;
  builderConfig.outDir = new File("/tmp/startree");
  OffHeapStarTreeBuilder builder = new OffHeapStarTreeBuilder();
  builder.init(builderConfig);
  HashMap<String, Object> map = new HashMap<>();
  for (int row = 0; row < ROWS; row++) {
    for (int i = 0; i < numDimensions; i++) {
      String dimName = schema.getDimensionFieldSpecs().get(i).getName();
      map.put(dimName, dimName + "-v" + row % (numDimensions - i));
    }
    // time
    map.put("daysSinceEpoch", 1);
    for (int i = 0; i < numMetrics; i++) {
      String metName = schema.getMetricFieldSpecs().get(i).getName();
      map.put(metName, 1);
    }
    GenericRow genericRow = new GenericRow();
    genericRow.init(map);
    builder.append(genericRow);
  }
  builder.build();
  int totalDocs = builder.getTotalRawDocumentCount() + builder.getTotalAggregateDocumentCount();
  Iterator<GenericRow> iterator = builder.iterator(0, totalDocs);
  while (iterator.hasNext()) {
    GenericRow row = iterator.next();
    // System.out.println(row);
  }
  iterator = builder.iterator(builder.getTotalRawDocumentCount(), totalDocs);
  while (iterator.hasNext()) {
    GenericRow row = iterator.next();
    for (String skipDimension : skipMaterializationForDimensions) {
      String rowValue = (String) row.getValue(skipDimension);
      assert (rowValue.equals("ALL"));
    }
  }
  FileUtils.deleteDirectory(builderConfig.outDir);
}
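Because the helper is parameterized, a driver test can sweep dimension and metric counts as well as the number of non-materialized dimensions. A hedged sketch of such a driver (the argument combinations are illustrative; the actual test class may use different ones):

@Test
public void testSimpleCoreVariants() throws Exception {
  // (numDimensions, numMetrics, numSkipMaterializationDimensions) - illustrative combinations.
  testSimpleCore(3, 3, 0); // every dimension materialized
  testSimpleCore(4, 2, 2); // last two dimensions should appear as "ALL" in aggregate docs
}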
Use of com.linkedin.pinot.core.data.GenericRow in project pinot by LinkedIn.
From the class RealtimeSegmentImplTest, method testDropInvalidRows:
@Test
public void testDropInvalidRows() throws Exception {
  Schema schema = new Schema.SchemaBuilder().setSchemaName("potato")
      .addSingleValueDimension("dimension", FieldSpec.DataType.STRING)
      .addMetric("metric", FieldSpec.DataType.LONG)
      .addTime("time", TimeUnit.SECONDS, FieldSpec.DataType.LONG)
      .build();
  RealtimeSegmentImpl realtimeSegment = createRealtimeSegmentImpl(schema, 100, "noTable", "noSegment", schema.getSchemaName(), new ServerMetrics(new MetricsRegistry()));
  // Segment should be empty
  Assert.assertEquals(realtimeSegment.getRawDocumentCount(), 0);
  Map<String, Object> genericRowContents = new HashMap<>();
  genericRowContents.put("dimension", "potato");
  genericRowContents.put("metric", 1234L);
  genericRowContents.put("time", 4567L);
  GenericRow row = new GenericRow();
  row.init(genericRowContents);
  // Add a valid row
  boolean notFull = realtimeSegment.index(row);
  Assert.assertEquals(notFull, true);
  Assert.assertEquals(realtimeSegment.getRawDocumentCount(), 1);
  // Add an invalid row
  genericRowContents.put("metric", null);
  notFull = realtimeSegment.index(row);
  Assert.assertEquals(notFull, true);
  Assert.assertEquals(realtimeSegment.getRawDocumentCount(), 1);
  // Add another valid row
  genericRowContents.put("metric", 2222L);
  notFull = realtimeSegment.index(row);
  Assert.assertEquals(notFull, true);
  Assert.assertEquals(realtimeSegment.getRawDocumentCount(), 2);
}
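Note that all three index() calls reuse the same GenericRow: the test mutates the backing map between calls and the row picks up the change, which suggests init(Map) works off the supplied map rather than a defensive copy. A small sketch of that behavior under the same assumption (the column name is hypothetical):

import java.util.HashMap;
import java.util.Map;

import com.linkedin.pinot.core.data.GenericRow;

public class RowReuseSketch {

  public static void main(String[] args) {
    Map<String, Object> contents = new HashMap<>();
    contents.put("metric", 1234L);

    GenericRow row = new GenericRow();
    row.init(contents);
    System.out.println(row.getValue("metric")); // 1234

    // Assumption (relied on by the test above): the row keeps reading from the
    // supplied map, so nulling the entry makes the already-initialized row invalid.
    contents.put("metric", null);
    System.out.println(row.getValue("metric")); // null
  }
}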
Use of com.linkedin.pinot.core.data.GenericRow in project pinot by LinkedIn.
From the class FilterTreeOptimizationTest, method buildSegment:
/**
 * Helper method to build a segment.
 *
 * @param segmentDirName Name of segment directory
 * @param segmentName Name of segment
 * @param schema Schema for segment
 * @return RecordReader used to build the segment
 * @throws Exception
 */
private RecordReader buildSegment(String segmentDirName, String segmentName, Schema schema) throws Exception {
  SegmentGeneratorConfig config = new SegmentGeneratorConfig(schema);
  config.setOutDir(segmentDirName);
  config.setFormat(FileFormat.AVRO);
  config.setTableName(TABLE_NAME);
  config.setSegmentName(segmentName);
  final List<GenericRow> data = new ArrayList<>();
  for (int row = 0; row < NUM_ROWS; row++) {
    HashMap<String, Object> map = new HashMap<>();
    for (String dimensionName : DIMENSIONS) {
      map.put(dimensionName, dimensionName + '_' + (row % MAX_DIMENSION_VALUES));
    }
    GenericRow genericRow = new GenericRow();
    genericRow.init(map);
    data.add(genericRow);
  }
  SegmentIndexCreationDriverImpl driver = new SegmentIndexCreationDriverImpl();
  RecordReader reader = new TestUtils.GenericRowRecordReader(schema, data);
  driver.init(config, reader);
  driver.build();
  LOGGER.info("Built segment {} at {}", segmentName, segmentDirName);
  return reader;
}
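A hedged sketch of how a test setup might invoke this helper, building a STRING schema over the class's DIMENSIONS constant; the directory and segment names here are illustrative, not necessarily those the class actually uses:

@BeforeClass
public void setUp() throws Exception {
  // Illustrative names; the real test class may define its own constants.
  String segmentDirName = "/tmp/filterTreeOptimizationTest";
  String segmentName = "testSegment";

  Schema.SchemaBuilder schemaBuilder = new Schema.SchemaBuilder().setSchemaName(TABLE_NAME);
  for (String dimensionName : DIMENSIONS) {
    schemaBuilder.addSingleValueDimension(dimensionName, FieldSpec.DataType.STRING);
  }
  Schema schema = schemaBuilder.build();

  // Build the segment and keep the reader around for later verification.
  RecordReader recordReader = buildSegment(segmentDirName, segmentName, schema);
}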
Use of com.linkedin.pinot.core.data.GenericRow in project pinot by LinkedIn.
From the class AvroDataPublisherTest, method TestReadPartialAvro:
@Test
public void TestReadPartialAvro() throws Exception {
  final String filePath = TestUtils.getFileFromResourceUrl(getClass().getClassLoader().getResource(AVRO_DATA));
  final String jsonPath = TestUtils.getFileFromResourceUrl(getClass().getClassLoader().getResource(JSON_DATA));
  final List<String> projectedColumns = new ArrayList<String>();
  projectedColumns.add("column3");
  projectedColumns.add("column2");
  Schema schema = new Schema.SchemaBuilder()
      .addSingleValueDimension("column3", DataType.STRING)
      .addSingleValueDimension("column2", DataType.STRING)
      .build();
  final SegmentGeneratorConfig config = new SegmentGeneratorConfig(schema);
  config.setFormat(FileFormat.AVRO);
  config.setInputFilePath(filePath);
  config.setSegmentVersion(SegmentVersion.v1);
  final AvroRecordReader avroDataPublisher = new AvroRecordReader(FieldExtractorFactory.getPlainFieldExtractor(config), config.getInputFilePath());
  avroDataPublisher.next();
  int cnt = 0;
  for (final String line : FileUtils.readLines(new File(jsonPath))) {
    final JSONObject obj = new JSONObject(line);
    if (avroDataPublisher.hasNext()) {
      final GenericRow recordRow = avroDataPublisher.next();
      // System.out.println(recordRow);
      Assert.assertEquals(recordRow.getFieldNames().length, 2);
      for (final String column : recordRow.getFieldNames()) {
        final String valueFromJson = obj.get(column).toString();
        final String valueFromAvro = recordRow.getValue(column).toString();
        if (cnt > 1) {
          Assert.assertEquals(valueFromAvro, valueFromJson);
        }
      }
    }
    cnt++;
  }
  Assert.assertEquals(10001, cnt);
}
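Once constructed, the reader pairs naturally with GenericRow's name-based accessors. A minimal sketch that just dumps the projected columns of every remaining record, assuming the same SegmentGeneratorConfig setup as in the test above:

// Continuation of the setup above: reuses the SegmentGeneratorConfig named 'config'.
AvroRecordReader reader = new AvroRecordReader(FieldExtractorFactory.getPlainFieldExtractor(config), config.getInputFilePath());
while (reader.hasNext()) {
  GenericRow record = reader.next();
  for (String column : record.getFieldNames()) {
    System.out.println(column + " = " + record.getValue(column));
  }
}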