Use of org.apache.avro.generic.GenericRecord in project pinot by linkedin.
The class BitmapInvertedIndexTest, method testBitMapInvertedIndex.

void testBitMapInvertedIndex(ReadMode readMode) throws Exception {
  IndexLoadingConfigMetadata indexLoadingConfig = new IndexLoadingConfigMetadata(new PropertiesConfiguration());
  indexLoadingConfig.initLoadingInvertedIndexColumnSet(invertedIndexColumns);
  final IndexSegmentImpl mmapSegment =
      (IndexSegmentImpl) ColumnarSegmentLoader.load(segmentDirectory, readMode, indexLoadingConfig);
  // Compare the loaded inverted index with the records in the Avro file.
  final DataFileStream<GenericRecord> reader = new DataFileStream<GenericRecord>(
      new FileInputStream(new File(getClass().getClassLoader().getResource(AVRO_DATA).getFile())),
      new GenericDatumReader<GenericRecord>());
  int docId = 0;
  while (reader.hasNext()) {
    final GenericRecord rec = reader.next();
    for (final String column : ((SegmentMetadataImpl) mmapSegment.getSegmentMetadata()).getColumnMetadataMap().keySet()) {
      Object entry = rec.get(column);
      if (entry instanceof Utf8) {
        entry = ((Utf8) entry).toString();
      }
      final int dicId = mmapSegment.getDictionaryFor(column).indexOf(entry);
      // Make sure the docId for this dicId exists in the inverted index.
      Assert.assertTrue(mmapSegment.getInvertedIndexFor(column).getImmutable(dicId).contains(docId));
      final int size = mmapSegment.getDictionaryFor(column).length();
      for (int i = 0; i < size; ++i) {
        // NOTE: removing this inner loop makes the test much faster, at the cost of coverage.
        if (i == dicId) {
          continue;
        }
        // Make sure the docId does not appear under any other dictionary id.
        Assert.assertFalse(mmapSegment.getInvertedIndexFor(column).getImmutable(i).contains(docId));
      }
    }
    ++docId;
  }
  reader.close();
}
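
The read loop above shows the two Avro idioms this test relies on: streaming records with DataFileStream and normalizing Utf8 values to java.lang.String before comparisons. A minimal standalone sketch of just that pattern, assuming a local file data.avro and a column someColumn (both placeholders, not from the test):

import java.io.File;
import java.io.FileInputStream;
import org.apache.avro.file.DataFileStream;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.util.Utf8;

public class AvroReadSketch {
  public static void main(String[] args) throws Exception {
    // DataFileStream reads the writer schema from the Avro container header.
    try (DataFileStream<GenericRecord> reader = new DataFileStream<>(
        new FileInputStream(new File("data.avro")), new GenericDatumReader<GenericRecord>())) {
      while (reader.hasNext()) {
        GenericRecord rec = reader.next();
        Object value = rec.get("someColumn"); // hypothetical column name
        // Avro hands strings back as Utf8; convert before equals()/indexOf() against Strings.
        if (value instanceof Utf8) {
          value = value.toString();
        }
        System.out.println(value);
      }
    }
  }
}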
Use of org.apache.avro.generic.GenericRecord in project pinot by linkedin.
The class BlocksTest, method before.

@BeforeClass
public static void before() throws Exception {
  final String filePath = TestUtils.getFileFromResourceUrl(BlocksTest.class.getClassLoader().getResource(AVRO_DATA));
  if (INDEX_DIR.exists()) {
    FileUtils.deleteQuietly(INDEX_DIR);
  }
  final SegmentIndexCreationDriver driver = SegmentCreationDriverFactory.get(null);
  final SegmentGeneratorConfig config = SegmentTestUtils.getSegmentGenSpecWithSchemAndProjectedColumns(
      new File(filePath), INDEX_DIR, "daysSinceEpoch", TimeUnit.DAYS, "test");
  config.setTimeColumnName("daysSinceEpoch");
  driver.init(config);
  driver.build();
  final DataFileStream<GenericRecord> avroReader = AvroUtils.getAvroReader(new File(filePath));
  final org.apache.avro.Schema avroSchema = avroReader.getSchema();
  final String[] columns = new String[avroSchema.getFields().size()];
  int i = 0;
  for (final Field f : avroSchema.getFields()) {
    columns[i] = f.name();
    i++;
  }
  avroReader.close();
}
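
The trailing loop is the generic recipe for turning an embedded Avro schema into a list of column names. A compact standalone sketch of the same extraction using the streams API (the file name is a placeholder):

import java.io.File;
import java.io.FileInputStream;
import java.util.List;
import java.util.stream.Collectors;
import org.apache.avro.Schema;
import org.apache.avro.file.DataFileStream;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericRecord;

public class SchemaColumnsSketch {
  public static void main(String[] args) throws Exception {
    try (DataFileStream<GenericRecord> reader = new DataFileStream<>(
        new FileInputStream(new File("data.avro")), new GenericDatumReader<GenericRecord>())) {
      // Field names come straight from the schema embedded in the file.
      List<String> columns = reader.getSchema().getFields().stream()
          .map(Schema.Field::name)
          .collect(Collectors.toList());
      System.out.println(columns);
    }
  }
}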
Use of org.apache.avro.generic.GenericRecord in project pinot by linkedin.
The class DictionariesTest, method before.

@BeforeClass
public static void before() throws Exception {
  final String filePath = TestUtils.getFileFromResourceUrl(DictionariesTest.class.getClassLoader().getResource(AVRO_DATA));
  if (INDEX_DIR.exists()) {
    FileUtils.deleteQuietly(INDEX_DIR);
  }
  final SegmentGeneratorConfig config = SegmentTestUtils.getSegmentGenSpecWithSchemAndProjectedColumns(
      new File(filePath), INDEX_DIR, "time_day", TimeUnit.DAYS, "test");
  final SegmentIndexCreationDriver driver = SegmentCreationDriverFactory.get(null);
  driver.init(config);
  driver.build();
  segmentDirectory = new File(INDEX_DIR, driver.getSegmentName());
  final Schema schema = AvroUtils.extractSchemaFromAvro(new File(filePath));
  final DataFileStream<GenericRecord> avroReader = AvroUtils.getAvroReader(new File(filePath));
  final org.apache.avro.Schema avroSchema = avroReader.getSchema();
  final String[] columns = new String[avroSchema.getFields().size()];
  int i = 0;
  for (final Field f : avroSchema.getFields()) {
    columns[i] = f.name();
    i++;
  }
  uniqueEntries = new HashMap<String, Set<Object>>();
  for (final String column : columns) {
    uniqueEntries.put(column, new HashSet<Object>());
  }
  while (avroReader.hasNext()) {
    final GenericRecord rec = avroReader.next();
    for (final String column : columns) {
      Object val = rec.get(column);
      if (val instanceof Utf8) {
        val = ((Utf8) val).toString();
      }
      uniqueEntries.get(column).add(getAppropriateType(schema.getFieldSpecFor(column).getDataType(), val));
    }
  }
  avroReader.close();
}
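
The uniqueEntries map is a per-column distinct-value scan that the dictionary assertions later check against. A self-contained sketch of the same scan, minus the Pinot-specific type coercion (getAppropriateType); the file name is a placeholder:

import java.io.File;
import java.io.FileInputStream;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import org.apache.avro.Schema;
import org.apache.avro.file.DataFileStream;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.util.Utf8;

public class UniqueValuesSketch {
  public static void main(String[] args) throws Exception {
    Map<String, Set<Object>> uniqueEntries = new HashMap<>();
    try (DataFileStream<GenericRecord> reader = new DataFileStream<>(
        new FileInputStream(new File("data.avro")), new GenericDatumReader<GenericRecord>())) {
      // One set per column, keyed by field name.
      for (Schema.Field f : reader.getSchema().getFields()) {
        uniqueEntries.put(f.name(), new HashSet<>());
      }
      while (reader.hasNext()) {
        GenericRecord rec = reader.next();
        for (Map.Entry<String, Set<Object>> e : uniqueEntries.entrySet()) {
          Object val = rec.get(e.getKey());
          e.getValue().add(val instanceof Utf8 ? val.toString() : val);
        }
      }
    }
    uniqueEntries.forEach((col, vals) -> System.out.println(col + ": " + vals.size() + " distinct"));
  }
}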
Use of org.apache.avro.generic.GenericRecord in project pinot by linkedin.
The class SegmentTestUtils, method extractSchemaFromAvroWithoutTime.

public static Schema extractSchemaFromAvroWithoutTime(File avroFile) throws FileNotFoundException, IOException {
  DataFileStream<GenericRecord> dataStream =
      new DataFileStream<GenericRecord>(new FileInputStream(avroFile), new GenericDatumReader<GenericRecord>());
  Schema schema = new Schema();
  for (final Field field : dataStream.getSchema().getFields()) {
    try {
      getColumnType(field);
    } catch (Exception e) {
      LOGGER.warn("Caught exception while converting Avro field {} of type {}, field will not be in schema.", field.name(), field.schema().getType());
      continue;
    }
    final String columnName = field.name();
    final String pinotType = field.getProp("pinotType");
    final FieldSpec fieldSpec;
    if (pinotType != null && "METRIC".equals(pinotType)) {
      fieldSpec = new MetricFieldSpec();
    } else {
      fieldSpec = new DimensionFieldSpec();
    }
    fieldSpec.setName(columnName);
    fieldSpec.setDataType(getColumnType(dataStream.getSchema().getField(columnName)));
    fieldSpec.setSingleValueField(isSingleValueField(dataStream.getSchema().getField(columnName)));
    schema.addField(fieldSpec);
  }
  dataStream.close();
  return schema;
}
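
The METRIC/DIMENSION split hinges on Avro's per-field custom properties, read here with field.getProp("pinotType"). A small sketch of defining and reading such a property, assuming an inline schema literal (illustrative, not from the project):

import org.apache.avro.Schema;

public class FieldPropSketch {
  public static void main(String[] args) {
    // Extra attributes on a field are preserved by Avro's parser and exposed via getProp().
    String json = "{\"type\":\"record\",\"name\":\"Row\",\"fields\":["
        + "{\"name\":\"clicks\",\"type\":\"long\",\"pinotType\":\"METRIC\"},"
        + "{\"name\":\"country\",\"type\":\"string\"}]}";
    Schema schema = new Schema.Parser().parse(json);
    for (Schema.Field field : schema.getFields()) {
      String pinotType = field.getProp("pinotType"); // null when the property is absent
      System.out.println(field.name() + " -> " + ("METRIC".equals(pinotType) ? "metric" : "dimension (default)"));
    }
  }
}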
Use of org.apache.avro.generic.GenericRecord in project pinot by linkedin.
The class UploadRefreshDeleteIntegrationTest, method generateAndUploadRandomSegment1.

protected void generateAndUploadRandomSegment1(final String segmentName, int rowCount) throws Exception {
  ThreadLocalRandom random = ThreadLocalRandom.current();
  Schema schema = new Schema.Parser().parse(new File(TestUtils.getFileFromResourceUrl(getClass().getClassLoader().getResource("dummy.avsc"))));
  GenericRecord record = new GenericData.Record(schema);
  GenericDatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<GenericRecord>(schema);
  DataFileWriter<GenericRecord> fileWriter = new DataFileWriter<GenericRecord>(datumWriter);
  final File avroFile = new File(_tmpDir, segmentName + ".avro");
  fileWriter.create(schema, avroFile);
  for (int i = 0; i < rowCount; i++) {
    record.put(0, random.nextInt());
    fileWriter.append(record);
  }
  fileWriter.close();
  final int segmentIndex = Integer.parseInt(segmentName.split("_")[1]);
  final String TAR_GZ_FILE_EXTENSION = ".tar.gz";
  File segmentTarDir = new File(_tarsDir, segmentName);
  buildSegment(segmentTarDir, avroFile, segmentIndex, segmentName, 0);
  String segmentFileName = segmentName;
  for (String name : segmentTarDir.list()) {
    if (name.endsWith(TAR_GZ_FILE_EXTENSION)) {
      segmentFileName = name;
    }
  }
  File file = new File(segmentTarDir, segmentFileName);
  long segmentLength = file.length();
  final File segmentTarDir1 = new File(_tarsDir, segmentName);
  FileUtils.deleteQuietly(segmentTarDir);
  new Thread(new Runnable() {
    @Override
    public void run() {
      try {
        buildSegment(segmentTarDir1, avroFile, segmentIndex, segmentName, 5);
      } catch (Exception e) {
        // Ignored: the concurrent rebuild is best-effort in this test.
      }
    }
  }).start();
  FileUploadUtils.sendSegmentFile("localhost", "8998", segmentFileName, file, segmentLength, 5, 5);
  avroFile.delete();
  FileUtils.deleteQuietly(segmentTarDir);
}
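
The Avro-writing half of this method is the standard GenericDatumWriter plus DataFileWriter pairing. A standalone sketch with an inline single-int-field schema (the schema and output path are illustrative, not the test's dummy.avsc):

import java.io.File;
import java.util.concurrent.ThreadLocalRandom;
import org.apache.avro.Schema;
import org.apache.avro.file.DataFileWriter;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.generic.GenericRecord;

public class RandomAvroWriter {
  public static void main(String[] args) throws Exception {
    Schema schema = new Schema.Parser().parse(
        "{\"type\":\"record\",\"name\":\"Dummy\",\"fields\":[{\"name\":\"value\",\"type\":\"int\"}]}");
    try (DataFileWriter<GenericRecord> writer =
        new DataFileWriter<>(new GenericDatumWriter<GenericRecord>(schema))) {
      writer.create(schema, new File("random.avro")); // writes the container header, schema included
      GenericRecord record = new GenericData.Record(schema);
      for (int i = 0; i < 100; i++) {
        // Reusing one record instance is safe: append() serializes immediately.
        record.put(0, ThreadLocalRandom.current().nextInt());
        writer.append(record);
      }
    }
  }
}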