Example 61 with GenericRecord

use of org.apache.avro.generic.GenericRecord in project pinot by linkedin.

the class BitmapInvertedIndexTest method testBitMapInvertedIndex.

void testBitMapInvertedIndex(ReadMode readMode) throws Exception {
    IndexLoadingConfigMetadata indexLoadingConfig = new IndexLoadingConfigMetadata(new PropertiesConfiguration());
    indexLoadingConfig.initLoadingInvertedIndexColumnSet(invertedIndexColumns);
    final IndexSegmentImpl mmapSegment = (IndexSegmentImpl) ColumnarSegmentLoader.load(segmentDirectory, readMode, indexLoadingConfig);
    // compare the loaded inverted index with the records in the Avro file
    final DataFileStream<GenericRecord> reader = new DataFileStream<GenericRecord>(new FileInputStream(new File(getClass().getClassLoader().getResource(AVRO_DATA).getFile())), new GenericDatumReader<GenericRecord>());
    int docId = 0;
    while (reader.hasNext()) {
        final GenericRecord rec = reader.next();
        for (final String column : ((SegmentMetadataImpl) mmapSegment.getSegmentMetadata()).getColumnMetadataMap().keySet()) {
            Object entry = rec.get(column);
            if (entry instanceof Utf8) {
                entry = ((Utf8) entry).toString();
            }
            final int dicId = mmapSegment.getDictionaryFor(column).indexOf(entry);
            // make sure that the docId for this dicId exists in the inverted index
            Assert.assertTrue(mmapSegment.getInvertedIndexFor(column).getImmutable(dicId).contains(docId));
            final int size = mmapSegment.getDictionaryFor(column).length();
            for (int i = 0; i < size; ++i) {
                // this exhaustive inner loop can be removed for a quicker test run
                if (i == dicId) {
                    continue;
                }
                // make sure that the docId does not appear in the inverted index of any other dicId
                Assert.assertFalse(mmapSegment.getInvertedIndexFor(column).getImmutable(i).contains(docId));
            }
        }
        ++docId;
    }
    reader.close();
}
Also used : IndexLoadingConfigMetadata(com.linkedin.pinot.common.metadata.segment.IndexLoadingConfigMetadata) DataFileStream(org.apache.avro.file.DataFileStream) PropertiesConfiguration(org.apache.commons.configuration.PropertiesConfiguration) FileInputStream(java.io.FileInputStream) IndexSegmentImpl(com.linkedin.pinot.core.segment.index.IndexSegmentImpl) Utf8(org.apache.avro.util.Utf8) GenericRecord(org.apache.avro.generic.GenericRecord) File(java.io.File)
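The Avro-side pattern this test relies on can be reduced to a small, self-contained sketch: open the file with a DataFileStream over a GenericDatumReader, iterate GenericRecord instances, and normalize Utf8 values to java.lang.String before comparing them against dictionary entries. This is not the Pinot test itself; the class name and file path below are placeholders.

import java.io.File;
import java.io.FileInputStream;
import org.apache.avro.file.DataFileStream;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.util.Utf8;

public class AvroReadSketch {
    public static void main(String[] args) throws Exception {
        // Placeholder path; substitute the Avro file used by the test.
        File avroFile = new File("data/sample.avro");
        DataFileStream<GenericRecord> reader = new DataFileStream<GenericRecord>(new FileInputStream(avroFile), new GenericDatumReader<GenericRecord>());
        try {
            while (reader.hasNext()) {
                GenericRecord rec = reader.next();
                for (org.apache.avro.Schema.Field field : reader.getSchema().getFields()) {
                    Object value = rec.get(field.name());
                    // Avro returns Utf8 for string fields; convert before lookups or comparisons.
                    if (value instanceof Utf8) {
                        value = ((Utf8) value).toString();
                    }
                    System.out.println(field.name() + " = " + value);
                }
            }
        } finally {
            reader.close();
        }
    }
}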

Example 62 with GenericRecord

use of org.apache.avro.generic.GenericRecord in project pinot by linkedin.

the class BlocksTest method before.

@BeforeClass
public static void before() throws Exception {
    final String filePath = TestUtils.getFileFromResourceUrl(BlocksTest.class.getClassLoader().getResource(AVRO_DATA));
    if (INDEX_DIR.exists()) {
        FileUtils.deleteQuietly(INDEX_DIR);
    }
    //    System.out.println(INDEX_DIR.getAbsolutePath());
    final SegmentIndexCreationDriver driver = SegmentCreationDriverFactory.get(null);
    final SegmentGeneratorConfig config = SegmentTestUtils.getSegmentGenSpecWithSchemAndProjectedColumns(new File(filePath), INDEX_DIR, "daysSinceEpoch", TimeUnit.DAYS, "test");
    config.setTimeColumnName("daysSinceEpoch");
    driver.init(config);
    driver.build();
    final DataFileStream<GenericRecord> avroReader = AvroUtils.getAvroReader(new File(filePath));
    final org.apache.avro.Schema avroSchema = avroReader.getSchema();
    final String[] columns = new String[avroSchema.getFields().size()];
    int i = 0;
    for (final Field f : avroSchema.getFields()) {
        columns[i] = f.name();
        i++;
    }
    avroReader.close();
}
Also used : SegmentIndexCreationDriver(com.linkedin.pinot.core.segment.creator.SegmentIndexCreationDriver) Field(org.apache.avro.Schema.Field) SegmentGeneratorConfig(com.linkedin.pinot.core.indexsegment.generator.SegmentGeneratorConfig) GenericRecord(org.apache.avro.generic.GenericRecord) File(java.io.File) BeforeClass(org.testng.annotations.BeforeClass)
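The Avro-specific step here is deriving the column list from the file's embedded schema rather than from Pinot metadata. A minimal, self-contained sketch of that step follows, with a plain DataFileStream standing in for AvroUtils.getAvroReader and a placeholder file path.

import java.io.File;
import java.io.FileInputStream;
import org.apache.avro.Schema;
import org.apache.avro.file.DataFileStream;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericRecord;

public class AvroSchemaColumnsSketch {
    public static void main(String[] args) throws Exception {
        // Placeholder path; substitute your own Avro file.
        File avroFile = new File("data/sample.avro");
        DataFileStream<GenericRecord> avroReader = new DataFileStream<GenericRecord>(new FileInputStream(avroFile), new GenericDatumReader<GenericRecord>());
        try {
            Schema avroSchema = avroReader.getSchema();
            String[] columns = new String[avroSchema.getFields().size()];
            int i = 0;
            for (Schema.Field f : avroSchema.getFields()) {
                columns[i++] = f.name();
                System.out.println(f.name() + " : " + f.schema().getType());
            }
        } finally {
            avroReader.close();
        }
    }
}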

Example 63 with GenericRecord

use of org.apache.avro.generic.GenericRecord in project pinot by linkedin.

the class DictionariesTest method before.

@BeforeClass
public static void before() throws Exception {
    final String filePath = TestUtils.getFileFromResourceUrl(DictionariesTest.class.getClassLoader().getResource(AVRO_DATA));
    if (INDEX_DIR.exists()) {
        FileUtils.deleteQuietly(INDEX_DIR);
    }
    final SegmentGeneratorConfig config = SegmentTestUtils.getSegmentGenSpecWithSchemAndProjectedColumns(new File(filePath), INDEX_DIR, "time_day", TimeUnit.DAYS, "test");
    final SegmentIndexCreationDriver driver = SegmentCreationDriverFactory.get(null);
    driver.init(config);
    driver.build();
    segmentDirectory = new File(INDEX_DIR, driver.getSegmentName());
    final Schema schema = AvroUtils.extractSchemaFromAvro(new File(filePath));
    final DataFileStream<GenericRecord> avroReader = AvroUtils.getAvroReader(new File(filePath));
    final org.apache.avro.Schema avroSchema = avroReader.getSchema();
    final String[] columns = new String[avroSchema.getFields().size()];
    int i = 0;
    for (final Field f : avroSchema.getFields()) {
        columns[i] = f.name();
        i++;
    }
    uniqueEntries = new HashMap<String, Set<Object>>();
    for (final String column : columns) {
        uniqueEntries.put(column, new HashSet<Object>());
    }
    while (avroReader.hasNext()) {
        final GenericRecord rec = avroReader.next();
        for (final String column : columns) {
            Object val = rec.get(column);
            if (val instanceof Utf8) {
                val = ((Utf8) val).toString();
            }
            uniqueEntries.get(column).add(getAppropriateType(schema.getFieldSpecFor(column).getDataType(), val));
        }
    }
    avroReader.close();
}
Also used : SegmentIndexCreationDriver(com.linkedin.pinot.core.segment.creator.SegmentIndexCreationDriver) HashSet(java.util.HashSet) Set(java.util.Set) Schema(com.linkedin.pinot.common.data.Schema) Field(org.apache.avro.Schema.Field) SegmentGeneratorConfig(com.linkedin.pinot.core.indexsegment.generator.SegmentGeneratorConfig) Utf8(org.apache.avro.util.Utf8) GenericRecord(org.apache.avro.generic.GenericRecord) File(java.io.File) BeforeClass(org.testng.annotations.BeforeClass)

Example 64 with GenericRecord

use of org.apache.avro.generic.GenericRecord in project pinot by linkedin.

the class SegmentTestUtils method extractSchemaFromAvroWithoutTime.

public static Schema extractSchemaFromAvroWithoutTime(File avroFile) throws FileNotFoundException, IOException {
    DataFileStream<GenericRecord> dataStream = new DataFileStream<GenericRecord>(new FileInputStream(avroFile), new GenericDatumReader<GenericRecord>());
    Schema schema = new Schema();
    for (final Field field : dataStream.getSchema().getFields()) {
        try {
            getColumnType(field);
        } catch (Exception e) {
            LOGGER.warn("Caught exception while converting Avro field {} of type {}, field will not be in schema.", field.name(), field.schema().getType());
            continue;
        }
        final String columnName = field.name();
        final String pinotType = field.getProp("pinotType");
        final FieldSpec fieldSpec;
        if (pinotType != null && "METRIC".equals(pinotType)) {
            fieldSpec = new MetricFieldSpec();
        } else {
            fieldSpec = new DimensionFieldSpec();
        }
        fieldSpec.setName(columnName);
        fieldSpec.setDataType(getColumnType(dataStream.getSchema().getField(columnName)));
        fieldSpec.setSingleValueField(isSingleValueField(dataStream.getSchema().getField(columnName)));
        schema.addField(fieldSpec);
    }
    dataStream.close();
    return schema;
}
Also used : Field(org.apache.avro.Schema.Field) Schema(com.linkedin.pinot.common.data.Schema) DataFileStream(org.apache.avro.file.DataFileStream) GenericRecord(org.apache.avro.generic.GenericRecord) MetricFieldSpec(com.linkedin.pinot.common.data.MetricFieldSpec) FileInputStream(java.io.FileInputStream) IOException(java.io.IOException) FileNotFoundException(java.io.FileNotFoundException) TimeFieldSpec(com.linkedin.pinot.common.data.TimeFieldSpec) FieldSpec(com.linkedin.pinot.common.data.FieldSpec) DimensionFieldSpec(com.linkedin.pinot.common.data.DimensionFieldSpec)
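The schema-mapping logic above leans on two Pinot helpers, getColumnType and isSingleValueField, which are not reproduced here. The Avro half of the idea, reading the optional "pinotType" property that the .avsc definition may attach to a field, can be sketched on its own; the class name and file path below are placeholders.

import java.io.File;
import java.io.FileInputStream;
import org.apache.avro.Schema;
import org.apache.avro.file.DataFileStream;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericRecord;

public class AvroFieldPropSketch {
    public static void main(String[] args) throws Exception {
        // Placeholder path; substitute your own Avro file.
        File avroFile = new File("data/sample.avro");
        DataFileStream<GenericRecord> dataStream = new DataFileStream<GenericRecord>(new FileInputStream(avroFile), new GenericDatumReader<GenericRecord>());
        try {
            for (Schema.Field field : dataStream.getSchema().getFields()) {
                // getProp returns null when the property is absent from the schema definition.
                String pinotType = field.getProp("pinotType");
                boolean isMetric = "METRIC".equals(pinotType);
                System.out.println(field.name() + " -> " + field.schema().getType() + (isMetric ? " (metric)" : " (dimension)"));
            }
        } finally {
            dataStream.close();
        }
    }
}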

Example 65 with GenericRecord

use of org.apache.avro.generic.GenericRecord in project pinot by linkedin.

the class UploadRefreshDeleteIntegrationTest method generateAndUploadRandomSegment1.

protected void generateAndUploadRandomSegment1(final String segmentName, int rowCount) throws Exception {
    ThreadLocalRandom random = ThreadLocalRandom.current();
    Schema schema = new Schema.Parser().parse(new File(TestUtils.getFileFromResourceUrl(getClass().getClassLoader().getResource("dummy.avsc"))));
    GenericRecord record = new GenericData.Record(schema);
    GenericDatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<GenericRecord>(schema);
    DataFileWriter<GenericRecord> fileWriter = new DataFileWriter<GenericRecord>(datumWriter);
    final File avroFile = new File(_tmpDir, segmentName + ".avro");
    fileWriter.create(schema, avroFile);
    for (int i = 0; i < rowCount; i++) {
        record.put(0, random.nextInt());
        fileWriter.append(record);
    }
    fileWriter.close();
    final int segmentIndex = Integer.parseInt(segmentName.split("_")[1]);
    final String TAR_GZ_FILE_EXTENTION = ".tar.gz";
    File segmentTarDir = new File(_tarsDir, segmentName);
    buildSegment(segmentTarDir, avroFile, segmentIndex, segmentName, 0);
    String segmentFileName = segmentName;
    for (String name : segmentTarDir.list()) {
        if (name.endsWith(TAR_GZ_FILE_EXTENTION)) {
            segmentFileName = name;
        }
    }
    File file = new File(segmentTarDir, segmentFileName);
    long segmentLength = file.length();
    final File segmentTarDir1 = new File(_tarsDir, segmentName);
    FileUtils.deleteQuietly(segmentTarDir);
    new Thread(new Runnable() {

        @Override
        public void run() {
            try {
                buildSegment(segmentTarDir1, avroFile, segmentIndex, segmentName, 5);
            } catch (Exception e) {
                // exceptions from the concurrent segment rebuild are swallowed here
            }
        }
    }).start();
    FileUploadUtils.sendSegmentFile("localhost", "8998", segmentFileName, file, segmentLength, 5, 5);
    avroFile.delete();
    FileUtils.deleteQuietly(segmentTarDir);
}
Also used : Schema(org.apache.avro.Schema) DataFileWriter(org.apache.avro.file.DataFileWriter) GenericDatumWriter(org.apache.avro.generic.GenericDatumWriter) ThreadLocalRandom(java.util.concurrent.ThreadLocalRandom) GenericRecord(org.apache.avro.generic.GenericRecord) File(java.io.File)
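The part of this test that exercises GenericRecord directly is the write side: build a GenericData.Record against a parsed schema, then append it repeatedly through a DataFileWriter. A self-contained sketch of just that step follows; the inline schema stands in for dummy.avsc and the output path is a placeholder.

import java.io.File;
import java.util.concurrent.ThreadLocalRandom;
import org.apache.avro.Schema;
import org.apache.avro.file.DataFileWriter;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.generic.GenericRecord;

public class AvroWriteSketch {
    public static void main(String[] args) throws Exception {
        // Assumed schema with a single int field named "value"; adjust to match your .avsc.
        Schema schema = new Schema.Parser().parse("{\"type\":\"record\",\"name\":\"dummy\",\"fields\":[{\"name\":\"value\",\"type\":\"int\"}]}");
        // Placeholder output path.
        File avroFile = new File("segment_0.avro");
        DataFileWriter<GenericRecord> fileWriter = new DataFileWriter<GenericRecord>(new GenericDatumWriter<GenericRecord>(schema));
        fileWriter.create(schema, avroFile);
        try {
            GenericRecord record = new GenericData.Record(schema);
            for (int i = 0; i < 100; i++) {
                // put by field position, as in the test above; put("value", ...) also works
                record.put(0, ThreadLocalRandom.current().nextInt());
                fileWriter.append(record);
            }
        } finally {
            fileWriter.close();
        }
    }
}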

Aggregations

GenericRecord (org.apache.avro.generic.GenericRecord): 262
Schema (org.apache.avro.Schema): 101
Test (org.junit.Test): 80
GenericDatumWriter (org.apache.avro.generic.GenericDatumWriter): 46
File (java.io.File): 35
IOException (java.io.IOException): 34
GenericData (org.apache.avro.generic.GenericData): 30
GenericDatumReader (org.apache.avro.generic.GenericDatumReader): 30
ArrayList (java.util.ArrayList): 29
ByteArrayOutputStream (java.io.ByteArrayOutputStream): 27
DataFileWriter (org.apache.avro.file.DataFileWriter): 20
HashMap (java.util.HashMap): 19
ByteBuffer (java.nio.ByteBuffer): 18
BinaryEncoder (org.apache.avro.io.BinaryEncoder): 17
Field (org.apache.avro.Schema.Field): 14
DataFileStream (org.apache.avro.file.DataFileStream): 14
GenericRecordBuilder (org.apache.avro.generic.GenericRecordBuilder): 14
Utf8 (org.apache.avro.util.Utf8): 14
Encoder (org.apache.avro.io.Encoder): 12
DatasetRepository (com.cloudera.cdk.data.DatasetRepository): 11
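Several of the classes in the tally above (GenericDatumWriter, GenericDatumReader, BinaryEncoder, ByteArrayOutputStream, GenericRecordBuilder) point at the other common GenericRecord pattern in these projects: an in-memory binary round-trip rather than a .avro file. A minimal sketch of that pattern, using an assumed one-field schema:

import java.io.ByteArrayOutputStream;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.generic.GenericRecordBuilder;
import org.apache.avro.io.BinaryDecoder;
import org.apache.avro.io.BinaryEncoder;
import org.apache.avro.io.DecoderFactory;
import org.apache.avro.io.EncoderFactory;

public class AvroBinaryRoundTripSketch {
    public static void main(String[] args) throws Exception {
        // Assumed schema with a single int field named "value".
        Schema schema = new Schema.Parser().parse("{\"type\":\"record\",\"name\":\"dummy\",\"fields\":[{\"name\":\"value\",\"type\":\"int\"}]}");
        GenericRecord record = new GenericRecordBuilder(schema).set("value", 42).build();

        // Serialize the record to bytes with a BinaryEncoder.
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        BinaryEncoder encoder = EncoderFactory.get().binaryEncoder(out, null);
        new GenericDatumWriter<GenericRecord>(schema).write(record, encoder);
        encoder.flush();

        // Deserialize the bytes back into a GenericRecord.
        BinaryDecoder decoder = DecoderFactory.get().binaryDecoder(out.toByteArray(), null);
        GenericRecord copy = new GenericDatumReader<GenericRecord>(schema).read(null, decoder);
        System.out.println(copy.get("value"));
    }
}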