Use of org.apache.avro.util.Utf8 in the Cloudera project crunch:
class AvrosTest, method testTriples.
@Test
@SuppressWarnings("rawtypes")
public void testTriples() throws Exception {
  // Round-trip check: a Tuple3 of strings must map to an Avro record whose
  // fields are the corresponding Utf8 values, and back.
  AvroType tripleType = Avros.triples(Avros.strings(), Avros.strings(), Avros.strings());
  Tuple3 expectedTuple = Tuple3.of("a", "b", "c");
  GenericData.Record expectedRecord = new GenericData.Record(tripleType.getSchema());
  // Avro generic records represent strings as Utf8 instances.
  expectedRecord.put(0, new Utf8("a"));
  expectedRecord.put(1, new Utf8("b"));
  expectedRecord.put(2, new Utf8("c"));
  testInputOutputFn(tripleType, expectedTuple, expectedRecord);
}
Use of org.apache.avro.util.Utf8 in the LinkedIn project rest.li:
class AnyRecordTranslator, method avroGenericToData.
/**
 * Translates an Avro generic record representing an AnyRecord — a "type" string plus a
 * JSON-encoded "value" blob — into a {@code DataMap} keyed by the type name.
 *
 * <p>On any translation failure (wrong record class, missing or non-Utf8 fields, or a
 * codec error while decoding the value bytes) a message is appended to {@code context}
 * and {@code null} is returned.
 *
 * @param context   accumulates human-readable error messages
 * @param avroData  expected to be a {@code GenericRecord} with TYPE and VALUE fields
 * @param avroSchema the Avro schema of {@code avroData} (unused here)
 * @param schema    the target data schema (unused here)
 * @return a single-entry {@code DataMap} mapping the type name to the decoded value map,
 *         or {@code null} on error
 */
@Override
public Object avroGenericToData(DataTranslatorContext context, Object avroData, Schema avroSchema, DataSchema schema) {
  boolean error = false;
  Object result = null;
  GenericRecord genericRecord = null;
  try {
    genericRecord = (GenericRecord) avroData;
  } catch (ClassCastException e) {
    context.appendMessage("Error translating %1$s, it is not a GenericRecord", avroData);
    error = true;
  }
  if (!error) {
    Utf8 type = null;
    Utf8 value = null;
    try {
      type = (Utf8) genericRecord.get(TYPE);
      value = (Utf8) genericRecord.get(VALUE);
    } catch (ClassCastException e) {
      context.appendMessage("Error translating %1$s, \"type\" or \"value\" is not a %2$s", avroData, Utf8.class.getSimpleName());
      error = true;
    }
    if (!error) {
      if (type == null || value == null) {
        context.appendMessage("Error translating %1$s, \"type\" or \"value\" is null", avroData);
      } else {
        try {
          // The value field carries the serialized DataMap; decode it and wrap it
          // under its type name.
          DataMap valueDataMap = _codec.bytesToMap(value.getBytes());
          DataMap anyDataMap = new DataMap(2);
          anyDataMap.put(type.toString(), valueDataMap);
          result = anyDataMap;
        } catch (IOException e) {
          context.appendMessage("Error translating %1$s, %2$s", avroData, e);
        }
      }
    }
  }
  return result;
}
Use of org.apache.avro.util.Utf8 in the LinkedIn project pinot:
class AvroRecordToPinotRowGenerator, method transform.
/**
 * Converts an Avro record into a Pinot {@code GenericRow}, following the indexing schema:
 * Utf8 values become Java Strings, Avro arrays become Object arrays (with string-typed
 * elements stringified), and null entries are replaced with default null values.
 *
 * @param record      source Avro record
 * @param schema      Avro schema of the record (unused here; kept for the caller contract)
 * @param destination row to populate; also returned
 * @return {@code destination} with one field per indexing-schema column
 */
public GenericRow transform(GenericData.Record record, org.apache.avro.Schema schema, GenericRow destination) {
  for (String column : indexingSchema.getColumnNames()) {
    Object entry = record.get(column);
    FieldSpec fieldSpec = indexingSchema.getFieldSpecFor(column);
    if (entry != null) {
      if (entry instanceof Array) {
        entry = AvroRecordReader.transformAvroArrayToObjectArray((Array) entry, fieldSpec);
        stringifyElementsInPlace((Object[]) entry, fieldSpec);
      } else {
        if (entry instanceof Utf8) {
          entry = ((Utf8) entry).toString();
        }
        if (fieldSpec.getDataType() == DataType.STRING) {
          entry = entry.toString();
        }
      }
    } else {
      // entry was null.
      if (fieldSpec.isSingleValueField()) {
        entry = AvroRecordReader.getDefaultNullValue(fieldSpec);
      } else {
        // Multi-value field that is null: delegate to the reader, which is expected to
        // produce a default object array for a null Avro array.
        // NOTE(review): `entry` is null here, so the (Array) cast passes null through —
        // presumably transformAvroArrayToObjectArray handles null; confirm.
        entry = AvroRecordReader.transformAvroArrayToObjectArray((Array) entry, fieldSpec);
        stringifyElementsInPlace((Object[]) entry, fieldSpec);
      }
    }
    destination.putField(column, entry);
  }
  return destination;
}

/**
 * For string-typed columns, converts each non-null array element to its String form
 * in place (e.g. Utf8 -> String). No-op for other data types.
 */
private static void stringifyElementsInPlace(Object[] elements, FieldSpec fieldSpec) {
  if (fieldSpec.getDataType() == DataType.STRING || fieldSpec.getDataType() == DataType.STRING_ARRAY) {
    for (int i = 0; i < elements.length; ++i) {
      if (elements[i] != null) {
        elements[i] = elements[i].toString();
      }
    }
  }
}
Use of org.apache.avro.util.Utf8 in the LinkedIn project pinot:
class BaseClusterIntegrationTest, method createH2SchemaAndInsertAvroFiles.
/**
 * Creates an H2 table "mytable" whose columns mirror the schema of the first Avro file,
 * then inserts every record from all the given Avro files into it. Avro multi-value
 * (array) fields are flattened into MAX_ELEMENTS_IN_MULTI_VALUE "__MVi" columns; int
 * types are widened to bigint and strings become varchar(128).
 *
 * @param avroFiles  Avro files to insert; the first file supplies the schema
 * @param connection open H2 JDBC connection
 * @throws RuntimeException wrapping any I/O or SQL failure
 */
public static void createH2SchemaAndInsertAvroFiles(List<File> avroFiles, Connection connection) {
  try {
    // BUG FIX: the DROP statement was prepared but never executed, so a pre-existing
    // table silently survived and the CREATE below would fail.
    connection.prepareCall("DROP TABLE IF EXISTS mytable").execute();
    File schemaAvroFile = avroFiles.get(0);
    DatumReader<GenericRecord> datumReader = new GenericDatumReader<GenericRecord>();
    DataFileReader<GenericRecord> dataFileReader = new DataFileReader<GenericRecord>(schemaAvroFile, datumReader);
    Schema schema = dataFileReader.getSchema();
    List<Schema.Field> fields = schema.getFields();
    List<String> columnNamesAndTypes = new ArrayList<String>(fields.size());
    int columnCount = 0;
    for (Schema.Field field : fields) {
      String fieldName = field.name();
      Schema.Type fieldType = field.schema().getType();
      switch (fieldType) {
        case UNION:
          // Map the union's first branch; a single-branch union becomes NOT NULL.
          List<Schema> types = field.schema().getTypes();
          String columnNameAndType;
          String typeName = types.get(0).getName();
          if (typeName.equalsIgnoreCase("int")) {
            typeName = "bigint"; // widen int to bigint in H2
          }
          if (types.size() == 1) {
            columnNameAndType = fieldName + " " + typeName + " not null";
          } else {
            columnNameAndType = fieldName + " " + typeName;
          }
          columnNamesAndTypes.add(columnNameAndType.replace("string", "varchar(128)"));
          ++columnCount;
          break;
        case ARRAY:
          // Flatten multi-value fields into fixed "__MVi" columns.
          String elementTypeName = field.schema().getElementType().getName();
          if (elementTypeName.equalsIgnoreCase("int")) {
            elementTypeName = "bigint";
          }
          elementTypeName = elementTypeName.replace("string", "varchar(128)");
          for (int i = 0; i < MAX_ELEMENTS_IN_MULTI_VALUE; i++) {
            columnNamesAndTypes.add(fieldName + "__MV" + i + " " + elementTypeName);
          }
          ++columnCount;
          break;
        case BOOLEAN:
        case INT:
        case LONG:
        case FLOAT:
        case DOUBLE:
        case STRING:
          String fieldTypeName = fieldType.getName();
          if (fieldTypeName.equalsIgnoreCase("int")) {
            fieldTypeName = "bigint";
          }
          columnNameAndType = fieldName + " " + fieldTypeName + " not null";
          columnNamesAndTypes.add(columnNameAndType.replace("string", "varchar(128)"));
          ++columnCount;
          break;
        case RECORD:
          // Ignore records (skip without counting a column).
          continue;
        default:
          // Ignore other avro types
          LOGGER.warn("Ignoring field {} of type {}", fieldName, field.schema());
      }
    }
    connection.prepareCall("create table mytable(" + StringUtil.join(",", columnNamesAndTypes.toArray(new String[columnNamesAndTypes.size()])) + ")").execute();
    long start = System.currentTimeMillis();
    StringBuilder params = new StringBuilder("?");
    for (int i = 0; i < columnNamesAndTypes.size() - 1; i++) {
      params.append(",?");
    }
    // try-with-resources: the original leaked the PreparedStatement.
    try (PreparedStatement statement = connection.prepareStatement("INSERT INTO mytable VALUES (" + params.toString() + ")")) {
      dataFileReader.close();
      for (File avroFile : avroFiles) {
        datumReader = new GenericDatumReader<GenericRecord>();
        dataFileReader = new DataFileReader<GenericRecord>(avroFile, datumReader);
        GenericRecord record = null;
        while (dataFileReader.hasNext()) {
          record = dataFileReader.next(record);
          int jdbcIndex = 1;
          for (int avroIndex = 0; avroIndex < columnCount; ++avroIndex) {
            Object value = record.get(avroIndex);
            if (value instanceof GenericData.Array) {
              // Spread the array across the __MVi columns, padding with nulls.
              GenericData.Array array = (GenericData.Array) value;
              for (int i = 0; i < MAX_ELEMENTS_IN_MULTI_VALUE; i++) {
                if (i < array.size()) {
                  value = array.get(i);
                  if (value instanceof Utf8) {
                    value = value.toString();
                  }
                } else {
                  value = null;
                }
                statement.setObject(jdbcIndex, value);
                ++jdbcIndex;
              }
            } else {
              if (value instanceof Utf8) {
                value = value.toString();
              }
              statement.setObject(jdbcIndex, value);
              ++jdbcIndex;
            }
          }
          statement.execute();
        }
        dataFileReader.close();
      }
    }
    // Parameterized logging instead of string concatenation.
    LOGGER.info("Insertion took {}", System.currentTimeMillis() - start);
  } catch (Exception e) {
    throw new RuntimeException(e);
  }
}
Use of org.apache.avro.util.Utf8 in the LinkedIn project pinot:
class BitmapInvertedIndexTest, method testBitMapInvertedIndex.
/**
 * Loads the segment with the given read mode and verifies its bitmap inverted indexes
 * against the raw records in the source Avro file: for each document and column, the
 * doc id must appear in the inverted index of that record's dictionary id and in no
 * other dictionary id's index.
 *
 * @param readMode how the segment is loaded (e.g. mmap vs heap)
 * @throws Exception on load or read failure
 */
void testBitMapInvertedIndex(ReadMode readMode) throws Exception {
  IndexLoadingConfigMetadata indexLoadingConfig = new IndexLoadingConfigMetadata(new PropertiesConfiguration());
  indexLoadingConfig.initLoadingInvertedIndexColumnSet(invertedIndexColumns);
  final IndexSegmentImpl mmapSegment = (IndexSegmentImpl) ColumnarSegmentLoader.load(segmentDirectory, readMode, indexLoadingConfig);
  // Compare the loaded inverted index with the records in the avro file.
  // try-with-resources: the original leaked the stream, notably on assertion failure.
  try (DataFileStream<GenericRecord> reader = new DataFileStream<GenericRecord>(new FileInputStream(new File(getClass().getClassLoader().getResource(AVRO_DATA).getFile())), new GenericDatumReader<GenericRecord>())) {
    int docId = 0;
    while (reader.hasNext()) {
      final GenericRecord rec = reader.next();
      for (final String column : ((SegmentMetadataImpl) mmapSegment.getSegmentMetadata()).getColumnMetadataMap().keySet()) {
        Object entry = rec.get(column);
        if (entry instanceof Utf8) {
          entry = ((Utf8) entry).toString();
        }
        final int dicId = mmapSegment.getDictionaryFor(column).indexOf(entry);
        // make sure that docId for dicId exist in the inverted index
        Assert.assertTrue(mmapSegment.getInvertedIndexFor(column).getImmutable(dicId).contains(docId));
        final int size = mmapSegment.getDictionaryFor(column).length();
        for (int i = 0; i < size; ++i) {
          // remove this for-loop for quick test
          if (i == dicId) {
            continue;
          }
          // make sure that docId for dicId does not exist in the inverted index
          Assert.assertFalse(mmapSegment.getInvertedIndexFor(column).getImmutable(i).contains(docId));
        }
      }
      ++docId;
    }
  }
}
Aggregations