use of org.apache.avro.generic.GenericDatumReader in project avro-kafka-storm by ransilberman.
the class MainTest method testDataFile.
@Test
public void testDataFile() throws IOException {
    File fileOut = new File("data.avro");
    File fileIn = new File("data.avro");
    Schema.Parser parser = new Schema.Parser();
    Schema schema = parser.parse(getClass().getResourceAsStream("LPEvent.avsc"));
    GenericRecord datum = new GenericData.Record(schema);
    datum.put("revision", 1L);
    datum.put("siteId", "28280110");
    datum.put("eventType", "PLine");
    datum.put("timeStamp", System.currentTimeMillis());
    datum.put("sessionId", "123456II");
    // resolve the union branches of the "subrecord" field by name
    Map<String, Schema> unions = new HashMap<String, Schema>();
    List<Schema> typeList = schema.getField("subrecord").schema().getTypes();
    for (Schema sch : typeList) {
        unions.put(sch.getName(), sch);
    }
    GenericRecord plineDatum = new GenericData.Record(unions.get("pline"));
    plineDatum.put("text", "How can I help you?");
    plineDatum.put("lineType", 1);
    plineDatum.put("repId", "REPID12345");
    datum.put("subrecord", plineDatum);
    // write the file
    DatumWriter<GenericRecord> writer = new GenericDatumWriter<GenericRecord>(schema);
    DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<GenericRecord>(writer);
    dataFileWriter.create(schema, fileOut);
    dataFileWriter.append(datum);
    dataFileWriter.append(datum);
    dataFileWriter.append(datum);
    dataFileWriter.close();
    // read the file
    DatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>();
    DataFileReader<GenericRecord> dataFileReader = new DataFileReader<GenericRecord>(fileIn, reader);
    assertThat("Schema is the same", schema, is(dataFileReader.getSchema()));
    for (GenericRecord record : dataFileReader) {
        assertThat(record.get("siteId").toString(), is("28280110"));
        assertThat(record.get("eventType").toString(), is("PLine"));
    }
}
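For reference, a minimal read-only counterpart to the test above, assuming the same data.avro file: constructing GenericDatumReader without a schema makes it adopt the writer schema embedded in the container file, which is exactly why the assertion on dataFileReader.getSchema() holds.

import java.io.File;
import java.io.IOException;
import org.apache.avro.file.DataFileReader;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.DatumReader;

public class ReadDataFile {
    public static void main(String[] args) throws IOException {
        // No schema passed: the reader adopts the writer schema stored in
        // the container file, which is what the assertion above relies on.
        DatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>();
        DataFileReader<GenericRecord> fileReader =
                new DataFileReader<GenericRecord>(new File("data.avro"), reader);
        try {
            GenericRecord record = null;
            while (fileReader.hasNext()) {
                // Reusing the record instance avoids a fresh allocation per row.
                record = fileReader.next(record);
                System.out.println(record);
            }
        } finally {
            fileReader.close();
        }
    }
}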
use of org.apache.avro.generic.GenericDatumReader in project voldemort by voldemort.
the class VoldemortAdminTool method executeQueryKey.
private static void executeQueryKey(final Integer nodeId, AdminClient adminClient, List<String> storeNames, String keyString, String keyFormat) throws IOException {
    // decide queryingNode(s) for Key
    List<Integer> queryingNodes = new ArrayList<Integer>();
    if (nodeId < 0) {
        // means all nodes
        for (Node node : adminClient.getAdminClientCluster().getNodes()) {
            queryingNodes.add(node.getId());
        }
    } else {
        queryingNodes.add(nodeId);
    }
    // get basic info
    List<StoreDefinition> storeDefinitionList = getStoreDefinitions(adminClient, nodeId);
    Map<String, StoreDefinition> storeDefinitions = new HashMap<String, StoreDefinition>();
    for (StoreDefinition storeDef : storeDefinitionList) {
        storeDefinitions.put(storeDef.getName(), storeDef);
    }
    BufferedWriter out = new BufferedWriter(new OutputStreamWriter(System.out));
    // iterate through stores
    for (final String storeName : storeNames) {
        // store definition
        StoreDefinition storeDefinition = storeDefinitions.get(storeName);
        if (storeDefinition == null) {
            throw new StoreNotFoundException("Store " + storeName + " not found");
        }
        out.write("STORE_NAME: " + storeDefinition.getName() + "\n");
        // k-v serializer
        final SerializerDefinition keySerializerDef = storeDefinition.getKeySerializer();
        final SerializerDefinition valueSerializerDef = storeDefinition.getValueSerializer();
        SerializerFactory serializerFactory = new DefaultSerializerFactory();
        @SuppressWarnings("unchecked")
        final Serializer<Object> keySerializer = (Serializer<Object>) serializerFactory.getSerializer(keySerializerDef);
        @SuppressWarnings("unchecked")
        final Serializer<Object> valueSerializer = (Serializer<Object>) serializerFactory.getSerializer(valueSerializerDef);
        // compression strategy
        final CompressionStrategy keyCompressionStrategy;
        final CompressionStrategy valueCompressionStrategy;
        if (keySerializerDef != null && keySerializerDef.hasCompression()) {
            keyCompressionStrategy = new CompressionStrategyFactory().get(keySerializerDef.getCompression());
        } else {
            keyCompressionStrategy = null;
        }
        if (valueSerializerDef != null && valueSerializerDef.hasCompression()) {
            valueCompressionStrategy = new CompressionStrategyFactory().get(valueSerializerDef.getCompression());
        } else {
            valueCompressionStrategy = null;
        }
        if (keyCompressionStrategy == null) {
            out.write("KEY_COMPRESSION_STRATEGY: None\n");
        } else {
            out.write("KEY_COMPRESSION_STRATEGY: " + keyCompressionStrategy.getType() + "\n");
        }
        out.write("KEY_SERIALIZER_NAME: " + keySerializerDef.getName() + "\n");
        for (Map.Entry<Integer, String> entry : keySerializerDef.getAllSchemaInfoVersions().entrySet()) {
            out.write(String.format("KEY_SCHEMA VERSION=%d\n", entry.getKey()));
            out.write("====================================\n");
            out.write(entry.getValue());
            out.write("\n====================================\n");
        }
        out.write("\n");
        if (valueCompressionStrategy == null) {
            out.write("VALUE_COMPRESSION_STRATEGY: None\n");
        } else {
            out.write("VALUE_COMPRESSION_STRATEGY: " + valueCompressionStrategy.getType() + "\n");
        }
        out.write("VALUE_SERIALIZER_NAME: " + valueSerializerDef.getName() + "\n");
        for (Map.Entry<Integer, String> entry : valueSerializerDef.getAllSchemaInfoVersions().entrySet()) {
            out.write(String.format("VALUE_SCHEMA %d\n", entry.getKey()));
            out.write("====================================\n");
            out.write(entry.getValue());
            out.write("\n====================================\n");
        }
        out.write("\n");
        // although the streamingOps support multiple keys, we only query
        // one key here
        ByteArray key;
        try {
            if (keyFormat.equals("readable")) {
                Object keyObject;
                String keySerializerName = keySerializerDef.getName();
                if (isAvroSchema(keySerializerName)) {
                    Schema keySchema = Schema.parse(keySerializerDef.getCurrentSchemaInfo());
                    JsonDecoder decoder = new JsonDecoder(keySchema, keyString);
                    GenericDatumReader<Object> datumReader = new GenericDatumReader<Object>(keySchema);
                    keyObject = datumReader.read(null, decoder);
                } else if (keySerializerName.equals(DefaultSerializerFactory.JSON_SERIALIZER_TYPE_NAME)) {
                    JsonReader jsonReader = new JsonReader(new StringReader(keyString));
                    keyObject = jsonReader.read();
                } else {
                    keyObject = keyString;
                }
                key = new ByteArray(keySerializer.toBytes(keyObject));
            } else {
                key = new ByteArray(ByteUtils.fromHexString(keyString));
            }
        } catch (SerializationException se) {
            System.err.println("Error serializing key " + keyString);
            System.err.println("If this is a JSON key, you need to include escaped quotation marks in the command line if it is a string");
            se.printStackTrace();
            return;
        } catch (DecoderException de) {
            System.err.println("Error decoding key " + keyString);
            de.printStackTrace();
            return;
        } catch (IOException io) {
            System.err.println("Error parsing avro string " + keyString);
            io.printStackTrace();
            return;
        }
        boolean printedKey = false;
        // A Map<> could have been used instead of List<Entry<>> if
        // Versioned supported correct hash codes. Read the comment in
        // Versioned about the issue
        List<Entry<List<Versioned<byte[]>>, List<Integer>>> nodeValues = new ArrayList<Entry<List<Versioned<byte[]>>, List<Integer>>>();
        for (final Integer queryNodeId : queryingNodes) {
            Iterator<QueryKeyResult> iterator;
            iterator = adminClient.streamingOps.queryKeys(queryNodeId, storeName, Arrays.asList(key).iterator());
            // note: stringWriter is never used
            final StringWriter stringWriter = new StringWriter();
            QueryKeyResult queryKeyResult = iterator.next();
            if (!printedKey) {
                // de-serialize and write key
                byte[] keyBytes = queryKeyResult.getKey().get();
                // note: keyObject is computed here but never used
                Object keyObject = keySerializer.toObject((null == keyCompressionStrategy) ? keyBytes : keyCompressionStrategy.inflate(keyBytes));
                writeVoldKeyOrValueInternal(keyBytes, keySerializer, keyCompressionStrategy, "KEY", out);
                printedKey = true;
            }
            // iterate through, de-serialize and write values
            if (queryKeyResult.hasValues() && queryKeyResult.getValues().size() > 0) {
                int elementId = -1;
                for (int i = 0; i < nodeValues.size(); i++) {
                    if (Objects.equal(nodeValues.get(i).getKey(), queryKeyResult.getValues())) {
                        elementId = i;
                        break;
                    }
                }
                if (elementId == -1) {
                    ArrayList<Integer> nodes = new ArrayList<Integer>();
                    nodes.add(queryNodeId);
                    nodeValues.add(new AbstractMap.SimpleEntry<List<Versioned<byte[]>>, List<Integer>>(queryKeyResult.getValues(), nodes));
                } else {
                    nodeValues.get(elementId).getValue().add(queryNodeId);
                }
                out.write(String.format("\nQueried node %d on store %s\n", queryNodeId, storeName));
                int versionCount = 0;
                if (queryKeyResult.getValues().size() > 1) {
                    out.write("VALUE " + versionCount + "\n");
                }
                for (Versioned<byte[]> versioned : queryKeyResult.getValues()) {
                    // write version
                    VectorClock version = (VectorClock) versioned.getVersion();
                    out.write("VECTOR_CLOCK_BYTE: " + ByteUtils.toHexString(version.toBytes()) + "\n");
                    out.write("VECTOR_CLOCK_TEXT: " + version.toString() + '[' + new Date(version.getTimestamp()).toString() + "]\n");
                    // write value
                    byte[] valueBytes = versioned.getValue();
                    writeVoldKeyOrValueInternal(valueBytes, valueSerializer, valueCompressionStrategy, "VALUE", out);
                    versionCount++;
                }
            } else if (queryKeyResult.hasException()) {
                boolean isInvalidMetadataException = queryKeyResult.getException() instanceof InvalidMetadataException;
                // skip InvalidMetadataException errors unless
                // you are querying only a single node.
                if (!isInvalidMetadataException || queryingNodes.size() == 1) {
                    out.write(String.format("\nNode %d on store %s returned exception\n", queryNodeId, storeName));
                    out.write(queryKeyResult.getException().toString());
                    out.write("\n====================================\n");
                }
            } else {
                if (queryingNodes.size() == 1) {
                    out.write(String.format("\nNode %d on store %s returned NULL\n", queryNodeId, storeName));
                    out.write("\n====================================\n");
                }
            }
            out.flush();
        }
        out.write("\n====================================\n");
        for (Map.Entry<List<Versioned<byte[]>>, List<Integer>> nodeValue : nodeValues) {
            out.write("Nodes with same Value " + Arrays.toString(nodeValue.getValue().toArray()));
            out.write("\n====================================\n");
        }
        if (nodeValues.size() > 1) {
            out.write("\n*** Multiple (" + nodeValues.size() + ") versions of key/value exist for the key ***\n");
        }
        out.flush();
    }
}
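The Avro branch of the key parsing above (JsonDecoder plus GenericDatumReader) is the usage this page tracks. Here is a stripped-down sketch of just that branch, with a hypothetical one-field schema standing in for the store's real schema info. The public JsonDecoder constructor matches the older Avro API used in this code; newer Avro releases obtain the decoder via DecoderFactory.get().jsonDecoder(schema, input).

import java.io.IOException;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.io.JsonDecoder;

public class AvroKeyDecode {
    // Hypothetical one-field key schema standing in for
    // keySerializerDef.getCurrentSchemaInfo().
    static final String KEY_SCHEMA =
            "{\"type\":\"record\",\"name\":\"Key\",\"fields\":[{\"name\":\"id\",\"type\":\"string\"}]}";

    public static Object decodeKey(String keyString) throws IOException {
        Schema keySchema = new Schema.Parser().parse(KEY_SCHEMA);
        JsonDecoder decoder = new JsonDecoder(keySchema, keyString);
        GenericDatumReader<Object> datumReader = new GenericDatumReader<Object>(keySchema);
        // null tells the reader to allocate a fresh datum instead of reusing one
        return datumReader.read(null, decoder);
    }

    public static void main(String[] args) throws IOException {
        System.out.println(decodeKey("{\"id\": \"user-42\"}"));
    }
}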
use of org.apache.avro.generic.GenericDatumReader in project voldemort by voldemort.
the class VoldemortAvroClientShell method processCommands.
private static void processCommands(DefaultStoreClient<Object, Object> client, BufferedReader reader, Schema keySchema, Schema valueSchema, boolean printCommands) throws IOException {
    for (String line = reader.readLine(); line != null; line = reader.readLine()) {
        System.out.print(PROMPT);
        if (line.trim().equals(""))
            continue;
        if (printCommands)
            System.out.println(line);
        try {
            if (line.toLowerCase().startsWith("get")) {
                System.out.println("Enter key:");
                line = reader.readLine();
                JsonDecoder decoder = new JsonDecoder(keySchema, line);
                GenericDatumReader<Object> datumReader = null;
                Object key = null;
                try {
                    datumReader = new GenericDatumReader<Object>(keySchema);
                    key = datumReader.read(null, decoder);
                } catch (IOException e) {
                    // swallowed: key stays null and the error is reported below
                }
                if (key == null) {
                    System.err.println("Error parsing key ");
                    continue;
                }
                System.out.println("Value - " + client.get(key));
            } else if (line.toLowerCase().startsWith("put")) {
                String keyString = null;
                String valueString = null;
                System.out.println("Enter key:");
                line = reader.readLine();
                keyString = line;
                System.out.println("Enter value:");
                line = reader.readLine();
                valueString = line;
                JsonDecoder keyDecoder = new JsonDecoder(keySchema, keyString);
                JsonDecoder valueDecoder = new JsonDecoder(valueSchema, valueString);
                GenericDatumReader<Object> datumReader = null;
                Object key = null;
                Object value = null;
                try {
                    datumReader = new GenericDatumReader<Object>(keySchema);
                    key = datumReader.read(null, keyDecoder);
                } catch (IOException e) {
                    // swallowed: key stays null and the error is reported below
                }
                if (key == null) {
                    System.err.println("Error parsing key ");
                    continue;
                }
                try {
                    datumReader = new GenericDatumReader<Object>(valueSchema);
                    value = datumReader.read(null, valueDecoder);
                } catch (IOException e) {
                    // swallowed: value stays null and the error is reported below
                }
                if (value == null) {
                    System.err.println("Error parsing value ");
                    continue;
                }
                System.out.println("Put - " + client.put(key, value));
            } else if (line.startsWith("quit") || line.startsWith("exit")) {
                System.out.println("k k thx bye.");
                System.exit(0);
            } else {
                System.err.println("Only supported 'get' & 'put' ");
            }
        } catch (Exception e) {
            System.err.println("Unexpected error:");
            e.printStackTrace(System.err);
        }
    }
}
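The get and put branches above repeat the same decode-then-null-check sequence three times. A small helper along these lines (hypothetical; not part of the Voldemort source) would collapse the duplication while keeping the shell's null-means-parse-error convention:

import java.io.IOException;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.io.JsonDecoder;

final class AvroJson {
    private AvroJson() {}

    // Returns the parsed datum, or null if the line is not valid JSON for
    // the schema -- mirroring the shell's null-means-parse-error convention.
    static Object parseOrNull(Schema schema, String json) {
        try {
            JsonDecoder decoder = new JsonDecoder(schema, json);
            return new GenericDatumReader<Object>(schema).read(null, decoder);
        } catch (IOException e) {
            return null;
        }
    }
}

With this, each branch reduces to Object key = AvroJson.parseOrNull(keySchema, line); followed by the existing null check.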
use of org.apache.avro.generic.GenericDatumReader in project voldemort by voldemort.
the class VoldemortClientShell method parseObject.
public static Object parseObject(SerializerDefinition serializerDef, String argStr, MutableInt parsePos, PrintStream errorStream) {
    Object obj = null;
    try {
        // TODO everything is read as json string now..
        JsonReader jsonReader = new JsonReader(new StringReader(argStr));
        obj = jsonReader.read();
        // mark how much of the original string we blew through to
        // extract the avro string.
        parsePos.setValue(jsonReader.getCurrentLineOffset() - 1);
        if (StoreDefinitionUtils.isAvroSchema(serializerDef.getName())) {
            // TODO Need to check all the avro siblings work
            // For avro, we hack and extract avro key/value as a string,
            // before we do the actual parsing with the schema
            String avroString = (String) obj;
            // From here on, this is just normal avro parsing.
            Schema latestSchema = Schema.parse(serializerDef.getCurrentSchemaInfo());
            try {
                JsonDecoder decoder = new JsonDecoder(latestSchema, avroString);
                GenericDatumReader<Object> datumReader = new GenericDatumReader<Object>(latestSchema);
                obj = datumReader.read(null, decoder);
            } catch (IOException io) {
                errorStream.println("Error parsing avro string " + avroString);
                io.printStackTrace();
            }
        } else {
            // all json processing does some numeric type tightening
            obj = tightenNumericTypes(obj);
        }
    } catch (EndOfFileException eof) {
        // can be thrown from the jsonReader.read(..) call indicating we
        // have nothing more to read.
        obj = null;
    }
    return obj;
}
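Schema.parse and the public JsonDecoder constructor used above are deprecated in newer Avro releases (the JsonDecoder constructors are no longer public). Assuming a recent Avro on the classpath, the same decode goes through Schema.Parser and DecoderFactory:

import java.io.IOException;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.io.Decoder;
import org.apache.avro.io.DecoderFactory;

final class ModernAvroParse {
    private ModernAvroParse() {}

    // Same decode as the inner try-block above, via the non-deprecated
    // factory entry points.
    static Object decode(String schemaJson, String avroString) throws IOException {
        Schema latestSchema = new Schema.Parser().parse(schemaJson);
        Decoder decoder = DecoderFactory.get().jsonDecoder(latestSchema, avroString);
        GenericDatumReader<Object> datumReader = new GenericDatumReader<Object>(latestSchema);
        return datumReader.read(null, decoder);
    }
}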
use of org.apache.avro.generic.GenericDatumReader in project voldemort by voldemort.
the class AvroUtils method getSchemaFromPath.
/**
 * Pull the schema off of the given file (if it is a file). If it is a
 * directory, then pull schemas off of all subfiles, and check that they are
 * all the same schema. If so, return that schema, otherwise throw an
 * exception
 *
 * @param fs The filesystem to use
 * @param path The path from which to get the schema
 * @return The schema of this file or all its subfiles
 * @throws RuntimeException wrapping any error encountered while reading the schema
 */
@SuppressWarnings({ "unchecked", "rawtypes" })
private static Schema getSchemaFromPath(FileSystem fs, Path path) {
    try {
        if (fs.isFile(path)) {
            BufferedInputStream inStream = null;
            try {
                inStream = new BufferedInputStream(fs.open(path));
            } catch (IOException e1) {
                throw new RuntimeException("Unable to open " + path, e1);
            }
            GenericDatumReader datum = new GenericDatumReader();
            DataFileStream reader = null;
            try {
                reader = new DataFileStream(inStream, datum);
            } catch (IOException e) {
                throw new RuntimeException("Invalid avro format, path " + path, e);
            }
            return reader.getSchema();
        } else {
            FileStatus[] statuses = null;
            if (fs.isDirectory(path)) {
                // this is a directory, get schemas from all subfiles
                statuses = fs.listStatus(path);
                if (statuses == null || statuses.length == 0)
                    throw new IllegalArgumentException("No files in directory " + path);
            } else {
                // this is a wildcard path, get schemas from all matched files
                statuses = fs.globStatus(path);
                if (statuses == null || statuses.length == 0)
                    throw new IllegalArgumentException("No matches for path pattern " + path);
            }
            List<Schema> schemas = new ArrayList<Schema>();
            for (FileStatus status : statuses) {
                if (!HadoopUtils.shouldPathBeIgnored(status.getPath())) {
                    schemas.add(getSchemaFromPath(fs, status.getPath()));
                }
            }
            // now check that all the schemas are the same
            if (schemas.size() > 0) {
                Schema schema = schemas.get(0);
                for (int i = 1; i < schemas.size(); i++)
                    if (!schema.equals(schemas.get(i)))
                        throw new IllegalArgumentException("The directory " + path + " contains heterogeneous schemas: found both '" + schema + "' and '" + schemas.get(i) + "'.");
                return schema;
            } else {
                throw new IllegalArgumentException("No valid metadata file found for path " + path);
            }
        }
    } catch (Exception e) {
        throw new RuntimeException("Error getting schema for path " + path, e);
    }
}
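For the single-file case, the method boils down to a header-only read: DataFileStream parses the container header and exposes the writer schema before any record is decoded. A local-filesystem sketch of that step (note the original never closes the DataFileStream it creates; this sketch does):

import java.io.BufferedInputStream;
import java.io.FileInputStream;
import java.io.IOException;
import org.apache.avro.Schema;
import org.apache.avro.file.DataFileStream;
import org.apache.avro.generic.GenericDatumReader;

public class SchemaPeek {
    // Read only the container-file header to get the writer schema; no
    // records are decoded. Unlike the method above, the stream is closed.
    static Schema schemaOf(String path) throws IOException {
        DataFileStream<Object> stream = new DataFileStream<Object>(
                new BufferedInputStream(new FileInputStream(path)),
                new GenericDatumReader<Object>());
        try {
            return stream.getSchema();
        } finally {
            stream.close();
        }
    }

    public static void main(String[] args) throws IOException {
        System.out.println(schemaOf(args[0]));
    }
}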