Search in sources :

Example 36 with GenericDatumReader

use of org.apache.avro.generic.GenericDatumReader in project avro-kafka-storm by ransilberman.

the class MainTest method testDataFile.

/**
 * Writes three copies of one {@code LPEvent} record to an Avro data file,
 * reads the file back and checks the schema and selected field values.
 *
 * @throws IOException if the data file cannot be written or read
 */
@Test
public void testDataFile() throws IOException {
    // Both handles point at the same path: one is written, the other read back.
    File fileOut = new File("data.avro");
    File fileIn = new File("data.avro");
    Schema.Parser parser = new Schema.Parser();
    Schema schema = parser.parse(getClass().getResourceAsStream("LPEvent.avsc"));
    GenericRecord datum = new GenericData.Record(schema);
    datum.put("revision", 1L);
    datum.put("siteId", "28280110");
    datum.put("eventType", "PLine");
    datum.put("timeStamp", System.currentTimeMillis());
    datum.put("sessionId", "123456II");
    // Index the branches of the "subrecord" union by name so the "pline"
    // branch schema can be looked up.
    Map<String, Schema> unions = new HashMap<String, Schema>();
    List<Schema> typeList = schema.getField("subrecord").schema().getTypes();
    for (Schema sch : typeList) {
        unions.put(sch.getName(), sch);
    }
    GenericRecord plineDatum = new GenericData.Record(unions.get("pline"));
    plineDatum.put("text", "How can I help you?");
    plineDatum.put("lineType", 1);
    plineDatum.put("repId", "REPID12345");
    datum.put("subrecord", plineDatum);
    // write the file; close the writer even when an append fails
    final int expectedRecords = 3;
    DatumWriter<GenericRecord> writer = new GenericDatumWriter<GenericRecord>(schema);
    DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<GenericRecord>(writer);
    try {
        dataFileWriter.create(schema, fileOut);
        for (int i = 0; i < expectedRecords; i++) {
            dataFileWriter.append(datum);
        }
    } finally {
        dataFileWriter.close();
    }
    // read the file; close the reader so the file handle is released
    DatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>();
    DataFileReader<GenericRecord> dataFileReader = new DataFileReader<GenericRecord>(fileIn, reader);
    try {
        // actual value first, expected value inside the matcher
        assertThat("Scema is the same", dataFileReader.getSchema(), is(schema));
        int recordCount = 0;
        for (GenericRecord record : dataFileReader) {
            assertThat(record.get("siteId").toString(), is("28280110"));
            assertThat(record.get("eventType").toString(), is("PLine"));
            recordCount++;
        }
        // Without this check an empty file would pass the loop above.
        assertThat("All appended records are read back", recordCount, is(expectedRecords));
    } finally {
        dataFileReader.close();
    }
}
Also used : HashMap(java.util.HashMap) GenericDatumReader(org.apache.avro.generic.GenericDatumReader) Schema(org.apache.avro.Schema) DataFileWriter(org.apache.avro.file.DataFileWriter) GenericDatumWriter(org.apache.avro.generic.GenericDatumWriter) DataFileReader(org.apache.avro.file.DataFileReader) GenericRecord(org.apache.avro.generic.GenericRecord) GenericRecord(org.apache.avro.generic.GenericRecord) File(java.io.File) Test(org.junit.Test)

Example 37 with GenericDatumReader

use of org.apache.avro.generic.GenericDatumReader in project voldemort by voldemort.

the class VoldemortAdminTool method executeQueryKey.

/**
 * Queries a single key on one node or on every node, for each of the given
 * stores, and writes the store's serializer details and every returned
 * version to standard output.
 * <p>
 * Throws {@code StoreNotFoundException} when a requested store has no
 * definition. If the key cannot be converted to bytes, the error is printed
 * to standard error and the method returns without querying.
 *
 * @param nodeId node to query; a negative value queries every node of the
 *        admin client's cluster
 * @param adminClient client used to list nodes and to stream the key query
 * @param storeNames names of the stores to query
 * @param keyString key text, interpreted according to {@code keyFormat}
 * @param keyFormat {@code "readable"} to parse {@code keyString} with the
 *        store's key serializer; any other value treats it as a hex string
 * @throws IOException if writing the report fails
 */
private static void executeQueryKey(final Integer nodeId, AdminClient adminClient, List<String> storeNames, String keyString, String keyFormat) throws IOException {
    // decide queryingNode(s) for Key
    List<Integer> queryingNodes = new ArrayList<Integer>();
    if (nodeId < 0) {
        // means all nodes
        for (Node node : adminClient.getAdminClientCluster().getNodes()) {
            queryingNodes.add(node.getId());
        }
    } else {
        queryingNodes.add(nodeId);
    }
    // get basic info
    List<StoreDefinition> storeDefinitionList = getStoreDefinitions(adminClient, nodeId);
    Map<String, StoreDefinition> storeDefinitions = new HashMap<String, StoreDefinition>();
    for (StoreDefinition storeDef : storeDefinitionList) {
        storeDefinitions.put(storeDef.getName(), storeDef);
    }
    BufferedWriter out = new BufferedWriter(new OutputStreamWriter(System.out));
    // iterate through stores
    for (final String storeName : storeNames) {
        // store definition
        StoreDefinition storeDefinition = storeDefinitions.get(storeName);
        if (storeDefinition == null) {
            throw new StoreNotFoundException("Store " + storeName + " not found");
        }
        out.write("STORE_NAME: " + storeDefinition.getName() + "\n");
        // k-v serializer
        final SerializerDefinition keySerializerDef = storeDefinition.getKeySerializer();
        final SerializerDefinition valueSerializerDef = storeDefinition.getValueSerializer();
        SerializerFactory serializerFactory = new DefaultSerializerFactory();
        @SuppressWarnings("unchecked") final Serializer<Object> keySerializer = (Serializer<Object>) serializerFactory.getSerializer(keySerializerDef);
        @SuppressWarnings("unchecked") final Serializer<Object> valueSerializer = (Serializer<Object>) serializerFactory.getSerializer(valueSerializerDef);
        // compression strategy
        final CompressionStrategy keyCompressionStrategy;
        final CompressionStrategy valueCompressionStrategy;
        if (keySerializerDef != null && keySerializerDef.hasCompression()) {
            keyCompressionStrategy = new CompressionStrategyFactory().get(keySerializerDef.getCompression());
        } else {
            keyCompressionStrategy = null;
        }
        if (valueSerializerDef != null && valueSerializerDef.hasCompression()) {
            valueCompressionStrategy = new CompressionStrategyFactory().get(valueSerializerDef.getCompression());
        } else {
            valueCompressionStrategy = null;
        }
        if (keyCompressionStrategy == null) {
            out.write("KEY_COMPRESSION_STRATEGY: None\n");
        } else {
            out.write("KEY_COMPRESSION_STRATEGY: " + keyCompressionStrategy.getType() + "\n");
        }
        out.write("KEY_SERIALIZER_NAME: " + keySerializerDef.getName() + "\n");
        for (Map.Entry<Integer, String> entry : keySerializerDef.getAllSchemaInfoVersions().entrySet()) {
            out.write(String.format("KEY_SCHEMA VERSION=%d\n", entry.getKey()));
            out.write("====================================\n");
            out.write(entry.getValue());
            out.write("\n====================================\n");
        }
        out.write("\n");
        if (valueCompressionStrategy == null) {
            out.write("VALUE_COMPRESSION_STRATEGY: None\n");
        } else {
            out.write("VALUE_COMPRESSION_STRATEGY: " + valueCompressionStrategy.getType() + "\n");
        }
        out.write("VALUE_SERIALIZER_NAME: " + valueSerializerDef.getName() + "\n");
        for (Map.Entry<Integer, String> entry : valueSerializerDef.getAllSchemaInfoVersions().entrySet()) {
            out.write(String.format("VALUE_SCHEMA %d\n", entry.getKey()));
            out.write("====================================\n");
            out.write(entry.getValue());
            out.write("\n====================================\n");
        }
        out.write("\n");
        // although the streamingOps support multiple keys, we only query
        // one key here
        ByteArray key;
        try {
            if (keyFormat.equals("readable")) {
                Object keyObject;
                String keySerializerName = keySerializerDef.getName();
                if (isAvroSchema(keySerializerName)) {
                    Schema keySchema = Schema.parse(keySerializerDef.getCurrentSchemaInfo());
                    JsonDecoder decoder = new JsonDecoder(keySchema, keyString);
                    GenericDatumReader<Object> datumReader = new GenericDatumReader<Object>(keySchema);
                    keyObject = datumReader.read(null, decoder);
                } else if (keySerializerName.equals(DefaultSerializerFactory.JSON_SERIALIZER_TYPE_NAME)) {
                    JsonReader jsonReader = new JsonReader(new StringReader(keyString));
                    keyObject = jsonReader.read();
                } else {
                    keyObject = keyString;
                }
                key = new ByteArray(keySerializer.toBytes(keyObject));
            } else {
                key = new ByteArray(ByteUtils.fromHexString(keyString));
            }
        } catch (SerializationException se) {
            System.err.println("Error serializing key " + keyString);
            System.err.println("If this is a JSON key, you need to include escaped quotation marks in the command line if it is a string");
            se.printStackTrace();
            // Flush so the store details buffered so far are not lost.
            out.flush();
            return;
        } catch (DecoderException de) {
            System.err.println("Error decoding key " + keyString);
            de.printStackTrace();
            // Flush so the store details buffered so far are not lost.
            out.flush();
            return;
        } catch (IOException io) {
            System.err.println("Error parsing avro string " + keyString);
            io.printStackTrace();
            // Flush so the store details buffered so far are not lost.
            out.flush();
            return;
        }
        boolean printedKey = false;
        // A Map<> could have been used instead of List<Entry<>> if
        // Versioned supported correct hash codes. Read the comment in
        // Versioned about the issue
        List<Entry<List<Versioned<byte[]>>, List<Integer>>> nodeValues = new ArrayList<Entry<List<Versioned<byte[]>>, List<Integer>>>();
        for (final Integer queryNodeId : queryingNodes) {
            Iterator<QueryKeyResult> iterator;
            iterator = adminClient.streamingOps.queryKeys(queryNodeId, storeName, Arrays.asList(key).iterator());
            QueryKeyResult queryKeyResult = iterator.next();
            if (!printedKey) {
                // de-serialize and write key
                byte[] keyBytes = queryKeyResult.getKey().get();
                // The result is not used below; the call is kept because it can
                // throw on bytes that cannot be inflated or de-serialized.
                // NOTE(review): confirm whether writeVoldKeyOrValueInternal
                // repeats this work before removing the call.
                Object keyObject = keySerializer.toObject((null == keyCompressionStrategy) ? keyBytes : keyCompressionStrategy.inflate(keyBytes));
                writeVoldKeyOrValueInternal(keyBytes, keySerializer, keyCompressionStrategy, "KEY", out);
                printedKey = true;
            }
            // iterate through, de-serialize and write values
            if (queryKeyResult.hasValues() && queryKeyResult.getValues().size() > 0) {
                // Group nodes by the value list they returned: find the group
                // whose values equal this node's values, if there is one.
                int elementId = -1;
                for (int i = 0; i < nodeValues.size(); i++) {
                    if (Objects.equal(nodeValues.get(i).getKey(), queryKeyResult.getValues())) {
                        elementId = i;
                        break;
                    }
                }
                if (elementId == -1) {
                    ArrayList<Integer> nodes = new ArrayList<Integer>();
                    nodes.add(queryNodeId);
                    nodeValues.add(new AbstractMap.SimpleEntry<List<Versioned<byte[]>>, List<Integer>>(queryKeyResult.getValues(), nodes));
                } else {
                    nodeValues.get(elementId).getValue().add(queryNodeId);
                }
                out.write(String.format("\nQueried node %d on store %s\n", queryNodeId, storeName));
                // When several versions exist, label each one with its index.
                // The header used to be written once before the loop, so only
                // "VALUE 0" ever appeared.
                final boolean multipleVersions = queryKeyResult.getValues().size() > 1;
                int versionCount = 0;
                for (Versioned<byte[]> versioned : queryKeyResult.getValues()) {
                    if (multipleVersions) {
                        out.write("VALUE " + versionCount + "\n");
                    }
                    // write version
                    VectorClock version = (VectorClock) versioned.getVersion();
                    out.write("VECTOR_CLOCK_BYTE: " + ByteUtils.toHexString(version.toBytes()) + "\n");
                    out.write("VECTOR_CLOCK_TEXT: " + version.toString() + '[' + new Date(version.getTimestamp()).toString() + "]\n");
                    // write value
                    byte[] valueBytes = versioned.getValue();
                    writeVoldKeyOrValueInternal(valueBytes, valueSerializer, valueCompressionStrategy, "VALUE", out);
                    versionCount++;
                }
            } else if (queryKeyResult.hasException()) {
                // exception.
                boolean isInvalidMetadataException = queryKeyResult.getException() instanceof InvalidMetadataException;
                // An InvalidMetadataException is only reported when you are
                // querying a single node.
                if (!isInvalidMetadataException || queryingNodes.size() == 1) {
                    out.write(String.format("\nNode %d on store %s returned exception\n", queryNodeId, storeName));
                    out.write(queryKeyResult.getException().toString());
                    out.write("\n====================================\n");
                }
            } else {
                if (queryingNodes.size() == 1) {
                    out.write(String.format("\nNode %d on store %s returned NULL\n", queryNodeId, storeName));
                    out.write("\n====================================\n");
                }
            }
            out.flush();
        }
        out.write("\n====================================\n");
        for (Map.Entry<List<Versioned<byte[]>>, List<Integer>> nodeValue : nodeValues) {
            out.write("Nodes with same Value " + Arrays.toString(nodeValue.getValue().toArray()));
            out.write("\n====================================\n");
        }
        if (nodeValues.size() > 1) {
            out.write("\n*** Multiple (" + nodeValues.size() + ") versions of key/value exist for the key ***\n");
        }
        out.flush();
    }
}
Also used : HashMap(java.util.HashMap) Node(voldemort.cluster.Node) Schema(org.apache.avro.Schema) InvalidMetadataException(voldemort.store.InvalidMetadataException) ArrayList(java.util.ArrayList) CompressionStrategy(voldemort.store.compress.CompressionStrategy) CompressionStrategyFactory(voldemort.store.compress.CompressionStrategyFactory) QueryKeyResult(voldemort.client.protocol.admin.QueryKeyResult) AbstractMap(java.util.AbstractMap) ByteArray(voldemort.utils.ByteArray) List(java.util.List) ArrayList(java.util.ArrayList) Serializer(voldemort.serialization.Serializer) StringSerializer(voldemort.serialization.StringSerializer) DefaultSerializerFactory(voldemort.serialization.DefaultSerializerFactory) JsonDecoder(org.apache.avro.io.JsonDecoder) Map(java.util.Map) HashMap(java.util.HashMap) AbstractMap(java.util.AbstractMap) Versioned(voldemort.versioning.Versioned) GenericDatumReader(org.apache.avro.generic.GenericDatumReader) BufferedWriter(java.io.BufferedWriter) StoreNotFoundException(com.sleepycat.persist.StoreNotFoundException) Entry(java.util.Map.Entry) StringWriter(java.io.StringWriter) StoreDefinition(voldemort.store.StoreDefinition) StringReader(java.io.StringReader) JsonReader(voldemort.serialization.json.JsonReader) DefaultSerializerFactory(voldemort.serialization.DefaultSerializerFactory) SerializerFactory(voldemort.serialization.SerializerFactory) SerializationException(voldemort.serialization.SerializationException) VectorClock(voldemort.versioning.VectorClock) IOException(java.io.IOException) Date(java.util.Date) DecoderException(org.apache.commons.codec.DecoderException) OutputStreamWriter(java.io.OutputStreamWriter) SerializerDefinition(voldemort.serialization.SerializerDefinition)

Example 38 with GenericDatumReader

use of org.apache.avro.generic.GenericDatumReader in project voldemort by voldemort.

the class VoldemortAvroClientShell method processCommands.

/**
 * Runs the shell loop: reads command lines from {@code reader} until end of
 * input and executes {@code get}, {@code put}, {@code quit} or {@code exit}.
 * Keys and values are read from the following input lines as JSON and
 * decoded against the given Avro schemas.
 *
 * @param client store client used for the get and put operations
 * @param reader source of command, key and value lines
 * @param keySchema Avro schema used to decode keys
 * @param valueSchema Avro schema used to decode values
 * @param printCommands whether to echo each command line to standard output
 * @throws IOException if reading the next command line fails
 */
private static void processCommands(DefaultStoreClient<Object, Object> client, BufferedReader reader, Schema keySchema, Schema valueSchema, boolean printCommands) throws IOException {
    for (String line = reader.readLine(); line != null; line = reader.readLine()) {
        System.out.print(PROMPT);
        if (line.trim().equals("")) {
            continue;
        }
        if (printCommands) {
            System.out.println(line);
        }
        try {
            if (line.toLowerCase().startsWith("get")) {
                System.out.println("Enter key:");
                line = reader.readLine();
                JsonDecoder decoder = new JsonDecoder(keySchema, line);
                Object key;
                try {
                    key = new GenericDatumReader<Object>(keySchema).read(null, decoder);
                } catch (IOException e) {
                    // Report why decoding failed instead of discarding the cause.
                    System.err.println("Error parsing key " + e.getMessage());
                    continue;
                }
                if (key == null) {
                    System.err.println("Error parsing key ");
                    continue;
                }
                System.out.println("Value - " + client.get(key));
            } else if (line.toLowerCase().startsWith("put")) {
                System.out.println("Enter key:");
                String keyString = reader.readLine();
                System.out.println("Enter value:");
                String valueString = reader.readLine();
                JsonDecoder keyDecoder = new JsonDecoder(keySchema, keyString);
                JsonDecoder valueDecoder = new JsonDecoder(valueSchema, valueString);
                Object key;
                try {
                    key = new GenericDatumReader<Object>(keySchema).read(null, keyDecoder);
                } catch (IOException e) {
                    // Report why decoding failed instead of discarding the cause.
                    System.err.println("Error parsing key " + e.getMessage());
                    continue;
                }
                if (key == null) {
                    System.err.println("Error parsing key ");
                    continue;
                }
                Object value;
                try {
                    value = new GenericDatumReader<Object>(valueSchema).read(null, valueDecoder);
                } catch (IOException e) {
                    // Report why decoding failed instead of discarding the cause.
                    System.err.println("Error parsing value " + e.getMessage());
                    continue;
                }
                if (value == null) {
                    System.err.println("Error parsing value ");
                    continue;
                }
                System.out.println("Put - " + client.put(key, value));
            } else if (line.startsWith("quit") || line.startsWith("exit")) {
                System.out.println("k k thx bye.");
                System.exit(0);
            } else {
                System.err.println("Only supported 'get' & 'put' ");
            }
        } catch (Exception e) {
            // Any other failure is reported and the loop moves on to the
            // next command.
            System.err.println("Unexpected error:");
            e.printStackTrace(System.err);
        }
    }
}
Also used : JsonDecoder(org.apache.avro.io.JsonDecoder) GenericDatumReader(org.apache.avro.generic.GenericDatumReader) IOException(java.io.IOException) IOException(java.io.IOException)

Example 39 with GenericDatumReader

use of org.apache.avro.generic.GenericDatumReader in project voldemort by voldemort.

the class VoldemortClientShell method parseObject.

/**
 * Reads the next JSON value from {@code argStr} and, when the serializer is
 * an Avro one, decodes that value (expected to be a string holding Avro
 * JSON) against the serializer's current schema.
 *
 * @param serializerDef serializer definition that selects Avro or plain
 *        JSON handling and supplies the Avro schema
 * @param argStr text to parse
 * @param parsePos set to the JSON reader's current line offset minus one
 *        after the value has been read
 * @param errorStream stream that receives Avro parsing errors
 * @return the parsed object; {@code null} when the JSON reader signals end
 *         of input; the undecoded Avro string when Avro decoding fails with
 *         an {@code IOException}
 */
public static Object parseObject(SerializerDefinition serializerDef, String argStr, MutableInt parsePos, PrintStream errorStream) {
    Object obj = null;
    try {
        // TODO everything is read as json string now..
        JsonReader jsonReader = new JsonReader(new StringReader(argStr));
        obj = jsonReader.read();
        // mark how much of the original string, we blew through to
        // extract the avrostring.
        parsePos.setValue(jsonReader.getCurrentLineOffset() - 1);
        if (StoreDefinitionUtils.isAvroSchema(serializerDef.getName())) {
            // TODO Need to check all the avro siblings work
            // For avro, we hack and extract avro key/value as a string,
            // before we do the actual parsing with the schema
            String avroString = (String) obj;
            // From here on, this is just normal avro parsing.
            Schema latestSchema = Schema.parse(serializerDef.getCurrentSchemaInfo());
            try {
                JsonDecoder decoder = new JsonDecoder(latestSchema, avroString);
                GenericDatumReader<Object> datumReader = new GenericDatumReader<Object>(latestSchema);
                obj = datumReader.read(null, decoder);
            } catch (IOException io) {
                // Send the trace to the same stream as the message so callers
                // that redirect errorStream see both. obj still holds the
                // undecoded string here, and that is what gets returned.
                errorStream.println("Error parsing avro string " + avroString);
                io.printStackTrace(errorStream);
            }
        } else {
            // all json processing does some numeric type tightening
            obj = tightenNumericTypes(obj);
        }
    } catch (EndOfFileException eof) {
        // can be thrown from the jsonReader.read(..) call indicating, we
        // have nothing more to read.
        obj = null;
    }
    return obj;
}
Also used : JsonDecoder(org.apache.avro.io.JsonDecoder) EndOfFileException(voldemort.serialization.json.EndOfFileException) GenericDatumReader(org.apache.avro.generic.GenericDatumReader) Schema(org.apache.avro.Schema) StringReader(java.io.StringReader) JsonReader(voldemort.serialization.json.JsonReader) IOException(java.io.IOException)

Example 40 with GenericDatumReader

use of org.apache.avro.generic.GenericDatumReader in project voldemort by voldemort.

the class AvroUtils method getSchemaFromPath.

/**
 * Pull the schema off of the given file (if it is a file). If it is a
 * directory or a wildcard pattern, pull schemas off of all matching files
 * that are not ignored and check that they are all the same schema. If so,
 * return that schema, otherwise throw an exception.
 *
 * @param fs The filesystem to use
 * @param path The path from which to get the schema
 * @return The schema of this file or all its subfiles
 * @throws RuntimeException if the path cannot be opened, is not valid Avro,
 *         matches no usable file, or the matched files disagree on the
 *         schema; every failure is wrapped with the offending path
 */
private static Schema getSchemaFromPath(FileSystem fs, Path path) {
    try {
        if (fs.isFile(path)) {
            BufferedInputStream inStream = null;
            try {
                inStream = new BufferedInputStream(fs.open(path));
            } catch (IOException e1) {
                throw new RuntimeException("Unable to open " + path, e1);
            }
            try {
                DataFileStream<Object> reader;
                try {
                    reader = new DataFileStream<Object>(inStream, new GenericDatumReader<Object>());
                } catch (IOException e) {
                    throw new RuntimeException("Invalid avro format, path " + path, e);
                }
                return reader.getSchema();
            } finally {
                // Only the schema is needed, so release the file handle on
                // every path out of this branch.
                try {
                    inStream.close();
                } catch (IOException ignored) {
                    // The stream was only read from; a failure to close it
                    // must not hide the schema or the original error.
                }
            }
        } else {
            FileStatus[] statuses = null;
            if (fs.isDirectory(path)) {
                // this is a directory, get schemas from all subfiles
                statuses = fs.listStatus(path);
                if (statuses == null || statuses.length == 0) {
                    throw new IllegalArgumentException("No files in directory " + path);
                }
            } else {
                // this is wildcard path, get schemas from all matched files
                statuses = fs.globStatus(path);
                if (statuses == null || statuses.length == 0) {
                    throw new IllegalArgumentException("No matches for path pattern " + path);
                }
            }
            List<Schema> schemas = new ArrayList<Schema>();
            for (FileStatus status : statuses) {
                if (!HadoopUtils.shouldPathBeIgnored(status.getPath())) {
                    schemas.add(getSchemaFromPath(fs, status.getPath()));
                }
            }
            // now check that all the schemas are the same
            if (schemas.size() > 0) {
                Schema schema = schemas.get(0);
                for (int i = 1; i < schemas.size(); i++) {
                    if (!schema.equals(schemas.get(i))) {
                        throw new IllegalArgumentException("The directory " + path + " contains heterogeneous schemas: found both '" + schema + "' and '" + schemas.get(i) + "'.");
                    }
                }
                return schema;
            } else {
                throw new IllegalArgumentException("No valid metadata file found for path " + path);
            }
        }
    } catch (Exception e) {
        // Every failure above, including the RuntimeExceptions thrown in this
        // method, is wrapped once more with the path being processed.
        throw new RuntimeException("Error getting schema for path " + path, e);
    }
}
Also used : FileStatus(org.apache.hadoop.fs.FileStatus) GenericDatumReader(org.apache.avro.generic.GenericDatumReader) Schema(org.apache.avro.Schema) ArrayList(java.util.ArrayList) IOException(java.io.IOException) DataFileStream(org.apache.avro.file.DataFileStream) IOException(java.io.IOException) BufferedInputStream(java.io.BufferedInputStream)

Aggregations

GenericDatumReader (org.apache.avro.generic.GenericDatumReader)47 GenericRecord (org.apache.avro.generic.GenericRecord)32 Schema (org.apache.avro.Schema)20 IOException (java.io.IOException)15 File (java.io.File)10 DataFileStream (org.apache.avro.file.DataFileStream)10 Decoder (org.apache.avro.io.Decoder)8 ByteArrayInputStream (java.io.ByteArrayInputStream)7 GenericData (org.apache.avro.generic.GenericData)7 DataFileReader (org.apache.avro.file.DataFileReader)6 Test (org.junit.Test)6 ArrayList (java.util.ArrayList)5 JsonDecoder (org.apache.avro.io.JsonDecoder)5 ParseException (io.druid.java.util.common.parsers.ParseException)4 FileInputStream (java.io.FileInputStream)4 DataFileWriter (org.apache.avro.file.DataFileWriter)4 GenericDatumWriter (org.apache.avro.generic.GenericDatumWriter)4 ByteArrayOutputStream (java.io.ByteArrayOutputStream)3 InputStream (java.io.InputStream)3 Map (java.util.Map)3