Example 76 with Writable

use of org.apache.hadoop.io.Writable in project hive by apache.

the class ColumnarStorageBench method createRecord.

private ArrayWritable createRecord(final List<TypeInfo> columnTypes) {
    Writable[] fields = new Writable[columnTypes.size()];
    int pos = 0;
    for (TypeInfo type : columnTypes) {
        switch(type.getCategory()) {
            case PRIMITIVE:
                fields[pos++] = getPrimitiveWritable((PrimitiveTypeInfo) type);
                break;
            case LIST:
                {
                    List<TypeInfo> elementType = new ArrayList<TypeInfo>();
                    elementType.add(((ListTypeInfo) type).getListElementTypeInfo());
                    fields[pos++] = createRecord(elementType);
                }
                break;
            case MAP:
                {
                    List<TypeInfo> keyValueType = new ArrayList<TypeInfo>();
                    keyValueType.add(((MapTypeInfo) type).getMapKeyTypeInfo());
                    keyValueType.add(((MapTypeInfo) type).getMapValueTypeInfo());
                    fields[pos++] = record(createRecord(keyValueType));
                }
                break;
            case STRUCT:
                {
                    List<TypeInfo> elementType = ((StructTypeInfo) type).getAllStructFieldTypeInfos();
                    fields[pos++] = createRecord(elementType);
                }
                break;
            default:
                throw new IllegalStateException("Invalid column type: " + type);
        }
    }
    return record(fields);
}
Also used : ListTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo) MapTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo) Writable(org.apache.hadoop.io.Writable) ArrayWritable(org.apache.hadoop.io.ArrayWritable) IntWritable(org.apache.hadoop.io.IntWritable) BooleanWritable(org.apache.hadoop.io.BooleanWritable) BytesWritable(org.apache.hadoop.io.BytesWritable) DoubleWritable(org.apache.hadoop.hive.serde2.io.DoubleWritable) List(java.util.List) ArrayList(java.util.ArrayList) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo)
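
The record built above is just a (possibly nested) ArrayWritable wrapping a Writable[]. As a hedged, minimal illustration of that wrapper outside the benchmark (the class name and field values below are hypothetical), here is a sketch that builds a flat two-field record and walks it back:

import org.apache.hadoop.io.ArrayWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;

public class ArrayWritableRecordDemo {
    public static void main(String[] args) {
        // Build a flat two-field record by hand, mirroring the Writable[] layout above.
        Writable[] fields = new Writable[] { new IntWritable(42), new Text("hello") };
        ArrayWritable record = new ArrayWritable(Writable.class, fields);

        // Consuming the record is just walking the wrapped array.
        for (Writable w : record.get()) {
            System.out.println(w.getClass().getSimpleName() + " = " + w);
        }
    }
}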

Example 77 with Writable

use of org.apache.hadoop.io.Writable in project crunch by cloudera.

the class TupleWritable method readFields.

/**
 * {@inheritDoc}
 */
// No static typeinfo on Tuples
@SuppressWarnings("unchecked")
public void readFields(DataInput in) throws IOException {
    int card = WritableUtils.readVInt(in);
    values = new Writable[card];
    written = WritableUtils.readVLong(in);
    Class<? extends Writable>[] cls = new Class[card];
    try {
        for (int i = 0; i < card; ++i) {
            if (has(i)) {
                cls[i] = Class.forName(Text.readString(in)).asSubclass(Writable.class);
            }
        }
        for (int i = 0; i < card; ++i) {
            if (has(i)) {
                values[i] = cls[i].newInstance();
                values[i].readFields(in);
            }
        }
    } catch (ClassNotFoundException e) {
        throw (IOException) new IOException("Failed tuple init").initCause(e);
    } catch (IllegalAccessException e) {
        throw (IOException) new IOException("Failed tuple init").initCause(e);
    } catch (InstantiationException e) {
        throw (IOException) new IOException("Failed tuple init").initCause(e);
    }
}
Also used : Writable(org.apache.hadoop.io.Writable) IOException(java.io.IOException)
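
readFields above implies a symmetric write side: the cardinality, the bitmask of populated slots, the class name of each present value, then the values themselves. The following is a simplified sketch of that write method, reusing the snippet's fields (values, written) and its has(i) helper; it is an assumption-based illustration, not the actual crunch TupleWritable.write:

// Goes in the same class as readFields above; imports as in that class plus
// org.apache.hadoop.io.WritableUtils and org.apache.hadoop.io.Text.
public void write(DataOutput out) throws IOException {
    WritableUtils.writeVInt(out, values.length);   // cardinality
    WritableUtils.writeVLong(out, written);        // bitmask of populated slots
    for (int i = 0; i < values.length; ++i) {
        if (has(i)) {
            // class name first, so readFields can instantiate the right type
            Text.writeString(out, values[i].getClass().getName());
        }
    }
    for (int i = 0; i < values.length; ++i) {
        if (has(i)) {
            values[i].write(out);                  // then the serialized value
        }
    }
}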

Example 78 with Writable

use of org.apache.hadoop.io.Writable in project Solbase by Photobucket.

the class SolbaseIndexReducer method reduce.

public void reduce(BytesWritable key, Iterable<MapWritable> values, Context context) throws IOException, InterruptedException {
    byte[] _key = null;
    int counter = 0;
    int dupCount = 0;
    // since the key is a checksum, we should do deduping here
    // TODO: for now, only retrieving the first value and ignoring the rest
    boolean first = true;
    for (MapWritable writable : values) {
        if (first) {
            first = false;
            Iterator<Writable> itr = writable.keySet().iterator();
            while (itr.hasNext()) {
                BytesWritable wrtKey = (BytesWritable) itr.next();
                Writable wrt = writable.get(wrtKey);
                if (wrt instanceof DocumentPutWritable) {
                    DocumentPutWritable docBytes = (DocumentPutWritable) wrt;
                    String globalId = docBytes.getGlobalId();
                    int docId = docBytes.getDocId();
                    Put mapping = new Put(Bytes.toBytes(globalId));
                    mapping.add(Bytes.toBytes("docId"), Bytes.toBytes(""), Bytes.toBytes(docId));
                    context.write(new ImmutableBytesWritable(SolbaseUtil.docKeyIdMapTable), mapping);
                    context.getCounter(Counters.TOTAL_DOC_KEY_ID_MAP).increment(1);
                    List<String> fieldKeys = docBytes.getFieldKeys();
                    List<byte[]> fieldValues = docBytes.getFieldValues();
                    List<Term> allTerms = docBytes.getAllTerms();
                    byte[] md5DocId = SolbaseUtil.randomize(docId);
                    Put documentPut = new Put(md5DocId);
                    // Store each field as a column under this docId
                    for (int i = 0; i < fieldKeys.size(); i++) {
                        String fieldKey = fieldKeys.get(i);
                        byte[] fieldValue = fieldValues.get(i);
                        documentPut.add(Bytes.toBytes("field"), Bytes.toBytes(fieldKey), fieldValue);
                    }
                    // Finally, store metadata so we can delete this document
                    documentPut.add(Bytes.toBytes("allTerms"), Bytes.toBytes("allTerms"), SolbaseUtil.toBytes(allTerms).array());
                    context.write(new ImmutableBytesWritable(SolbaseUtil.docTable), documentPut);
                    context.getCounter(Counters.TOTAL_DOCS).increment(1);
                    counter++;
                } else if (wrt instanceof TermDocMetadataWritable) {
                    // gather all of docs given field key (field/value)
                    TermDocMetadataWritable metadata = (TermDocMetadataWritable) wrt;
                    // convert key to byte array
                    // byte[] fieldTermKey = key.getBytes();
                    byte[] termValue = metadata.getTermDocMetadata();
                    _key = metadata.getFieldTermKey();
                    int docId = metadata.getDocId();
                    Put put = null;
                    switch(TermDocMetadataLoader.storageType) {
                        case KEY_ONLY:
                            {
                                put = new Put(Bytes.add(Bytes.add(_key, SolbaseUtil.delimiter, Bytes.toBytes(docId)), termValue));
                                put.add(SolbaseUtil.termVectorDocColumnFamilyName, Bytes.toBytes(""), Bytes.toBytes(""));
                            }
                            break;
                        case WIDE_ROW:
                            int chunkId = TermDocMetadataLoader.getChunkId(docId);
                            put = new Put(Bytes.add(_key, SolbaseUtil.delimiter, Bytes.toBytes(chunkId)));
                            put.add(SolbaseUtil.termVectorDocColumnFamilyName, Bytes.toBytes(docId), termValue);
                            break;
                        case NARROW_ROW:
                        default:
                            {
                                put = new Put(Bytes.add(_key, SolbaseUtil.delimiter, Bytes.toBytes(docId)));
                                put.add(SolbaseUtil.termVectorDocColumnFamilyName, Bytes.toBytes(""), termValue);
                            }
                    }
                    context.write(new ImmutableBytesWritable(SolbaseUtil.termVectorTable), put);
                    context.getCounter(Counters.TOTAL_TERM_VECTORS).increment(1);
                    counter++;
                } else {
                    System.out.println("else: " + writable.getClass());
                    context.getCounter(Counters.TOTAL_INVALID).increment(1);
                }
            }
        } else {
            dupCount++;
        }
    }
    context.getCounter(Counters.DUPLICATE_ROWS).increment(dupCount);
}
Also used : ImmutableBytesWritable(org.apache.hadoop.hbase.io.ImmutableBytesWritable) Writable(org.apache.hadoop.io.Writable) DocumentPutWritable(org.solbase.indexer.writable.DocumentPutWritable) MapWritable(org.apache.hadoop.io.MapWritable) TermDocMetadataWritable(org.solbase.indexer.writable.TermDocMetadataWritable) BytesWritable(org.apache.hadoop.io.BytesWritable) Term(org.apache.lucene.index.Term) Put(org.apache.hadoop.hbase.client.Put)
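
The switch on TermDocMetadataLoader.storageType above only changes how the HBase row key is composed with Bytes.add. Below is a hedged, self-contained sketch of the KEY_ONLY and NARROW_ROW key layouts using hypothetical stand-in values; the real delimiter and keys come from SolbaseUtil and TermDocMetadataWritable:

import org.apache.hadoop.hbase.util.Bytes;

public class RowKeyLayoutDemo {
    public static void main(String[] args) {
        // Hypothetical stand-ins for the values the reducer extracts; Solbase's
        // actual delimiter lives in SolbaseUtil.
        byte[] fieldTermKey = Bytes.toBytes("title/hadoop");
        byte[] delimiter = new byte[] { 0 };
        byte[] docId = Bytes.toBytes(12345);
        byte[] termValue = Bytes.toBytes("term-metadata");

        // KEY_ONLY layout: fieldTermKey + delimiter + docId + termValue in one row key.
        byte[] keyOnlyRow = Bytes.add(Bytes.add(fieldTermKey, delimiter, docId), termValue);

        // NARROW_ROW layout: the metadata moves out of the key and into the cell value.
        byte[] narrowRow = Bytes.add(fieldTermKey, delimiter, docId);

        System.out.println(Bytes.toStringBinary(keyOnlyRow));
        System.out.println(Bytes.toStringBinary(narrowRow));
    }
}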

Example 79 with Writable

use of org.apache.hadoop.io.Writable in project haivvreo by jghoman.

the class TestAvroSerializer method serializeAndDeserialize.

/**
 * Verify that we can serialize an Avro value by taking one, running it through
 * the deserialization process, and then serializing it again.
 */
private GenericRecord serializeAndDeserialize(String recordValue, String fieldName, Object fieldValue) throws SerDeException, IOException {
    Schema s = buildSchema(recordValue);
    GenericData.Record r = new GenericData.Record(s);
    r.put(fieldName, fieldValue);
    AvroSerializer as = new AvroSerializer();
    AvroDeserializer ad = new AvroDeserializer();
    AvroObjectInspectorGenerator aoig = new AvroObjectInspectorGenerator(s);
    ObjectInspector oi = aoig.getObjectInspector();
    List<String> columnNames = aoig.getColumnNames();
    List<TypeInfo> columnTypes = aoig.getColumnTypes();
    AvroGenericRecordWritable agrw = Utils.serializeAndDeserializeRecord(r);
    Object obj = ad.deserialize(columnNames, columnTypes, agrw, s);
    Writable result = as.serialize(obj, oi, columnNames, columnTypes, s);
    assertTrue(result instanceof AvroGenericRecordWritable);
    GenericRecord r2 = ((AvroGenericRecordWritable) result).getRecord();
    assertEquals(s, r2.getSchema());
    return r2;
}
Also used : ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) Schema(org.apache.avro.Schema) Writable(org.apache.hadoop.io.Writable) GenericData(org.apache.avro.generic.GenericData) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) GenericRecord(org.apache.avro.generic.GenericRecord)
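
A hypothetical caller from the same test class might look like the sketch below. It assumes buildSchema accepts a single field declaration fragment; the field name "int1" and the schema snippet are illustrative, not taken from the haivvreo tests:

@Test
public void canRoundTripAnInt() throws SerDeException, IOException {
    // "int1" and the schema fragment are made-up values for illustration.
    String field = "{ \"name\":\"int1\", \"type\":\"int\" }";
    GenericRecord r = serializeAndDeserialize(field, "int1", 42);
    assertEquals(42, r.get("int1"));
}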

Example 80 with Writable

use of org.apache.hadoop.io.Writable in project goldenorb by jzachr.

the class OrbPartition method compute.

public void compute() {
    if (getSuperStep() == 1) {
        enterBarrier("superStep1Barrier");
        processingVoteToHaltSet = new VoteToHaltSet(vertices.keySet());
        int count = 0;
        List<Vertex<?, ?, ?>> vertexList = new ArrayList<Vertex<?, ?, ?>>();
        List<List<Message<? extends Writable>>> messageList = new ArrayList<List<Message<? extends Writable>>>();
        int verticesLeft = vertices.keySet().size();
        for (Vertex<?, ?, ?> v : vertices.values()) {
            // count += 1;
            // verticesLeft -= 1;
            // vertexList.add(v);
            // messageList.add(new ArrayList<Message<? extends Writable>>());
            //
            // if (count >= getOrbConf().getVerticesPerBlock() || verticesLeft == 0) {
            // computeExecutor.execute(new VertexComputer(vertexList, messageList));
            // vertexList = new ArrayList<Vertex<?,?,?>>();
            // messageList = new ArrayList<List<Message<? extends Writable>>>();
            // count = 0;
            // }
            v.compute(new ArrayList());
        }
        synchronized (this) {
            while (!processingVoteToHaltSet.isEmpty()) {
                try {
                    wait(1000);
                    LOG.debug(Integer.toString(processingVoteToHaltSet.size()));
                } catch (InterruptedException e) {
                    e.printStackTrace();
                }
            }
        }
    } else {
        if (processingInboundMessageQueue.getVerticesWithMessages().size() == 0) {
            hasMoreToProcess = false;
            if (enterAllDoneBarrier("superStepBarrier", getSuperStep(), true)) {
                doneComputing();
            }
        } else {
            enterAllDoneBarrier("superStepBarrier", getSuperStep(), false);
            int count = 0;
            List<Vertex<?, ?, ?>> vertexList = new ArrayList<Vertex<?, ?, ?>>();
            List<List<Message<? extends Writable>>> messageList = new ArrayList<List<Message<? extends Writable>>>();
            int verticesLeft = processingInboundMessageQueue.getVerticesWithMessages().size();
            for (String s : processingInboundMessageQueue.getVerticesWithMessages()) {
                // count += 1;
                // verticesLeft -= 1;
                // vertexList.add(vertices.get(s));
                // messageList.add(processingInboundMessageQueue.getMessage(s));
                //
                // if (count >= getOrbConf().getVerticesPerBlock() || verticesLeft == 0) {
                // computeExecutor.execute(new VertexComputer(vertexList, messageList));
                // vertexList = new ArrayList<Vertex<?,?,?>>();
                // messageList = new ArrayList<List<Message<? extends Writable>>>();
                // count = 0;
                // }
                vertices.get(s).compute((Collection) processingInboundMessageQueue.getMessage(s));
            }
            synchronized (this) {
                while (!processingVoteToHaltSet.isEmpty()) {
                    try {
                        wait(10000);
                    } catch (InterruptedException e) {
                        e.printStackTrace();
                    }
                    LOG.debug(Integer.toString(processingVoteToHaltSet.size()));
                }
            }
        }
    }
    enterSuperStepBarrier("doneComputingVerticesBarrier", getSuperStep());
    outboundMessageQueue.sendRemainingMessages();
    enterSuperStepBarrier("doneSendingMessagesBarrier", getSuperStep());
    LOG.info("Partition " + getPartitionID() + " going back to run portion " + Integer.toString(getSuperStep()));
}
Also used : ArrayList(java.util.ArrayList) Writable(org.apache.hadoop.io.Writable) LongWritable(org.apache.hadoop.io.LongWritable) List(java.util.List)
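
The two synchronized blocks above use the standard guarded-wait idiom: the compute thread re-checks processingVoteToHaltSet under the lock and waits, while another thread is expected to drain the set and notify. A generic sketch of that idiom with a plain HashSet (hypothetical, not goldenorb's VoteToHaltSet):

import java.util.HashSet;
import java.util.Set;

// A generic illustration of the guarded-wait pattern: one thread waits until a
// shared set drains, another removes entries and notifies.
public class GuardedWaitDemo {
    private final Set<String> pending = new HashSet<String>();

    public synchronized void add(String vertexId) {
        pending.add(vertexId);
    }

    public synchronized void markDone(String vertexId) {
        pending.remove(vertexId);
        if (pending.isEmpty()) {
            notifyAll(); // wake the waiting compute thread
        }
    }

    public synchronized void awaitEmpty() throws InterruptedException {
        while (!pending.isEmpty()) {
            wait(1000);  // re-check periodically, like the compute() loop above
        }
    }
}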

Aggregations

Writable (org.apache.hadoop.io.Writable): 221
IntWritable (org.apache.hadoop.io.IntWritable): 103
LongWritable (org.apache.hadoop.io.LongWritable): 91
BooleanWritable (org.apache.hadoop.io.BooleanWritable): 75
BytesWritable (org.apache.hadoop.io.BytesWritable): 74
FloatWritable (org.apache.hadoop.io.FloatWritable): 73
Test (org.junit.Test): 68
IOException (java.io.IOException): 43
Path (org.apache.hadoop.fs.Path): 43
Text (org.apache.hadoop.io.Text): 40
ArrayWritable (org.apache.hadoop.io.ArrayWritable): 37
ShortWritable (org.apache.hadoop.hive.serde2.io.ShortWritable): 34
SequenceFile (org.apache.hadoop.io.SequenceFile): 32
Configuration (org.apache.hadoop.conf.Configuration): 31
DoubleWritable (org.apache.hadoop.io.DoubleWritable): 30
DoubleWritable (org.apache.hadoop.hive.serde2.io.DoubleWritable): 29
ByteWritable (org.apache.hadoop.io.ByteWritable): 28
ByteWritable (org.apache.hadoop.hive.serde2.io.ByteWritable): 25
FileSystem (org.apache.hadoop.fs.FileSystem): 24
ArrayList (java.util.ArrayList): 23
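
All of the Writable types aggregated above share the same contract: a public no-argument constructor plus symmetric write and readFields methods. A minimal, generic sketch of a custom Writable (the class and field names are illustrative only):

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;

public class PageView implements Writable {
    private final Text url = new Text();
    private final IntWritable hits = new IntWritable();

    public PageView() { }                      // required for reflective instantiation

    @Override
    public void write(DataOutput out) throws IOException {
        url.write(out);                        // serialize fields in a fixed order
        hits.write(out);
    }

    @Override
    public void readFields(DataInput in) throws IOException {
        url.readFields(in);                    // deserialize in the same order
        hits.readFields(in);
    }
}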