Use of org.apache.hadoop.io.Writable in project hive by apache.
The class ColumnarStorageBench, method createRecord.
private ArrayWritable createRecord(final List<TypeInfo> columnTypes) {
  Writable[] fields = new Writable[columnTypes.size()];
  int pos = 0;
  for (TypeInfo type : columnTypes) {
    switch (type.getCategory()) {
      case PRIMITIVE:
        fields[pos++] = getPrimitiveWritable((PrimitiveTypeInfo) type);
        break;
      case LIST: {
        // Wrap the single element type in a one-field record.
        List<TypeInfo> elementType = new ArrayList<TypeInfo>();
        elementType.add(((ListTypeInfo) type).getListElementTypeInfo());
        fields[pos++] = createRecord(elementType);
        break;
      }
      case MAP: {
        // Build a (key, value) record, then wrap it once more as the map group.
        List<TypeInfo> keyValueType = new ArrayList<TypeInfo>();
        keyValueType.add(((MapTypeInfo) type).getMapKeyTypeInfo());
        keyValueType.add(((MapTypeInfo) type).getMapValueTypeInfo());
        fields[pos++] = record(createRecord(keyValueType));
        break;
      }
      case STRUCT: {
        // Recurse into the struct's field types.
        List<TypeInfo> elementType = ((StructTypeInfo) type).getAllStructFieldTypeInfos();
        fields[pos++] = createRecord(elementType);
        break;
      }
      default:
        throw new IllegalStateException("Invalid column type: " + type);
    }
  }
  return record(fields);
}
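The record(...) and getPrimitiveWritable(...) helpers are not part of this snippet. As a hedged sketch of what they might look like (the real ColumnarStorageBench helpers cover every primitive category; the Random field, IntWritable and Text cases below are illustrative assumptions, not the benchmark's exact code):

  // Hypothetical sketch of the two helpers assumed by createRecord().
  private final Random rand = new Random();

  private ArrayWritable record(Writable... fields) {
    // Nested structures are expressed by wrapping child writables in an ArrayWritable group.
    return new ArrayWritable(Writable.class, fields);
  }

  private Writable getPrimitiveWritable(PrimitiveTypeInfo type) {
    switch (type.getPrimitiveCategory()) {
      case INT:
        return new IntWritable(rand.nextInt());
      case STRING:
        return new Text(Integer.toString(rand.nextInt()));
      default:
        throw new IllegalStateException("Unsupported primitive type: " + type);
    }
  }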
Use of org.apache.hadoop.io.Writable in project crunch by cloudera.
The class TupleWritable, method readFields.
/**
 * {@inheritDoc}
 */
@SuppressWarnings("unchecked") // No static typeinfo on Tuples
public void readFields(DataInput in) throws IOException {
  int card = WritableUtils.readVInt(in);
  values = new Writable[card];
  written = WritableUtils.readVLong(in);
  Class<? extends Writable>[] cls = new Class[card];
  try {
    // First pass: read the concrete class name of every field that was written.
    for (int i = 0; i < card; ++i) {
      if (has(i)) {
        cls[i] = Class.forName(Text.readString(in)).asSubclass(Writable.class);
      }
    }
    // Second pass: instantiate each field and let it deserialize itself.
    for (int i = 0; i < card; ++i) {
      if (has(i)) {
        values[i] = cls[i].newInstance();
        values[i].readFields(in);
      }
    }
  } catch (ClassNotFoundException e) {
    throw (IOException) new IOException("Failed tuple init").initCause(e);
  } catch (IllegalAccessException e) {
    throw (IOException) new IOException("Failed tuple init").initCause(e);
  } catch (InstantiationException e) {
    throw (IOException) new IOException("Failed tuple init").initCause(e);
  }
}
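The serialization side has to mirror these two passes: write the cardinality and the bitmask of populated slots, then each present field's class name, then each field's own bytes. A minimal sketch of such a write(DataOutput) counterpart, assuming the same has(i)/written convention (not necessarily Crunch's exact implementation):

  public void write(DataOutput out) throws IOException {
    WritableUtils.writeVInt(out, values.length);  // cardinality
    WritableUtils.writeVLong(out, written);       // bitmask of populated slots
    for (int i = 0; i < values.length; ++i) {
      if (has(i)) {
        Text.writeString(out, values[i].getClass().getName());  // class names first
      }
    }
    for (int i = 0; i < values.length; ++i) {
      if (has(i)) {
        values[i].write(out);                     // then the field payloads
      }
    }
  }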
Use of org.apache.hadoop.io.Writable in project Solbase by Photobucket.
The class SolbaseIndexReducer, method reduce.
public void reduce(BytesWritable key, Iterable<MapWritable> values, Context context) throws IOException, InterruptedException {
  byte[] _key = null;
  int counter = 0;
  int dupCount = 0;
  // since the key is a checksum, we should de-dupe here
  // TODO: for now, only the first value is kept and the rest are ignored
  boolean first = true;
  for (MapWritable writable : values) {
    if (first) {
      first = false;
      Iterator<Writable> itr = writable.keySet().iterator();
      while (itr.hasNext()) {
        BytesWritable wrtKey = (BytesWritable) itr.next();
        Writable wrt = writable.get(wrtKey);
        if (wrt instanceof DocumentPutWritable) {
          DocumentPutWritable docBytes = (DocumentPutWritable) wrt;
          String globalId = docBytes.getGlobalId();
          int docId = docBytes.getDocId();
          Put mapping = new Put(Bytes.toBytes(globalId));
          mapping.add(Bytes.toBytes("docId"), Bytes.toBytes(""), Bytes.toBytes(docId));
          context.write(new ImmutableBytesWritable(SolbaseUtil.docKeyIdMapTable), mapping);
          context.getCounter(Counters.TOTAL_DOC_KEY_ID_MAP).increment(1);
          List<String> fieldKeys = docBytes.getFieldKeys();
          List<byte[]> fieldValues = docBytes.getFieldValues();
          List<Term> allTerms = docBytes.getAllTerms();
          byte[] md5DocId = SolbaseUtil.randomize(docId);
          Put documentPut = new Put(md5DocId);
          // Store each field as a column under this docId
          for (int i = 0; i < fieldKeys.size(); i++) {
            String fieldKey = fieldKeys.get(i);
            byte[] fieldValue = fieldValues.get(i);
            documentPut.add(Bytes.toBytes("field"), Bytes.toBytes(fieldKey), fieldValue);
          }
          // Finally, store the metadata so we can delete this document later
          documentPut.add(Bytes.toBytes("allTerms"), Bytes.toBytes("allTerms"), SolbaseUtil.toBytes(allTerms).array());
          context.write(new ImmutableBytesWritable(SolbaseUtil.docTable), documentPut);
          context.getCounter(Counters.TOTAL_DOCS).increment(1);
          counter++;
        } else if (wrt instanceof TermDocMetadataWritable) {
          // gather all of the docs for a given field key (field/value)
          TermDocMetadataWritable metadata = (TermDocMetadataWritable) wrt;
          // convert key to byte array
          // byte[] fieldTermKey = key.getBytes();
          byte[] termValue = metadata.getTermDocMetadata();
          _key = metadata.getFieldTermKey();
          int docId = metadata.getDocId();
          Put put = null;
          switch (TermDocMetadataLoader.storageType) {
            case KEY_ONLY: {
              put = new Put(Bytes.add(Bytes.add(_key, SolbaseUtil.delimiter, Bytes.toBytes(docId)), termValue));
              put.add(SolbaseUtil.termVectorDocColumnFamilyName, Bytes.toBytes(""), Bytes.toBytes(""));
              break;
            }
            case WIDE_ROW: {
              int chunkId = TermDocMetadataLoader.getChunkId(docId);
              put = new Put(Bytes.add(_key, SolbaseUtil.delimiter, Bytes.toBytes(chunkId)));
              put.add(SolbaseUtil.termVectorDocColumnFamilyName, Bytes.toBytes(docId), termValue);
              break;
            }
            case NARROW_ROW:
            default: {
              put = new Put(Bytes.add(_key, SolbaseUtil.delimiter, Bytes.toBytes(docId)));
              put.add(SolbaseUtil.termVectorDocColumnFamilyName, Bytes.toBytes(""), termValue);
            }
          }
          context.write(new ImmutableBytesWritable(SolbaseUtil.termVectorTable), put);
          context.getCounter(Counters.TOTAL_TERM_VECTORS).increment(1);
          counter++;
        } else {
          System.out.println("else: " + writable.getClass());
          context.getCounter(Counters.TOTAL_INVALID).increment(1);
        }
      }
    } else {
      dupCount++;
    }
  }
  context.getCounter(Counters.DUPLICATE_ROWS).increment(dupCount);
}
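Because the reducer emits an ImmutableBytesWritable table name as the output key and a Put as the value, it has to be paired with an output format that routes each Put to the named table. The snippet does not show Solbase's actual driver; the wiring below is an illustrative sketch that assumes HBase's MultiTableOutputFormat, which interprets the reduce output key as the target table name (the mapper setup and input format are omitted):

  Configuration conf = HBaseConfiguration.create();
  Job job = new Job(conf, "solbase-index");
  job.setJarByClass(SolbaseIndexReducer.class);
  job.setReducerClass(SolbaseIndexReducer.class);
  job.setMapOutputKeyClass(BytesWritable.class);
  job.setMapOutputValueClass(MapWritable.class);
  // Routes each Put to the table named by the ImmutableBytesWritable key.
  job.setOutputFormatClass(MultiTableOutputFormat.class);
  job.setOutputKeyClass(ImmutableBytesWritable.class);
  job.setOutputValueClass(Put.class);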
Use of org.apache.hadoop.io.Writable in project haivvreo by jghoman.
The class TestAvroSerializer, method serializeAndDeserialize.
/**
 * Verify that we can serialize an Avro value by taking one, running it through
 * the deserialization process, and then serializing it again.
 */
private GenericRecord serializeAndDeserialize(String recordValue, String fieldName, Object fieldValue) throws SerDeException, IOException {
  Schema s = buildSchema(recordValue);
  GenericData.Record r = new GenericData.Record(s);
  r.put(fieldName, fieldValue);
  AvroSerializer as = new AvroSerializer();
  AvroDeserializer ad = new AvroDeserializer();
  AvroObjectInspectorGenerator aoig = new AvroObjectInspectorGenerator(s);
  ObjectInspector oi = aoig.getObjectInspector();
  List<String> columnNames = aoig.getColumnNames();
  List<TypeInfo> columnTypes = aoig.getColumnTypes();
  AvroGenericRecordWritable agrw = Utils.serializeAndDeserializeRecord(r);
  Object obj = ad.deserialize(columnNames, columnTypes, agrw, s);
  Writable result = as.serialize(obj, oi, columnNames, columnTypes, s);
  assertTrue(result instanceof AvroGenericRecordWritable);
  GenericRecord r2 = ((AvroGenericRecordWritable) result).getRecord();
  assertEquals(s, r2.getSchema());
  return r2;
}
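A test built on this helper only needs to supply an Avro field declaration, the field name, and a value, then assert on the round-tripped record. A hypothetical example (the field name and value are illustrative, and buildSchema is assumed to wrap the declaration in a full record schema):

  @Test
  public void canRoundTripAnInt() throws Exception {
    String fieldDecl = "{ \"name\":\"intField\", \"type\":\"int\" }";
    GenericRecord roundTripped = serializeAndDeserialize(fieldDecl, "intField", 42);
    assertEquals(42, roundTripped.get("intField"));
  }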
Use of org.apache.hadoop.io.Writable in project goldenorb by jzachr.
The class OrbPartition, method compute.
public void compute() {
  if (getSuperStep() == 1) {
    enterBarrier("superStep1Barrier");
    processingVoteToHaltSet = new VoteToHaltSet(vertices.keySet());
    int count = 0;
    List<Vertex<?, ?, ?>> vertexList = new ArrayList<Vertex<?, ?, ?>>();
    List<List<Message<? extends Writable>>> messageList = new ArrayList<List<Message<? extends Writable>>>();
    int verticesLeft = vertices.keySet().size();
    for (Vertex<?, ?, ?> v : vertices.values()) {
      // count += 1;
      // verticesLeft -= 1;
      // vertexList.add(v);
      // messageList.add(new ArrayList<Message<? extends Writable>>());
      //
      // if (count >= getOrbConf().getVerticesPerBlock() || verticesLeft == 0) {
      //   computeExecutor.execute(new VertexComputer(vertexList, messageList));
      //   vertexList = new ArrayList<Vertex<?,?,?>>();
      //   messageList = new ArrayList<List<Message<? extends Writable>>>();
      //   count = 0;
      // }
      v.compute(new ArrayList());
    }
    synchronized (this) {
      while (!processingVoteToHaltSet.isEmpty()) {
        try {
          wait(1000);
          LOG.debug(Integer.toString(processingVoteToHaltSet.size()));
        } catch (InterruptedException e) {
          e.printStackTrace();
        }
      }
    }
  } else {
    if (processingInboundMessageQueue.getVerticesWithMessages().size() == 0) {
      hasMoreToProcess = false;
      if (enterAllDoneBarrier("superStepBarrier", getSuperStep(), true)) {
        doneComputing();
      }
    } else {
      enterAllDoneBarrier("superStepBarrier", getSuperStep(), false);
      int count = 0;
      List<Vertex<?, ?, ?>> vertexList = new ArrayList<Vertex<?, ?, ?>>();
      List<List<Message<? extends Writable>>> messageList = new ArrayList<List<Message<? extends Writable>>>();
      int verticesLeft = processingInboundMessageQueue.getVerticesWithMessages().size();
      for (String s : processingInboundMessageQueue.getVerticesWithMessages()) {
        // count += 1;
        // verticesLeft -= 1;
        // vertexList.add(vertices.get(s));
        // messageList.add(processingInboundMessageQueue.getMessage(s));
        //
        // if (count >= getOrbConf().getVerticesPerBlock() || verticesLeft == 0) {
        //   computeExecutor.execute(new VertexComputer(vertexList, messageList));
        //   vertexList = new ArrayList<Vertex<?,?,?>>();
        //   messageList = new ArrayList<List<Message<? extends Writable>>>();
        //   count = 0;
        // }
        vertices.get(s).compute((Collection) processingInboundMessageQueue.getMessage(s));
      }
      synchronized (this) {
        while (!processingVoteToHaltSet.isEmpty()) {
          try {
            wait(10000);
          } catch (InterruptedException e) {
            e.printStackTrace();
          }
          LOG.debug(Integer.toString(processingVoteToHaltSet.size()));
        }
      }
    }
  }
  enterSuperStepBarrier("doneComputingVerticesBarrier", getSuperStep());
  outboundMessageQueue.sendRemainingMessages();
  enterSuperStepBarrier("doneSendingMessagesBarrier", getSuperStep());
  LOG.info("Partition " + getPartitionID() + " going back to run portion " + Integer.toString(getSuperStep()));
}
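The wait(1000)/wait(10000) loops only make progress promptly if whatever drains processingVoteToHaltSet also notifies the partition object. GoldenOrb's actual VoteToHaltSet is not shown here; the stand-in below is only a generic sketch of the synchronization contract those loops rely on (class and method names are illustrative):

  import java.util.Collection;
  import java.util.HashSet;
  import java.util.Set;

  // Simplified stand-in: the owner object is notified once the last vertex votes to halt.
  class SimpleVoteToHaltSet {
    private final Set<String> pending;
    private final Object owner;

    SimpleVoteToHaltSet(Collection<String> vertexIds, Object owner) {
      this.pending = new HashSet<String>(vertexIds);
      this.owner = owner;
    }

    void voteToHalt(String vertexId) {
      synchronized (owner) {
        pending.remove(vertexId);
        if (pending.isEmpty()) {
          owner.notifyAll();  // wakes the wait(...) loop in compute()
        }
      }
    }

    boolean isEmpty() {
      synchronized (owner) {
        return pending.isEmpty();
      }
    }
  }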