
Example 81 with BytesWritable

use of org.apache.hadoop.io.BytesWritable in project alluxio by Alluxio.

the class KeyValueRecordReader method nextKeyValue.

@Override
public synchronized boolean nextKeyValue() throws IOException {
    if (!mKeyValuePairIterator.hasNext()) {
        return false;
    }
    KeyValuePair pair;
    try {
        pair = mKeyValuePairIterator.next();
    } catch (AlluxioException e) {
        throw new IOException(e);
    }
    // TODO(cc): Implement a ByteBufferInputStream which is backed by a ByteBuffer so we could
    // benefit from zero-copy.
    mCurrentKey.set(new BytesWritable(BufferUtils.newByteArrayFromByteBuffer(pair.getKey())));
    mCurrentValue.set(new BytesWritable(BufferUtils.newByteArrayFromByteBuffer(pair.getValue())));
    mNumVisitedKeyValuePairs++;
    return true;
}
Also used : KeyValuePair(alluxio.client.keyvalue.KeyValuePair) BytesWritable(org.apache.hadoop.io.BytesWritable) IOException(java.io.IOException) AlluxioException(alluxio.exception.AlluxioException)
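
The key and value bytes are copied out of their ByteBuffers before being wrapped in BytesWritable; the TODO notes that a ByteBufferInputStream would allow zero-copy. A minimal sketch of that copy step, assuming BufferUtils.newByteArrayFromByteBuffer simply copies the buffer's remaining bytes (an assumption, not Alluxio's actual implementation):

import java.nio.ByteBuffer;
import org.apache.hadoop.io.BytesWritable;

public final class ByteBufferCopySketch {

    // Assumed behavior of a helper like BufferUtils.newByteArrayFromByteBuffer:
    // copy the remaining bytes without disturbing the original buffer's position.
    static byte[] toByteArray(ByteBuffer buf) {
        ByteBuffer dup = buf.duplicate();
        byte[] bytes = new byte[dup.remaining()];
        dup.get(bytes);
        return bytes;
    }

    public static void main(String[] args) {
        ByteBuffer key = ByteBuffer.wrap("k1".getBytes());
        BytesWritable writable = new BytesWritable(toByteArray(key));
        System.out.println(writable.getLength()); // prints 2
    }
}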

Example 82 with BytesWritable

use of org.apache.hadoop.io.BytesWritable in project voldemort by voldemort.

the class AvroStoreBuilderReducer method reduce.

@SuppressWarnings("unchecked")
@Override
public void reduce(AvroKey<ByteBuffer> keyAvro, Iterator<AvroValue<ByteBuffer>> iterator, OutputCollector<Text, Text> collector, Reporter reporter) throws IOException {
    ByteBuffer keyBuffer = keyAvro.datum();
    keyBuffer.rewind();
    byte[] keyBytes = new byte[keyBuffer.remaining()];
    keyBuffer.get(keyBytes);
    BytesWritable key = new BytesWritable(keyBytes);
    ArrayList<BytesWritable> valueList = new ArrayList<BytesWritable>();
    while (iterator.hasNext()) {
        ByteBuffer writable = iterator.next().datum();
        writable.rewind();
        byte[] valueBytes = new byte[writable.remaining()];
        writable.get(valueBytes);
        BytesWritable value = new BytesWritable(valueBytes);
        valueList.add(value);
    }
    writer.write(key, valueList.iterator(), reporter);
}
Also used : ArrayList(java.util.ArrayList) BytesWritable(org.apache.hadoop.io.BytesWritable) ByteBuffer(java.nio.ByteBuffer)
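
Each value ByteBuffer is copied into its own byte[] before wrapping, because BytesWritable owns a resizable backing buffer. When reading such values back, remember that getBytes() can return a buffer longer than the logical contents; a small hedged sketch of the safe read pattern:

import java.util.Arrays;
import org.apache.hadoop.io.BytesWritable;

public final class BytesWritableReadSketch {
    public static void main(String[] args) {
        BytesWritable value = new BytesWritable(new byte[] { 1, 2, 3 });
        // Grow the backing buffer beyond the logical size to show the pitfall.
        value.setCapacity(16);
        // getBytes() returns the whole backing buffer, so bound any read by getLength().
        byte[] exact = Arrays.copyOf(value.getBytes(), value.getLength());
        System.out.println(Arrays.toString(exact)); // prints [1, 2, 3]
        // On Hadoop 2.x and later, value.copyBytes() performs the same bounded copy.
    }
}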

Example 83 with BytesWritable

use of org.apache.hadoop.io.BytesWritable in project voldemort by voldemort.

the class HadoopStoreWriter method write.

@Override
public void write(BytesWritable key, Iterator<BytesWritable> iterator, Reporter reporter) throws IOException {
    // Read chunk id
    int chunkId = ReadOnlyUtils.chunk(key.getBytes(), getNumChunks());
    initFileStreams(chunkId);
    // Write key and position
    this.indexFileStream[chunkId].write(key.getBytes(), 0, key.getLength());
    this.indexFileSizeInBytes[chunkId] += key.getLength();
    this.indexFileStream[chunkId].writeInt(this.position[chunkId]);
    this.indexFileSizeInBytes[chunkId] += ByteUtils.SIZE_OF_INT;
    // Run key through checksum digest
    if (this.checkSumDigestIndex[chunkId] != null) {
        this.checkSumDigestIndex[chunkId].update(key.getBytes(), 0, key.getLength());
        this.checkSumDigestIndex[chunkId].update(this.position[chunkId]);
    }
    short numTuples = 0;
    ByteArrayOutputStream stream = new ByteArrayOutputStream();
    DataOutputStream valueStream = new DataOutputStream(stream);
    while (iterator.hasNext()) {
        BytesWritable writable = iterator.next();
        byte[] valueBytes = writable.getBytes();
        int offsetTillNow = 0;
        /**
             * Below, we read the node id, partition id and replica type of each record
             * coming in, and validate that it is consistent with the other IDs seen so
             * far. This is to catch potential regressions to the shuffling logic in:
             *
             * {@link AbstractStoreBuilderConfigurable#getPartition(byte[], byte[], int)}
             */
        // Read node Id
        int currentNodeId = ByteUtils.readInt(valueBytes, offsetTillNow);
        if (this.nodeId == -1) {
            this.nodeId = currentNodeId;
        } else if (this.nodeId != currentNodeId) {
            throw new IllegalArgumentException("Should not get various nodeId shuffled to us! " + "First nodeId seen: " + this.nodeId + ", currentNodeId: " + currentNodeId);
        }
        offsetTillNow += ByteUtils.SIZE_OF_INT;
        // Read partition id
        int currentPartitionId = ByteUtils.readInt(valueBytes, offsetTillNow);
        if (this.partitionId == -1) {
            this.partitionId = currentPartitionId;
        } else if (this.partitionId != currentPartitionId) {
            throw new IllegalArgumentException("Should not get various partitionId shuffled to us! " + "First partitionId seen: " + this.partitionId + ", currentPartitionId: " + currentPartitionId);
        }
        offsetTillNow += ByteUtils.SIZE_OF_INT;
        // Read replica type
        if (getSaveKeys()) {
            int currentReplicaType = (int) ByteUtils.readBytes(valueBytes, offsetTillNow, ByteUtils.SIZE_OF_BYTE);
            if (this.replicaType == -1) {
                this.replicaType = currentReplicaType;
            } else if (this.replicaType != currentReplicaType) {
                throw new IllegalArgumentException("Should not get various replicaType shuffled to us! " + "First replicaType seen: " + this.replicaType + ", currentReplicaType: " + currentReplicaType);
            }
            if (getBuildPrimaryReplicasOnly() && this.replicaType > 0) {
                throw new IllegalArgumentException("Should not get any replicaType > 0 shuffled to us" + " when buildPrimaryReplicasOnly mode is enabled!");
            }
            offsetTillNow += ByteUtils.SIZE_OF_BYTE;
        }
        int valueLength = writable.getLength() - offsetTillNow;
        if (getSaveKeys()) {
            // Write ( key_length, value_length, key, value )
            valueStream.write(valueBytes, offsetTillNow, valueLength);
        } else {
            // Write (value_length + value)
            valueStream.writeInt(valueLength);
            valueStream.write(valueBytes, offsetTillNow, valueLength);
        }
        numTuples++;
        // If we are not saving keys, more than one value per md5 means duplicate keys
        // or something malicious (we obviously expect collisions when we save keys).
        if (!getSaveKeys() && numTuples > 1)
            throw new VoldemortException("Duplicate keys detected for md5 sum " + ByteUtils.toHexString(ByteUtils.copy(key.getBytes(), 0, key.getLength())));
    }
    if (numTuples < 0) {
        // Overflow
        throw new VoldemortException("Found too many collisions: chunk " + chunkId + " has exceeded " + MAX_HASH_COLLISIONS + " collisions.");
    } else if (numTuples > 1) {
        // Update number of collisions + max keys per collision
        reporter.incrCounter(CollisionCounter.NUM_COLLISIONS, 1);
        long numCollisions = reporter.getCounter(CollisionCounter.MAX_COLLISIONS).getCounter();
        if (numTuples > numCollisions) {
            reporter.incrCounter(CollisionCounter.MAX_COLLISIONS, numTuples - numCollisions);
        }
    }
    // Flush the value
    valueStream.flush();
    byte[] value = stream.toByteArray();
    // First, if save keys flag set the number of keys
    if (getSaveKeys()) {
        this.valueFileStream[chunkId].writeShort(numTuples);
        this.valueFileSizeInBytes[chunkId] += ByteUtils.SIZE_OF_SHORT;
        this.position[chunkId] += ByteUtils.SIZE_OF_SHORT;
        if (this.checkSumDigestValue[chunkId] != null) {
            this.checkSumDigestValue[chunkId].update(numTuples);
        }
    }
    this.valueFileStream[chunkId].write(value);
    this.valueFileSizeInBytes[chunkId] += value.length;
    this.position[chunkId] += value.length;
    if (this.checkSumDigestValue[chunkId] != null) {
        this.checkSumDigestValue[chunkId].update(value);
    }
    if (this.position[chunkId] < 0)
        throw new VoldemortException("Chunk overflow exception: chunk " + chunkId + " has exceeded " + MAX_CHUNK_SIZE + " bytes.");
}
Also used : FSDataOutputStream(org.apache.hadoop.fs.FSDataOutputStream) DataOutputStream(java.io.DataOutputStream) BytesWritable(org.apache.hadoop.io.BytesWritable) ByteArrayOutputStream(java.io.ByteArrayOutputStream) VoldemortException(voldemort.VoldemortException)
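
The reducer expects every incoming BytesWritable value to carry a small header: a 4-byte node id, a 4-byte partition id and, when keys are saved, a 1-byte replica type, followed by the payload. An illustrative sketch of packing such a record on the map side (not Voldemort's mapper code; it assumes big-endian ints, which is what DataOutputStream writes and what ByteUtils.readInt is taken to read):

import java.io.ByteArrayOutputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import org.apache.hadoop.io.BytesWritable;

public final class PackedValueSketch {

    static BytesWritable pack(int nodeId, int partitionId, Integer replicaType,
                              byte[] payload) throws IOException {
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        DataOutputStream out = new DataOutputStream(bytes);
        out.writeInt(nodeId);           // read back at offset 0
        out.writeInt(partitionId);      // read back at offset 4
        if (replicaType != null) {
            out.writeByte(replicaType); // only present when keys are saved
        }
        out.write(payload);
        out.flush();
        return new BytesWritable(bytes.toByteArray());
    }

    public static void main(String[] args) throws IOException {
        BytesWritable packed = pack(0, 3, 0, new byte[] { 42 });
        System.out.println(packed.getLength()); // prints 10 (4 + 4 + 1 + 1)
    }
}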

Example 84 with BytesWritable

use of org.apache.hadoop.io.BytesWritable in project cdap by caskdata.

the class BytesStreamEventDecoder method getEventBody.

private BytesWritable getEventBody(StreamEvent event, BytesWritable result) {
    ByteBuffer body = event.getBody();
    if (body.hasArray()) {
        // If the backing array is exactly the event body, reuse it directly and avoid
        // the extra copy which BytesWritable.set() does.
        if (body.array().length == body.remaining()) {
            return new BytesWritable(body.array());
        }
        // Otherwise, need to copy the byte[], done by the BytesWritable.set() method
        result.set(body.array(), body.arrayOffset() + body.position(), body.remaining());
        return result;
    }
    // Otherwise, need to copy to a new array
    byte[] copy = new byte[body.remaining()];
    body.mark();
    body.get(copy);
    body.reset();
    return new BytesWritable(copy);
}
Also used : BytesWritable(org.apache.hadoop.io.BytesWritable) ByteBuffer(java.nio.ByteBuffer)
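
The branch on body.hasArray() matters because only heap ByteBuffers expose a backing array that can be wrapped without copying; direct (off-heap) buffers always require a copy, and mark()/reset() keep the buffer's position untouched. A short hedged demo of the two cases:

import java.nio.ByteBuffer;
import org.apache.hadoop.io.BytesWritable;

public final class BufferBackingDemo {
    public static void main(String[] args) {
        // Heap buffer whose array is exactly the body: wrap the array directly, no copy.
        ByteBuffer heap = ByteBuffer.wrap(new byte[] { 1, 2, 3 });
        BytesWritable wrapped = new BytesWritable(heap.array());
        System.out.println(wrapped.getLength()); // prints 3

        // A direct buffer has no accessible array, so the bytes must be copied out,
        // preserving the position with mark()/reset() as in the method above.
        ByteBuffer offHeap = ByteBuffer.allocateDirect(3);
        offHeap.put(new byte[] { 1, 2, 3 });
        offHeap.flip();
        byte[] copy = new byte[offHeap.remaining()];
        offHeap.mark();
        offHeap.get(copy);
        offHeap.reset();
        System.out.println(new BytesWritable(copy).getLength()); // prints 3
    }
}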

Example 85 with BytesWritable

use of org.apache.hadoop.io.BytesWritable in project compiler by boalang.

the class AbstractCommit method parseJavaFile.

private boolean parseJavaFile(final String path, final ChangedFile.Builder fb, final String content, final String compliance, final int astLevel, final boolean storeOnError, Writer astWriter, String key) {
    try {
        final ASTParser parser = ASTParser.newParser(astLevel);
        parser.setKind(ASTParser.K_COMPILATION_UNIT);
        parser.setResolveBindings(true);
        parser.setSource(content.toCharArray());
        final Map options = JavaCore.getOptions();
        JavaCore.setComplianceOptions(compliance, options);
        parser.setCompilerOptions(options);
        final CompilationUnit cu = (CompilationUnit) parser.createAST(null);
        final JavaErrorCheckVisitor errorCheck = new JavaErrorCheckVisitor();
        cu.accept(errorCheck);
        if (!errorCheck.hasError || storeOnError) {
            final ASTRoot.Builder ast = ASTRoot.newBuilder();
            //final CommentsRoot.Builder comments = CommentsRoot.newBuilder();
            final Java7Visitor visitor;
            if (astLevel == AST.JLS8)
                visitor = new Java8Visitor(content, connector.nameIndices);
            else
                visitor = new Java7Visitor(content, connector.nameIndices);
            try {
                ast.addNamespaces(visitor.getNamespaces(cu));
                for (final String s : visitor.getImports()) ast.addImports(s);
            /*for (final Comment c : visitor.getComments())
                comments.addComments(c);*/
            } catch (final UnsupportedOperationException e) {
                return false;
            } catch (final Exception e) {
                if (debug)
                    System.err.println("Error visiting: " + path);
                e.printStackTrace();
                return false;
            }
            if (astWriter != null) {
                try {
                    astWriter.append(new Text(key), new BytesWritable(ast.build().toByteArray()));
                } catch (IOException e) {
                    e.printStackTrace();
                }
            } else
                fb.setAst(ast);
        //fb.setComments(comments);
        }
        return !errorCheck.hasError;
    } catch (final Exception e) {
        e.printStackTrace();
        return false;
    }
}
Also used : JavaErrorCheckVisitor(boa.datagen.util.JavaErrorCheckVisitor) Text(org.apache.hadoop.io.Text) BytesWritable(org.apache.hadoop.io.BytesWritable) Java7Visitor(boa.datagen.util.Java7Visitor) Java8Visitor(boa.datagen.util.Java8Visitor) ASTRoot(boa.types.Ast.ASTRoot)
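
The serialized AST is written as a (Text key, BytesWritable value) pair via astWriter.append(...), which suggests a SequenceFile.Writer, though the example does not show its construction. A minimal sketch of writing such pairs, assuming a SequenceFile.Writer and a hypothetical output path:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;

public final class AstWriterSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Path out = new Path("/tmp/asts.seq"); // hypothetical output path
        try (SequenceFile.Writer writer = SequenceFile.createWriter(conf,
                SequenceFile.Writer.file(out),
                SequenceFile.Writer.keyClass(Text.class),
                SequenceFile.Writer.valueClass(BytesWritable.class))) {
            byte[] serializedAst = new byte[] { 0x0a, 0x00 }; // stand-in for ast.build().toByteArray()
            writer.append(new Text("repo!commit!File.java"), new BytesWritable(serializedAst));
        }
    }
}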

Aggregations

BytesWritable (org.apache.hadoop.io.BytesWritable) 275
Text (org.apache.hadoop.io.Text) 73
LongWritable (org.apache.hadoop.io.LongWritable) 59
Test (org.junit.Test) 53
ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) 46
IntWritable (org.apache.hadoop.io.IntWritable) 44
ArrayList (java.util.ArrayList) 39
Path (org.apache.hadoop.fs.Path) 38
IOException (java.io.IOException) 36
Configuration (org.apache.hadoop.conf.Configuration) 33
FloatWritable (org.apache.hadoop.io.FloatWritable) 33
Writable (org.apache.hadoop.io.Writable) 32
BooleanWritable (org.apache.hadoop.io.BooleanWritable) 31
List (java.util.List) 30
SequenceFile (org.apache.hadoop.io.SequenceFile) 27
Random (java.util.Random) 24
DoubleWritable (org.apache.hadoop.hive.serde2.io.DoubleWritable) 24
ShortWritable (org.apache.hadoop.hive.serde2.io.ShortWritable) 23
ByteWritable (org.apache.hadoop.hive.serde2.io.ByteWritable) 22
FileSystem (org.apache.hadoop.fs.FileSystem) 21