Use of org.apache.hadoop.io.BytesWritable in project hive by apache.
From the class CreateSequenceFile, method main:
public static void main(String[] args) throws Exception {
  // Read parameters
  int lines = 10;
  List<String> extraArgs = new ArrayList<String>();
  for (int ai = 0; ai < args.length; ai++) {
    if (args[ai].equals("-line") && ai + 1 < args.length) {
      lines = Integer.parseInt(args[ai + 1]);
      ai++;
    } else {
      extraArgs.add(args[ai]);
    }
  }
  if (extraArgs.size() != 1) {
    usage();
  }
  JobConf conf = new JobConf(CreateSequenceFile.class);
  ThriftSerializer serializer = new ThriftSerializer();
  // Open files
  SequenceFile.Writer writer = new SequenceFile.Writer(FileSystem.get(conf), conf,
      new Path(extraArgs.get(0)), BytesWritable.class, BytesWritable.class);
  // write to file
  BytesWritable key = new BytesWritable();
  Random rand = new Random(20081215);
  for (int i = 0; i < lines; i++) {
    ArrayList<Integer> alist = new ArrayList<Integer>();
    alist.add(i);
    alist.add(i * 2);
    alist.add(i * 3);
    ArrayList<String> slist = new ArrayList<String>();
    slist.add("" + i * 10);
    slist.add("" + i * 100);
    slist.add("" + i * 1000);
    ArrayList<IntString> islist = new ArrayList<IntString>();
    islist.add(new IntString(i * i, "" + i * i * i, i));
    HashMap<String, String> hash = new HashMap<String, String>();
    hash.put("key_" + i, "value_" + i);
    Map<String, Map<String, Map<String, PropValueUnion>>> unionMap =
        new HashMap<String, Map<String, Map<String, PropValueUnion>>>();
    Map<String, Map<String, PropValueUnion>> erMap = new HashMap<String, Map<String, PropValueUnion>>();
    Map<String, PropValueUnion> attrMap = new HashMap<String, PropValueUnion>();
    erMap.put("erVal" + i, attrMap);
    attrMap.put("value_" + i, PropValueUnion.doubleValue(1.0));
    unionMap.put("key_" + i, erMap);
    Complex complex = new Complex(rand.nextInt(), "record_" + (new Integer(i)).toString(),
        alist, slist, islist, hash, unionMap, PropValueUnion.stringValue("test" + i),
        PropValueUnion.unionMStringString(hash), PropValueUnion.lString(slist));
    Writable value = serializer.serialize(complex);
    writer.append(key, value);
  }
  // Add an all-null record
  Complex complex = new Complex(0, null, null, null, null, null, null, null, null, null);
  Writable value = serializer.serialize(complex);
  writer.append(key, value);
  // Close files
  writer.close();
}
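Below is a minimal sketch, not taken from the Hive source, of how a file written by this method could be read back as raw BytesWritable pairs. The class name ReadComplexSeqFile and the path /tmp/complex.seq are assumptions for illustration, and the Thrift-serialized Complex payload is left undecoded.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.SequenceFile;

public class ReadComplexSeqFile {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Assumed path for illustration; CreateSequenceFile writes wherever its argument points.
    Path path = new Path("/tmp/complex.seq");
    SequenceFile.Reader reader = new SequenceFile.Reader(FileSystem.get(conf), path, conf);
    try {
      BytesWritable key = new BytesWritable();
      BytesWritable value = new BytesWritable();
      while (reader.next(key, value)) {
        // The value bytes hold a Thrift-serialized Complex record; here we only
        // report its length rather than deserializing it.
        System.out.println("record length = " + value.getLength());
      }
    } finally {
      reader.close();
    }
  }
}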
Use of org.apache.hadoop.io.BytesWritable in project hadoop by apache.
From the class BinaryProtocol, method writeObject:
/**
 * Write the given object to the stream. If it is a Text or BytesWritable,
 * write it directly. Otherwise, write it to a buffer and then write the
 * length and data to the stream.
 * @param obj the object to write
 * @throws IOException
 */
private void writeObject(Writable obj) throws IOException {
  // For Text and BytesWritable, encode them directly, so that they end up
  // in C++ as the natural translations.
  if (obj instanceof Text) {
    Text t = (Text) obj;
    int len = t.getLength();
    WritableUtils.writeVInt(stream, len);
    stream.write(t.getBytes(), 0, len);
  } else if (obj instanceof BytesWritable) {
    BytesWritable b = (BytesWritable) obj;
    int len = b.getLength();
    WritableUtils.writeVInt(stream, len);
    stream.write(b.getBytes(), 0, len);
  } else {
    buffer.reset();
    obj.write(buffer);
    int length = buffer.getLength();
    WritableUtils.writeVInt(stream, length);
    stream.write(buffer.getData(), 0, length);
  }
}
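As a hedged sketch of the matching read side of this framing (a hypothetical helper, not part of BinaryProtocol), the length-prefixed bytes written above can be decoded by reading the vint and then the payload:

import java.io.DataInput;
import java.io.IOException;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.WritableUtils;

class LengthPrefixedReader {
  // Hypothetical helper mirroring writeObject: read a vint length, then the raw bytes.
  static BytesWritable readBytes(DataInput in) throws IOException {
    int len = WritableUtils.readVInt(in);
    byte[] buf = new byte[len];
    in.readFully(buf);
    return new BytesWritable(buf);
  }
}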
Use of org.apache.hadoop.io.BytesWritable in project hadoop by apache.
From the class TestSequenceFileAsBinaryInputFormat, method testBinary:
@Test
public void testBinary() throws IOException {
  JobConf job = new JobConf();
  FileSystem fs = FileSystem.getLocal(job);
  Path dir = new Path(System.getProperty("test.build.data", ".") + "/mapred");
  Path file = new Path(dir, "testbinary.seq");
  Random r = new Random();
  long seed = r.nextLong();
  r.setSeed(seed);
  fs.delete(dir, true);
  FileInputFormat.setInputPaths(job, dir);
  Text tkey = new Text();
  Text tval = new Text();
  SequenceFile.Writer writer = new SequenceFile.Writer(fs, job, file, Text.class, Text.class);
  try {
    for (int i = 0; i < RECORDS; ++i) {
      tkey.set(Integer.toString(r.nextInt(), 36));
      tval.set(Long.toString(r.nextLong(), 36));
      writer.append(tkey, tval);
    }
  } finally {
    writer.close();
  }
  InputFormat<BytesWritable, BytesWritable> bformat = new SequenceFileAsBinaryInputFormat();
  int count = 0;
  r.setSeed(seed);
  BytesWritable bkey = new BytesWritable();
  BytesWritable bval = new BytesWritable();
  Text cmpkey = new Text();
  Text cmpval = new Text();
  DataInputBuffer buf = new DataInputBuffer();
  final int NUM_SPLITS = 3;
  FileInputFormat.setInputPaths(job, file);
  for (InputSplit split : bformat.getSplits(job, NUM_SPLITS)) {
    RecordReader<BytesWritable, BytesWritable> reader =
        bformat.getRecordReader(split, job, Reporter.NULL);
    try {
      while (reader.next(bkey, bval)) {
        tkey.set(Integer.toString(r.nextInt(), 36));
        tval.set(Long.toString(r.nextLong(), 36));
        buf.reset(bkey.getBytes(), bkey.getLength());
        cmpkey.readFields(buf);
        buf.reset(bval.getBytes(), bval.getLength());
        cmpval.readFields(buf);
        assertTrue("Keys don't match: " + "*" + cmpkey.toString() + ":" + tkey.toString() + "*",
            cmpkey.toString().equals(tkey.toString()));
        assertTrue("Vals don't match: " + "*" + cmpval.toString() + ":" + tval.toString() + "*",
            cmpval.toString().equals(tval.toString()));
        ++count;
      }
    } finally {
      reader.close();
    }
  }
  assertEquals("Some records not found", RECORDS, count);
}
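The decode step the test repeats (reset a DataInputBuffer over the raw bytes handed back by SequenceFileAsBinaryInputFormat, then readFields) could be captured in a small helper. The class RawBytesDecoder below is an assumed illustration, not part of the test:

import java.io.IOException;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.DataInputBuffer;
import org.apache.hadoop.io.Writable;

class RawBytesDecoder {
  private final DataInputBuffer buffer = new DataInputBuffer();

  // Rehydrates the serialized form stored in 'raw' into the reusable 'target' Writable.
  <T extends Writable> T decode(BytesWritable raw, T target) throws IOException {
    buffer.reset(raw.getBytes(), raw.getLength());
    target.readFields(buffer);
    return target;
  }
}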
Use of org.apache.hadoop.io.BytesWritable in project hadoop by apache.
From the class TestSequenceFileAsBinaryOutputFormat, method testBinary:
@Test
public void testBinary() throws IOException {
  JobConf job = new JobConf();
  FileSystem fs = FileSystem.getLocal(job);
  Path dir = new Path(new Path(new Path(System.getProperty("test.build.data", ".")),
      FileOutputCommitter.TEMP_DIR_NAME), "_" + attempt);
  Path file = new Path(dir, "testbinary.seq");
  Random r = new Random();
  long seed = r.nextLong();
  r.setSeed(seed);
  fs.delete(dir, true);
  if (!fs.mkdirs(dir)) {
    fail("Failed to create output directory");
  }
  job.set(JobContext.TASK_ATTEMPT_ID, attempt);
  FileOutputFormat.setOutputPath(job, dir.getParent().getParent());
  FileOutputFormat.setWorkOutputPath(job, dir);
  SequenceFileAsBinaryOutputFormat.setSequenceFileOutputKeyClass(job, IntWritable.class);
  SequenceFileAsBinaryOutputFormat.setSequenceFileOutputValueClass(job, DoubleWritable.class);
  SequenceFileAsBinaryOutputFormat.setCompressOutput(job, true);
  SequenceFileAsBinaryOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK);
  BytesWritable bkey = new BytesWritable();
  BytesWritable bval = new BytesWritable();
  RecordWriter<BytesWritable, BytesWritable> writer =
      new SequenceFileAsBinaryOutputFormat().getRecordWriter(fs, job, file.toString(), Reporter.NULL);
  IntWritable iwritable = new IntWritable();
  DoubleWritable dwritable = new DoubleWritable();
  DataOutputBuffer outbuf = new DataOutputBuffer();
  LOG.info("Creating data by SequenceFileAsBinaryOutputFormat");
  try {
    for (int i = 0; i < RECORDS; ++i) {
      iwritable = new IntWritable(r.nextInt());
      iwritable.write(outbuf);
      bkey.set(outbuf.getData(), 0, outbuf.getLength());
      outbuf.reset();
      dwritable = new DoubleWritable(r.nextDouble());
      dwritable.write(outbuf);
      bval.set(outbuf.getData(), 0, outbuf.getLength());
      outbuf.reset();
      writer.write(bkey, bval);
    }
  } finally {
    writer.close(Reporter.NULL);
  }
  InputFormat<IntWritable, DoubleWritable> iformat =
      new SequenceFileInputFormat<IntWritable, DoubleWritable>();
  int count = 0;
  r.setSeed(seed);
  DataInputBuffer buf = new DataInputBuffer();
  final int NUM_SPLITS = 3;
  SequenceFileInputFormat.addInputPath(job, file);
  LOG.info("Reading data by SequenceFileInputFormat");
  for (InputSplit split : iformat.getSplits(job, NUM_SPLITS)) {
    RecordReader<IntWritable, DoubleWritable> reader =
        iformat.getRecordReader(split, job, Reporter.NULL);
    try {
      int sourceInt;
      double sourceDouble;
      while (reader.next(iwritable, dwritable)) {
        sourceInt = r.nextInt();
        sourceDouble = r.nextDouble();
        assertEquals("Keys don't match: " + "*" + iwritable.get() + ":" + sourceInt + "*",
            sourceInt, iwritable.get());
        assertTrue("Vals don't match: " + "*" + dwritable.get() + ":" + sourceDouble + "*",
            Double.compare(dwritable.get(), sourceDouble) == 0);
        ++count;
      }
    } finally {
      reader.close();
    }
  }
  assertEquals("Some records not found", RECORDS, count);
}
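The serialize step above (write a Writable into a DataOutputBuffer, then wrap the bytes in a BytesWritable so SequenceFileAsBinaryOutputFormat can append them verbatim) is the mirror image of the decoder sketched earlier. The RawBytesEncoder class below is a hypothetical sketch of that pattern, not code from the test:

import java.io.IOException;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.io.Writable;

class RawBytesEncoder {
  private final DataOutputBuffer buffer = new DataOutputBuffer();
  private final BytesWritable bytes = new BytesWritable();

  // Serializes 'obj' and returns a reusable BytesWritable view of its raw bytes.
  BytesWritable encode(Writable obj) throws IOException {
    buffer.reset();
    obj.write(buffer);
    bytes.set(buffer.getData(), 0, buffer.getLength());
    return bytes;
  }
}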
Use of org.apache.hadoop.io.BytesWritable in project hadoop by apache.
From the class TestSequenceFileInputFormat, method testFormat:
@Test
public void testFormat() throws Exception {
  JobConf job = new JobConf(conf);
  FileSystem fs = FileSystem.getLocal(conf);
  Path dir = new Path(System.getProperty("test.build.data", ".") + "/mapred");
  Path file = new Path(dir, "test.seq");
  Reporter reporter = Reporter.NULL;
  int seed = new Random().nextInt();
  //LOG.info("seed = "+seed);
  Random random = new Random(seed);
  fs.delete(dir, true);
  FileInputFormat.setInputPaths(job, dir);
  // for a variety of lengths
  for (int length = 0; length < MAX_LENGTH; length += random.nextInt(MAX_LENGTH / 10) + 1) {
    //LOG.info("creating; entries = " + length);
    // create a file with length entries
    SequenceFile.Writer writer =
        SequenceFile.createWriter(fs, conf, file, IntWritable.class, BytesWritable.class);
    try {
      for (int i = 0; i < length; i++) {
        IntWritable key = new IntWritable(i);
        byte[] data = new byte[random.nextInt(10)];
        random.nextBytes(data);
        BytesWritable value = new BytesWritable(data);
        writer.append(key, value);
      }
    } finally {
      writer.close();
    }
    // try splitting the file in a variety of sizes
    InputFormat<IntWritable, BytesWritable> format =
        new SequenceFileInputFormat<IntWritable, BytesWritable>();
    IntWritable key = new IntWritable();
    BytesWritable value = new BytesWritable();
    for (int i = 0; i < 3; i++) {
      int numSplits = random.nextInt(MAX_LENGTH / (SequenceFile.SYNC_INTERVAL / 20)) + 1;
      //LOG.info("splitting: requesting = " + numSplits);
      InputSplit[] splits = format.getSplits(job, numSplits);
      //LOG.info("splitting: got = " + splits.length);
      // check each split
      BitSet bits = new BitSet(length);
      for (int j = 0; j < splits.length; j++) {
        RecordReader<IntWritable, BytesWritable> reader =
            format.getRecordReader(splits[j], job, reporter);
        try {
          int count = 0;
          while (reader.next(key, value)) {
            // if (bits.get(key.get())) {
            //   LOG.info("splits["+j+"]="+splits[j]+" : " + key.get());
            //   LOG.info("@"+reader.getPos());
            // }
            assertFalse("Key in multiple partitions.", bits.get(key.get()));
            bits.set(key.get());
            count++;
          }
          //LOG.info("splits["+j+"]="+splits[j]+" count=" + count);
        } finally {
          reader.close();
        }
      }
      assertEquals("Some keys in no partition.", length, bits.cardinality());
    }
  }
}
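A standalone sketch of the same split-and-read pattern outside JUnit is shown below; the class CountRecordsPerSplit and the input path /tmp/test.seq are assumptions for illustration, and it simply reports how many IntWritable/BytesWritable records land in each split:

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.InputFormat;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.SequenceFileInputFormat;

public class CountRecordsPerSplit {
  public static void main(String[] args) throws Exception {
    JobConf job = new JobConf();
    // Assumed input path: an existing SequenceFile of IntWritable keys and BytesWritable values.
    FileInputFormat.setInputPaths(job, new Path("/tmp/test.seq"));
    InputFormat<IntWritable, BytesWritable> format =
        new SequenceFileInputFormat<IntWritable, BytesWritable>();
    IntWritable key = new IntWritable();
    BytesWritable value = new BytesWritable();
    int splitIndex = 0;
    for (InputSplit split : format.getSplits(job, 3)) {
      RecordReader<IntWritable, BytesWritable> reader =
          format.getRecordReader(split, job, Reporter.NULL);
      int count = 0;
      try {
        while (reader.next(key, value)) {
          count++;
        }
      } finally {
        reader.close();
      }
      System.out.println("split " + splitIndex++ + ": " + count + " records");
    }
  }
}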