Use of org.apache.hadoop.io.IntWritable in project hive by apache.
The class TestGenericUDFSubstringIndex, method runAndVerify:
private void runAndVerify(String str, String delim, Integer count, String expResult, GenericUDF udf) throws HiveException {
  DeferredObject valueObj0 = new DeferredJavaObject(str != null ? new Text(str) : null);
  DeferredObject valueObj1 = new DeferredJavaObject(delim != null ? new Text(delim) : null);
  DeferredObject valueObj2 = new DeferredJavaObject(count != null ? new IntWritable(count) : null);
  DeferredObject[] args = { valueObj0, valueObj1, valueObj2 };
  Text output = (Text) udf.evaluate(args);
  assertEquals("substring_index() test", expResult, output != null ? output.toString() : null);
}
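A minimal sketch of how a test might drive this helper, assuming the usual GenericUDF pattern of initializing the UDF with writable ObjectInspectors first; the concrete inputs and expected substring_index results below are illustrative, not taken from the class above.

// Hedged sketch: ObjectInspector setup and expected values are illustrative.
GenericUDFSubstringIndex udf = new GenericUDFSubstringIndex();
ObjectInspector[] argOIs = {
    PrimitiveObjectInspectorFactory.writableStringObjectInspector,
    PrimitiveObjectInspectorFactory.writableStringObjectInspector,
    PrimitiveObjectInspectorFactory.writableIntObjectInspector };
udf.initialize(argOIs);
// keep everything before the second '.' from the left
runAndVerify("www.apache.org", ".", 2, "www.apache", udf);
// a negative count counts delimiters from the right
runAndVerify("www.apache.org", ".", -1, "org", udf);
// a null input string is expected to yield a null result
runAndVerify(null, ".", 2, null, udf);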
Use of org.apache.hadoop.io.IntWritable in project hadoop by apache.
The class ShuffleSchedulerImpl, method hostFailed:
public synchronized void hostFailed(String hostname) {
  if (hostFailures.containsKey(hostname)) {
    IntWritable x = hostFailures.get(hostname);
    x.set(x.get() + 1);
  } else {
    hostFailures.put(hostname, new IntWritable(1));
  }
}
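The same look-up-then-mutate idiom works for any per-key counter; a hedged standalone sketch follows (the method and names are hypothetical, and it needs java.util.HashMap, java.util.Map, and org.apache.hadoop.io.IntWritable).

// Hypothetical helper showing the counter pattern above in isolation.
private static Map<String, IntWritable> countFailures(String[] hostnames) {
  Map<String, IntWritable> failures = new HashMap<>();
  for (String host : hostnames) {
    IntWritable current = failures.get(host);
    if (current == null) {
      failures.put(host, new IntWritable(1));
    } else {
      current.set(current.get() + 1);  // mutate the existing counter instead of re-boxing an int
    }
  }
  return failures;
}

For example, countFailures(new String[] { "node-a", "node-b", "node-a" }) returns {node-a=2, node-b=1}.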
Use of org.apache.hadoop.io.IntWritable in project hadoop by apache.
The class TestSequenceFileAsBinaryOutputFormat, method testBinary:
@Test
public void testBinary() throws IOException {
  JobConf job = new JobConf();
  FileSystem fs = FileSystem.getLocal(job);
  Path dir = new Path(new Path(new Path(System.getProperty("test.build.data", ".")),
      FileOutputCommitter.TEMP_DIR_NAME), "_" + attempt);
  Path file = new Path(dir, "testbinary.seq");
  Random r = new Random();
  long seed = r.nextLong();
  r.setSeed(seed);
  fs.delete(dir, true);
  if (!fs.mkdirs(dir)) {
    fail("Failed to create output directory");
  }
  job.set(JobContext.TASK_ATTEMPT_ID, attempt);
  FileOutputFormat.setOutputPath(job, dir.getParent().getParent());
  FileOutputFormat.setWorkOutputPath(job, dir);
  SequenceFileAsBinaryOutputFormat.setSequenceFileOutputKeyClass(job, IntWritable.class);
  SequenceFileAsBinaryOutputFormat.setSequenceFileOutputValueClass(job, DoubleWritable.class);
  SequenceFileAsBinaryOutputFormat.setCompressOutput(job, true);
  SequenceFileAsBinaryOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK);
  BytesWritable bkey = new BytesWritable();
  BytesWritable bval = new BytesWritable();
  RecordWriter<BytesWritable, BytesWritable> writer =
      new SequenceFileAsBinaryOutputFormat().getRecordWriter(fs, job, file.toString(), Reporter.NULL);
  IntWritable iwritable = new IntWritable();
  DoubleWritable dwritable = new DoubleWritable();
  DataOutputBuffer outbuf = new DataOutputBuffer();
  LOG.info("Creating data by SequenceFileAsBinaryOutputFormat");
  try {
    for (int i = 0; i < RECORDS; ++i) {
      iwritable = new IntWritable(r.nextInt());
      iwritable.write(outbuf);
      bkey.set(outbuf.getData(), 0, outbuf.getLength());
      outbuf.reset();
      dwritable = new DoubleWritable(r.nextDouble());
      dwritable.write(outbuf);
      bval.set(outbuf.getData(), 0, outbuf.getLength());
      outbuf.reset();
      writer.write(bkey, bval);
    }
  } finally {
    writer.close(Reporter.NULL);
  }
  InputFormat<IntWritable, DoubleWritable> iformat =
      new SequenceFileInputFormat<IntWritable, DoubleWritable>();
  int count = 0;
  r.setSeed(seed);
  DataInputBuffer buf = new DataInputBuffer();
  final int NUM_SPLITS = 3;
  SequenceFileInputFormat.addInputPath(job, file);
  LOG.info("Reading data by SequenceFileInputFormat");
  for (InputSplit split : iformat.getSplits(job, NUM_SPLITS)) {
    RecordReader<IntWritable, DoubleWritable> reader =
        iformat.getRecordReader(split, job, Reporter.NULL);
    try {
      int sourceInt;
      double sourceDouble;
      while (reader.next(iwritable, dwritable)) {
        sourceInt = r.nextInt();
        sourceDouble = r.nextDouble();
        assertEquals("Keys don't match: " + "*" + iwritable.get() + ":" + sourceInt + "*",
            sourceInt, iwritable.get());
        assertTrue("Vals don't match: " + "*" + dwritable.get() + ":" + sourceDouble + "*",
            Double.compare(dwritable.get(), sourceDouble) == 0);
        ++count;
      }
    } finally {
      reader.close();
    }
  }
  assertEquals("Some records not found", RECORDS, count);
}
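The core of the write side is the Writable-to-BytesWritable round trip through DataOutputBuffer; a reduced sketch of that step follows, with illustrative values and variable names (only the Hadoop calls mirror the test above).

// Serialize an IntWritable into raw bytes, then read it back.
DataOutputBuffer outBuf = new DataOutputBuffer();
new IntWritable(42).write(outBuf);                    // serialize the key
BytesWritable rawKey = new BytesWritable();
rawKey.set(outBuf.getData(), 0, outBuf.getLength());  // copy the serialized bytes

DataInputBuffer inBuf = new DataInputBuffer();
inBuf.reset(rawKey.getBytes(), rawKey.getLength());
IntWritable restored = new IntWritable();
restored.readFields(inBuf);                           // restored.get() == 42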
Use of org.apache.hadoop.io.IntWritable in project hadoop by apache.
The class TestSequenceFileAsTextInputFormat, method testFormat:
@Test
public void testFormat() throws Exception {
  JobConf job = new JobConf(conf);
  FileSystem fs = FileSystem.getLocal(conf);
  Path dir = new Path(System.getProperty("test.build.data", ".") + "/mapred");
  Path file = new Path(dir, "test.seq");
  Reporter reporter = Reporter.NULL;
  int seed = new Random().nextInt();
  //LOG.info("seed = "+seed);
  Random random = new Random(seed);
  fs.delete(dir, true);
  FileInputFormat.setInputPaths(job, dir);
  // for a variety of lengths
  for (int length = 0; length < MAX_LENGTH; length += random.nextInt(MAX_LENGTH / 10) + 1) {
    //LOG.info("creating; entries = " + length);
    // create a file with length entries
    SequenceFile.Writer writer =
        SequenceFile.createWriter(fs, conf, file, IntWritable.class, LongWritable.class);
    try {
      for (int i = 0; i < length; i++) {
        IntWritable key = new IntWritable(i);
        LongWritable value = new LongWritable(10 * i);
        writer.append(key, value);
      }
    } finally {
      writer.close();
    }
    // try splitting the file in a variety of sizes
    InputFormat<Text, Text> format = new SequenceFileAsTextInputFormat();
    for (int i = 0; i < 3; i++) {
      int numSplits = random.nextInt(MAX_LENGTH / (SequenceFile.SYNC_INTERVAL / 20)) + 1;
      //LOG.info("splitting: requesting = " + numSplits);
      InputSplit[] splits = format.getSplits(job, numSplits);
      //LOG.info("splitting: got = " + splits.length);
      // check each split
      BitSet bits = new BitSet(length);
      for (int j = 0; j < splits.length; j++) {
        RecordReader<Text, Text> reader = format.getRecordReader(splits[j], job, reporter);
        Class<?> readerClass = reader.getClass();
        assertEquals("reader class is SequenceFileAsTextRecordReader.",
            SequenceFileAsTextRecordReader.class, readerClass);
        Text value = reader.createValue();
        Text key = reader.createKey();
        try {
          int count = 0;
          while (reader.next(key, value)) {
            // if (bits.get(key.get())) {
            //   LOG.info("splits["+j+"]="+splits[j]+" : " + key.get());
            //   LOG.info("@"+reader.getPos());
            // }
            int keyInt = Integer.parseInt(key.toString());
            assertFalse("Key in multiple partitions.", bits.get(keyInt));
            bits.set(keyInt);
            count++;
          }
          //LOG.info("splits["+j+"]="+splits[j]+" count=" + count);
        } finally {
          reader.close();
        }
      }
      assertEquals("Some keys in no partition.", length, bits.cardinality());
    }
  }
}
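SequenceFileAsTextInputFormat hands every key and value back as Text produced via toString(), which is why the loop above recovers the integer key with Integer.parseInt; a tiny illustrative sketch of that conversion (values are made up):

IntWritable originalKey = new IntWritable(5);
Text textKey = new Text(originalKey.toString());        // what the record reader returns
int recovered = Integer.parseInt(textKey.toString());   // 5 again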
Use of org.apache.hadoop.io.IntWritable in project hadoop by apache.
The class TestSequenceFileInputFormat, method testFormat:
@Test
public void testFormat() throws Exception {
  JobConf job = new JobConf(conf);
  FileSystem fs = FileSystem.getLocal(conf);
  Path dir = new Path(System.getProperty("test.build.data", ".") + "/mapred");
  Path file = new Path(dir, "test.seq");
  Reporter reporter = Reporter.NULL;
  int seed = new Random().nextInt();
  //LOG.info("seed = "+seed);
  Random random = new Random(seed);
  fs.delete(dir, true);
  FileInputFormat.setInputPaths(job, dir);
  // for a variety of lengths
  for (int length = 0; length < MAX_LENGTH; length += random.nextInt(MAX_LENGTH / 10) + 1) {
    //LOG.info("creating; entries = " + length);
    // create a file with length entries
    SequenceFile.Writer writer =
        SequenceFile.createWriter(fs, conf, file, IntWritable.class, BytesWritable.class);
    try {
      for (int i = 0; i < length; i++) {
        IntWritable key = new IntWritable(i);
        byte[] data = new byte[random.nextInt(10)];
        random.nextBytes(data);
        BytesWritable value = new BytesWritable(data);
        writer.append(key, value);
      }
    } finally {
      writer.close();
    }
    // try splitting the file in a variety of sizes
    InputFormat<IntWritable, BytesWritable> format =
        new SequenceFileInputFormat<IntWritable, BytesWritable>();
    IntWritable key = new IntWritable();
    BytesWritable value = new BytesWritable();
    for (int i = 0; i < 3; i++) {
      int numSplits = random.nextInt(MAX_LENGTH / (SequenceFile.SYNC_INTERVAL / 20)) + 1;
      //LOG.info("splitting: requesting = " + numSplits);
      InputSplit[] splits = format.getSplits(job, numSplits);
      //LOG.info("splitting: got = " + splits.length);
      // check each split
      BitSet bits = new BitSet(length);
      for (int j = 0; j < splits.length; j++) {
        RecordReader<IntWritable, BytesWritable> reader =
            format.getRecordReader(splits[j], job, reporter);
        try {
          int count = 0;
          while (reader.next(key, value)) {
            // if (bits.get(key.get())) {
            //   LOG.info("splits["+j+"]="+splits[j]+" : " + key.get());
            //   LOG.info("@"+reader.getPos());
            // }
            assertFalse("Key in multiple partitions.", bits.get(key.get()));
            bits.set(key.get());
            count++;
          }
          //LOG.info("splits["+j+"]="+splits[j]+" count=" + count);
        } finally {
          reader.close();
        }
      }
      assertEquals("Some keys in no partition.", length, bits.cardinality());
    }
  }
}
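Because the record reader refills the same IntWritable and BytesWritable instances on every next() call, anything that must outlive the read loop has to be copied out; a hedged sketch of that pattern follows (hypothetical helper, requiring java.util.List, java.util.ArrayList, and java.io.IOException).

// Hypothetical helper: copy records out of the reused Writables while reading.
private static List<Integer> collectKeys(RecordReader<IntWritable, BytesWritable> reader)
    throws IOException {
  IntWritable key = new IntWritable();
  BytesWritable value = new BytesWritable();
  List<Integer> keys = new ArrayList<>();
  while (reader.next(key, value)) {
    keys.add(key.get());  // copy the primitive out of the reused key object
  }
  return keys;
}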