Use of org.apache.hadoop.io.SequenceFile in project hadoop by apache.
From the class TestCodec, method sequenceFileCodecTest.
private static void sequenceFileCodecTest(Configuration conf, int lines, String codecClass, int blockSize) throws IOException, ClassNotFoundException, InstantiationException, IllegalAccessException {
    Path filePath = new Path("SequenceFileCodecTest." + codecClass);
    // Configuration
    conf.setInt("io.seqfile.compress.blocksize", blockSize);
    // Create the SequenceFile
    FileSystem fs = FileSystem.get(conf);
    LOG.info("Creating SequenceFile with codec \"" + codecClass + "\"");
    SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, filePath, Text.class, Text.class, CompressionType.BLOCK, (CompressionCodec) Class.forName(codecClass).newInstance());
    // Write some data
    LOG.info("Writing to SequenceFile...");
    for (int i = 0; i < lines; i++) {
        Text key = new Text("key" + i);
        Text value = new Text("value" + i);
        writer.append(key, value);
    }
    writer.close();
    // Read the data back and check
    LOG.info("Reading from the SequenceFile...");
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, filePath, conf);
    Writable key = (Writable) reader.getKeyClass().newInstance();
    Writable value = (Writable) reader.getValueClass().newInstance();
    int lc = 0;
    try {
        while (reader.next(key, value)) {
            assertEquals("key" + lc, key.toString());
            assertEquals("value" + lc, value.toString());
            lc++;
        }
    } finally {
        reader.close();
    }
    assertEquals(lines, lc);
    // Delete temporary files
    fs.delete(filePath, false);
    LOG.info("SUCCESS! Completed SequenceFileCodecTest with codec \"" + codecClass + "\"");
}
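For context, a minimal sketch of how such a helper might be driven from a test method; the codec class names, record counts, and block sizes below are illustrative values, not taken from the original test:

// Sketch only: exercise the helper with two codecs that ship with Hadoop.
// The line counts and block sizes are arbitrary illustration values.
Configuration conf = new Configuration();
sequenceFileCodecTest(conf, 100, "org.apache.hadoop.io.compress.DefaultCodec", 100);
sequenceFileCodecTest(conf, 200000, "org.apache.hadoop.io.compress.GzipCodec", 1000000);

Running the same helper with several codecs and block sizes exercises both the small-block and large-block paths of BLOCK-compressed SequenceFiles.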
Use of org.apache.hadoop.io.SequenceFile in project Cloud9 by lintool.
From the class SequenceFileUtils, method readValues.
@SuppressWarnings("unchecked")
public static <V extends Writable> List<V> readValues(Path path, FileSystem fs, int max) {
    List<V> list = new ArrayList<V>();
    try {
        int k = 0;
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, fs.getConf());
        Writable key = (Writable) reader.getKeyClass().newInstance();
        V value = (V) reader.getValueClass().newInstance();
        while (reader.next(key, value)) {
            k++;
            list.add(value);
            if (k >= max) {
                break;
            }
            // Allocate a fresh value object; otherwise every list entry
            // would point at the same (last-read) instance.
            value = (V) reader.getValueClass().newInstance();
        }
        reader.close();
    } catch (Exception e) {
        // Chain the original exception so the root cause is not lost.
        throw new RuntimeException("Error reading SequenceFile " + path, e);
    }
    return list;
}
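A minimal caller sketch for readValues, assuming Text values, a local file system, and an illustrative path:

// Hypothetical caller: read at most 10 values (assumed to be Text) from a SequenceFile.
Configuration conf = new Configuration();
FileSystem fs = FileSystem.getLocal(conf);
List<Text> values = SequenceFileUtils.readValues(new Path("data/part-00000"), fs, 10);
for (Text v : values) {
    System.out.println(v);
}

Note that the type parameter V is unchecked, so the caller must know the file's actual value class up front.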
Use of org.apache.hadoop.io.SequenceFile in project Cloud9 by lintool.
From the class SequenceFileUtils, method readKeys.
@SuppressWarnings("unchecked")
public static <K extends Writable> List<K> readKeys(Path path, FileSystem fs, int max) {
    List<K> list = new ArrayList<K>();
    try {
        int k = 0;
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, fs.getConf());
        K key = (K) reader.getKeyClass().newInstance();
        Writable value = (Writable) reader.getValueClass().newInstance();
        while (reader.next(key, value)) {
            k++;
            list.add(key);
            if (k >= max) {
                break;
            }
            // Allocate a fresh key so earlier list entries keep their contents.
            key = (K) reader.getKeyClass().newInstance();
        }
        reader.close();
    } catch (Exception e) {
        // Chain the original exception so the root cause is not lost.
        throw new RuntimeException("Error reading SequenceFile " + path, e);
    }
    return list;
}
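readKeys mirrors readValues on the key side; a brief caller sketch, assuming IntWritable keys and an illustrative path:

// Hypothetical caller: collect the first 5 keys, assumed to be IntWritable.
FileSystem fs = FileSystem.getLocal(new Configuration());
List<IntWritable> keys = SequenceFileUtils.readKeys(new Path("data/part-00000"), fs, 5);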
Use of org.apache.hadoop.io.SequenceFile in project Plume by tdunning.
From the class MapRedSequenceFileTest, method test.
@Test
public void test() throws Exception {
    /*
     * Create input, which is a SequenceFile<int,int> with data 1,2\n3,4
     */
    Configuration conf = new Configuration();
    Path p = new Path(inputPath);
    FileSystem localFS = FileSystem.getLocal(conf);
    if (localFS.exists(p)) {
        // wipe it if needed
        localFS.delete(p, true);
    }
    SequenceFile.Writer writer = SequenceFile.createWriter(localFS, conf, p, IntWritable.class, IntWritable.class);
    writer.append(new IntWritable(1), new IntWritable(2));
    writer.append(new IntWritable(3), new IntWritable(4));
    writer.close();
    String outputPath = "/tmp/output-plume-simpletest";
    // Prepare input for the test
    FileSystem system = FileSystem.getLocal(new Configuration());
    // Prepare output for the test
    system.delete(new Path(outputPath), true);
    // Prepare the workflow
    OtherWorkflow workFlow = new OtherWorkflow();
    // Execute it
    MapRedExecutor executor = new MapRedExecutor();
    executor.execute(workFlow, outputPath);
    /*
     * Read the output, which is a SequenceFile<int,int>, and assert that it has data 2,3\n4,5
     */
    p = new Path(outputPath + "/1_1/1-r-00000");
    SequenceFile.Reader reader = new SequenceFile.Reader(localFS, p, conf);
    IntWritable key = new IntWritable(1);
    IntWritable value = new IntWritable(1);
    reader.next(key, value);
    // assertEquals takes (expected, actual); put the expected constant first.
    assertEquals(2, key.get());
    assertEquals(3, value.get());
    reader.next(key, value);
    assertEquals(4, key.get());
    assertEquals(5, value.get());
    reader.close();
}
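Because the test hard-codes two next() calls, a loop-based read is the more general pattern when the record count is not known in advance; a sketch under the same IntWritable key/value assumptions:

// Sketch only: drain all records from the output file instead of assuming exactly two.
SequenceFile.Reader r = new SequenceFile.Reader(localFS, p, conf);
IntWritable k = new IntWritable();
IntWritable v = new IntWritable();
try {
    while (r.next(k, v)) {
        System.out.println(k.get() + " -> " + v.get());
    }
} finally {
    r.close();
}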