use of org.apache.hadoop.io.SequenceFile in project Cloud9 by lintool.
the class SequenceFileUtils method readValues.
@SuppressWarnings("unchecked")
public static <V extends Writable> List<V> readValues(Path path, FileSystem fs, int max) {
    List<V> list = new ArrayList<V>();
    try {
        int k = 0;
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, fs.getConf());
        Writable key = (Writable) reader.getKeyClass().newInstance();
        V value = (V) reader.getValueClass().newInstance();
        while (reader.next(key, value)) {
            k++;
            list.add(value);
            if (k >= max) {
                break;
            }
            // Create a fresh value instance so entries already in the list are not overwritten.
            value = (V) reader.getValueClass().newInstance();
        }
        reader.close();
    } catch (Exception e) {
        // Preserve the original exception as the cause.
        throw new RuntimeException("Error reading SequenceFile " + path, e);
    }
    return list;
}
use of org.apache.hadoop.io.SequenceFile in project Cloud9 by lintool.
the class SequenceFileUtils method readKeys.
@SuppressWarnings("unchecked")
public static <K extends Writable> List<K> readKeys(Path path, FileSystem fs, int max) {
    List<K> list = new ArrayList<K>();
    try {
        int k = 0;
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, fs.getConf());
        K key = (K) reader.getKeyClass().newInstance();
        Writable value = (Writable) reader.getValueClass().newInstance();
        while (reader.next(key, value)) {
            k++;
            list.add(key);
            if (k >= max) {
                break;
            }
            // Create a fresh key instance so entries already in the list are not overwritten.
            key = (K) reader.getKeyClass().newInstance();
        }
        reader.close();
    } catch (Exception e) {
        // Preserve the original exception as the cause.
        throw new RuntimeException("Error reading SequenceFile " + path, e);
    }
    return list;
}
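For context, the two helpers above are typically called against an existing SequenceFile on HDFS or the local file system. A minimal usage sketch, assuming a hypothetical file /data/docs.seq with IntWritable keys and Text values (adjust the types to whatever the file actually stores):
Configuration conf = new Configuration();
FileSystem fs = FileSystem.get(conf);
// Hypothetical input file; replace with a real SequenceFile<IntWritable, Text>.
Path path = new Path("/data/docs.seq");
// Read at most 100 values, then at most 100 keys.
List<Text> values = SequenceFileUtils.readValues(path, fs, 100);
List<IntWritable> keys = SequenceFileUtils.readKeys(path, fs, 100);
System.out.println("read " + keys.size() + " keys and " + values.size() + " values");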
use of org.apache.hadoop.io.SequenceFile in project Cloud9 by lintool.
the class ScanBlockCompressedSequenceFile method main.
public static void main(String[] args) throws IOException {
    if (args.length != 1) {
        System.out.println("usage: [SequenceFile]");
        System.exit(-1);
    }
    List<Long> seekPoints = Lists.newArrayList();
    long pos = -1;
    long prevPos = -1;
    int prevDocno = 0;
    Path path = new Path(args[0]);
    Configuration config = new Configuration();
    SequenceFile.Reader reader = new SequenceFile.Reader(config, SequenceFile.Reader.file(path));
    IntWritable key = new IntWritable();
    ClueWarcRecord value = new ClueWarcRecord();
    pos = reader.getPosition();
    int cnt = 0;
    while (reader.next(key, value)) {
        if (prevPos != -1 && prevPos != pos) {
            System.out.println("## beginning of block at " + prevPos + ", docno:" + prevDocno);
            seekPoints.add(prevPos);
        }
        System.out.println("offset:" + pos + "\tdocno:" + key + "\tdocid:" + value.getDocid());
        prevPos = pos;
        pos = reader.getPosition();
        prevDocno = key.get();
        cnt++;
        if (cnt > Integer.MAX_VALUE)
            break;
    }
    reader.close();
    reader = new SequenceFile.Reader(config, SequenceFile.Reader.file(path));
    for (long p : seekPoints) {
        reader.seek(p);
        reader.next(key, value);
        System.out.println("seeking to pos " + p + "\tdocno:" + key + "\tdocid:" + value.getDocid());
    }
    reader.close();
}
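The scanner above only finds multiple seek points when the input was written with block compression; without it, getPosition() typically advances on every record and no block boundaries are detected. A minimal writer sketch under those assumptions, using a hypothetical output path /tmp/blocks.seq and simple IntWritable/Text pairs instead of ClueWarcRecord:
Configuration conf = new Configuration();
// Hypothetical output location; the scanner would be pointed at this file.
Path out = new Path("/tmp/blocks.seq");
SequenceFile.Writer writer = SequenceFile.createWriter(conf,
    SequenceFile.Writer.file(out),
    SequenceFile.Writer.keyClass(IntWritable.class),
    SequenceFile.Writer.valueClass(Text.class),
    SequenceFile.Writer.compression(SequenceFile.CompressionType.BLOCK));
for (int docno = 0; docno < 100000; docno++) {
    // Many records land in each block; a new block starts only after the block buffer fills up.
    writer.append(new IntWritable(docno), new Text("document " + docno));
}
writer.close();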
use of org.apache.hadoop.io.SequenceFile in project Plume by tdunning.
the class MapRedSequenceFileTest method test.
@Test
public void test() throws Exception {
    /*
     * Create input which is SequenceFile<int,int> with data 1,2\n3,4
     */
    Configuration conf = new Configuration();
    Path p = new Path(inputPath);
    FileSystem localFS = FileSystem.getLocal(conf);
    if (localFS.exists(p)) {
        // wipe it if needed
        localFS.delete(p, true);
    }
    SequenceFile.Writer writer = SequenceFile.createWriter(localFS, conf, p, IntWritable.class, IntWritable.class);
    writer.append(new IntWritable(1), new IntWritable(2));
    writer.append(new IntWritable(3), new IntWritable(4));
    writer.close();
    String outputPath = "/tmp/output-plume-simpletest";
    // Prepare input for test
    FileSystem system = FileSystem.getLocal(new Configuration());
    // Prepare output for test
    system.delete(new Path(outputPath), true);
    // Prepare workflow
    OtherWorkflow workFlow = new OtherWorkflow();
    // Execute it
    MapRedExecutor executor = new MapRedExecutor();
    executor.execute(workFlow, outputPath);
    /*
     * Read output which is SequenceFile<int,int> and assert that it has data 2,3\n4,5
     */
    p = new Path(outputPath + "/1_1/1-r-00000");
    SequenceFile.Reader reader = new SequenceFile.Reader(localFS, p, conf);
    IntWritable key = new IntWritable(1);
    IntWritable value = new IntWritable(1);
    reader.next(key, value);
    assertEquals(key.get(), 2);
    assertEquals(value.get(), 3);
    reader.next(key, value);
    assertEquals(key.get(), 4);
    assertEquals(value.get(), 5);
    reader.close();
}
use of org.apache.hadoop.io.SequenceFile in project incubator-gobblin by apache.
the class FsStateStore method putAll.
/**
 * See {@link StateStore#putAll(String, String, Collection)}.
 *
 * <p>
 * This implementation does not support putting the state objects into an existing store,
 * because append is not yet supported by the Hadoop SequenceFile (HADOOP-7139).
 * </p>
 */
@Override
public void putAll(String storeName, String tableName, Collection<T> states) throws IOException {
    String tmpTableName = this.useTmpFileForPut ? TMP_FILE_PREFIX + tableName : tableName;
    Path tmpTablePath = new Path(new Path(this.storeRootDir, storeName), tmpTableName);
    if (!this.fs.exists(tmpTablePath) && !create(storeName, tmpTableName)) {
        throw new IOException("Failed to create a state file for table " + tmpTableName);
    }
    Closer closer = Closer.create();
    try {
        @SuppressWarnings("deprecation")
        SequenceFile.Writer writer = closer.register(SequenceFile.createWriter(this.fs, this.conf, tmpTablePath,
            Text.class, this.stateClass, SequenceFile.CompressionType.BLOCK, new DefaultCodec()));
        for (T state : states) {
            writer.append(new Text(Strings.nullToEmpty(state.getId())), state);
        }
    } catch (Throwable t) {
        throw closer.rethrow(t);
    } finally {
        closer.close();
    }
    if (this.useTmpFileForPut) {
        Path tablePath = new Path(new Path(this.storeRootDir, storeName), tableName);
        renamePath(tmpTablePath, tablePath);
    }
}
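A table written by putAll can be read back with a plain SequenceFile.Reader: the keys are Text ids and the values are instances of the configured state class. A minimal read-back sketch, assuming a hypothetical table path /state-store/myStore/myTable and a placeholder MyState class standing in for the concrete state type:
Configuration conf = new Configuration();
// Hypothetical table location under the store root directory.
Path tablePath = new Path("/state-store/myStore/myTable");
List<MyState> states = new ArrayList<MyState>();
SequenceFile.Reader reader = new SequenceFile.Reader(conf, SequenceFile.Reader.file(tablePath));
try {
    Text id = new Text();
    MyState state = new MyState();  // placeholder for the concrete state class
    while (reader.next(id, state)) {
        states.add(state);
        state = new MyState();  // fresh instance so stored entries are not overwritten
    }
} finally {
    reader.close();
}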