Search in sources :

Example 11 with SequenceFile

use of org.apache.hadoop.io.SequenceFile in project Cloud9 by lintool.

the class SequenceFileUtils method readValues.

public static <V extends Writable> List<V> readValues(Path path, FileSystem fs, int max) {
    List<V> list = new ArrayList<V>();
    try {
        int k = 0;
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, fs.getConf());
        Writable key = (Writable) reader.getKeyClass().newInstance();
        V value = (V) reader.getValueClass().newInstance();
        while (, value)) {
            if (k >= max) {
            value = (V) reader.getValueClass().newInstance();
    } catch (Exception e) {
        throw new RuntimeException("Error reading SequenceFile " + path);
    return list;
Also used : SequenceFile(org.apache.hadoop.io.SequenceFile) ArrayList(java.util.ArrayList) Writable(org.apache.hadoop.io.Writable) IOException(java.io.IOException)

Example 12 with SequenceFile

use of org.apache.hadoop.io.SequenceFile in project Cloud9 by lintool.

the class SequenceFileUtils method readKeys.

public static <K extends Writable> List<K> readKeys(Path path, FileSystem fs, int max) {
    List<K> list = new ArrayList<K>();
    try {
        int k = 0;
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, fs.getConf());
        K key = (K) reader.getKeyClass().newInstance();
        Writable value = (Writable) reader.getValueClass().newInstance();
        while (, value)) {
            if (k >= max) {
            key = (K) reader.getKeyClass().newInstance();
    } catch (Exception e) {
        throw new RuntimeException("Error reading SequenceFile " + path);
    return list;
Also used : SequenceFile(org.apache.hadoop.io.SequenceFile) ArrayList(java.util.ArrayList) Writable(org.apache.hadoop.io.Writable) IOException(java.io.IOException)

Example 13 with SequenceFile

use of org.apache.hadoop.io.SequenceFile in project Cloud9 by lintool.

the class ScanBlockCompressedSequenceFile method main.

public static void main(String[] args) throws IOException {
    if (args.length != 1) {
        System.out.println("usage: [SequenceFile]");
    List<Long> seekPoints = Lists.newArrayList();
    long pos = -1;
    long prevPos = -1;
    int prevDocno = 0;
    Path path = new Path(args[0]);
    Configuration config = new Configuration();
    SequenceFile.Reader reader = new SequenceFile.Reader(config, SequenceFile.Reader.file(path));
    IntWritable key = new IntWritable();
    ClueWarcRecord value = new ClueWarcRecord();
    pos = reader.getPosition();
    int cnt = 0;
    while (, value)) {
        if (prevPos != -1 && prevPos != pos) {
            System.out.println("## beginning of block at " + prevPos + ", docno:" + prevDocno);
        System.out.println("offset:" + pos + "\tdocno:" + key + "\tdocid:" + value.getDocid());
        prevPos = pos;
        pos = reader.getPosition();
        prevDocno = key.get();
        if (cnt > Integer.MAX_VALUE)
    reader = new SequenceFile.Reader(config, SequenceFile.Reader.file(path));
    for (long p : seekPoints) {;, value);
        System.out.println("seeking to pos " + p + "\tdocno:" + key + "\tdocid:" + value.getDocid());
Also used : Path(org.apache.hadoop.fs.Path) Configuration(org.apache.hadoop.conf.Configuration) SequenceFile(org.apache.hadoop.io.SequenceFile) IntWritable(org.apache.hadoop.io.IntWritable)

Example 14 with SequenceFile

use of org.apache.hadoop.io.SequenceFile in project Plume by tdunning.

the class MapRedSequenceFileTest method test.

public void test() throws Exception {
     * Create input which is SequenceFile<int,int> with data 1,2\n3,4
    Configuration conf = new Configuration();
    Path p = new Path(inputPath);
    FileSystem localFS = FileSystem.getLocal(conf);
    if (localFS.exists(p)) {
        // wipe it if needed
        localFS.delete(p, true);
    SequenceFile.Writer writer = SequenceFile.createWriter(localFS, conf, p, IntWritable.class, IntWritable.class);
    writer.append(new IntWritable(1), new IntWritable(2));
    writer.append(new IntWritable(3), new IntWritable(4));
    String outputPath = "/tmp/output-plume-simpletest";
    // Prepare input for test
    FileSystem system = FileSystem.getLocal(new Configuration());
    // Prepare output for test
    system.delete(new Path(outputPath), true);
    // Prepare workflow
    OtherWorkflow workFlow = new OtherWorkflow();
    // Execute it
    MapRedExecutor executor = new MapRedExecutor();
    executor.execute(workFlow, outputPath);
     * Read output which is SequenceFile<int,int> and assert that it has data 2,3\n4,5
    p = new Path(outputPath + "/1_1/1-r-00000");
    SequenceFile.Reader reader = new SequenceFile.Reader(localFS, p, conf);
    IntWritable key = new IntWritable(1);
    IntWritable value = new IntWritable(1);, value);
    assertEquals(key.get(), 2);
    assertEquals(value.get(), 3);, value);
    assertEquals(key.get(), 4);
    assertEquals(value.get(), 5);
Also used : Path(org.apache.hadoop.fs.Path) Configuration(org.apache.hadoop.conf.Configuration) SequenceFile(org.apache.hadoop.io.SequenceFile) FileSystem(org.apache.hadoop.fs.FileSystem) IntWritable(org.apache.hadoop.io.IntWritable) Test(org.junit.Test)

Example 15 with SequenceFile

use of org.apache.hadoop.io.SequenceFile in project incubator-gobblin by apache.

the class FsStateStore method putAll.

/**
 * See {@link StateStore#putAll(String, String, Collection)}.
 * <p>
 *   This implementation does not support putting the state objects into an existing store, as
 *   append is yet to be supported by the Hadoop SequenceFile (HADOOP-7139).
 * </p>
 */
public void putAll(String storeName, String tableName, Collection<T> states) throws IOException {
    String tmpTableName = this.useTmpFileForPut ? TMP_FILE_PREFIX + tableName : tableName;
    Path tmpTablePath = new Path(new Path(this.storeRootDir, storeName), tmpTableName);
    if (!this.fs.exists(tmpTablePath) && !create(storeName, tmpTableName)) {
        throw new IOException("Failed to create a state file for table " + tmpTableName);
    Closer closer = Closer.create();
    try {
        @SuppressWarnings("deprecation") SequenceFile.Writer writer = closer.register(SequenceFile.createWriter(this.fs, this.conf, tmpTablePath, Text.class, this.stateClass, SequenceFile.CompressionType.BLOCK, new DefaultCodec()));
        for (T state : states) {
            writer.append(new Text(Strings.nullToEmpty(state.getId())), state);
    } catch (Throwable t) {
        throw closer.rethrow(t);
    } finally {
    if (this.useTmpFileForPut) {
        Path tablePath = new Path(new Path(this.storeRootDir, storeName), tableName);
        renamePath(tmpTablePath, tablePath);
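Also used : Path(org.apache.hadoop.fs.Path) Closer(com.google.common.io.Closer) SequenceFile(org.apache.hadoop.io.SequenceFile) DefaultCodec(org.apache.hadoop.io.compress.DefaultCodec) Text(org.apache.hadoop.io.Text) IOException(java.io.IOException)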


SequenceFile ( Path (org.apache.hadoop.fs.Path)13 FileSystem (org.apache.hadoop.fs.FileSystem)12 Text ( File ( IOException ( Configuration (org.apache.hadoop.conf.Configuration)5 IntWritable ( LongWritable ( NullWritable ( TreeSet (java.util.TreeSet)3 HFileSystem (org.apache.hadoop.hbase.fs.HFileSystem)3 ImmutableBytesWritable ( Writable ( Closer ( ArrayList (java.util.ArrayList)2 DefaultCodec ( Before (org.junit.Before)2 Test (org.junit.Test)2 BufferedReader (