Example 11 with SequenceFile

use of org.apache.hadoop.io.SequenceFile in project Cloud9 by lintool.

the class SequenceFileUtils method readValues.

@SuppressWarnings("unchecked")
public static <V extends Writable> List<V> readValues(Path path, FileSystem fs, int max) {
    List<V> list = new ArrayList<V>();
    try {
        int k = 0;
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, fs.getConf());
        Writable key = (Writable) reader.getKeyClass().newInstance();
        V value = (V) reader.getValueClass().newInstance();
        while (reader.next(key, value)) {
            k++;
            list.add(value);
            if (k >= max) {
                break;
            }
            value = (V) reader.getValueClass().newInstance();
        }
        reader.close();
    } catch (Exception e) {
        throw new RuntimeException("Error reading SequenceFile " + path);
    }
    return list;
}
Also used : SequenceFile(org.apache.hadoop.io.SequenceFile) ArrayList(java.util.ArrayList) Writable(org.apache.hadoop.io.Writable) IOException(java.io.IOException)
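
For context, a minimal caller of readValues might look like the sketch below. The input path and the IntWritable value type are illustrative assumptions, not part of the Cloud9 snippet.

import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;

public class ReadValuesDemo {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        // read at most the first 10 values; the path and value type are placeholders,
        // and SequenceFileUtils is the Cloud9 class shown above
        List<IntWritable> values =
                SequenceFileUtils.<IntWritable>readValues(new Path("data/part-00000"), fs, 10);
        for (IntWritable v : values) {
            System.out.println(v.get());
        }
    }
}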

Example 12 with SequenceFile

use of org.apache.hadoop.io.SequenceFile in project Cloud9 by lintool.

the class SequenceFileUtils method readKeys.

@SuppressWarnings("unchecked")
public static <K extends Writable> List<K> readKeys(Path path, FileSystem fs, int max) {
    List<K> list = new ArrayList<K>();
    try {
        int k = 0;
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, fs.getConf());
        K key = (K) reader.getKeyClass().newInstance();
        Writable value = (Writable) reader.getValueClass().newInstance();
        while (reader.next(key, value)) {
            k++;
            list.add(key);
            if (k >= max) {
                break;
            }
            key = (K) reader.getKeyClass().newInstance();
        }
        reader.close();
    } catch (Exception e) {
        throw new RuntimeException("Error reading SequenceFile " + path);
    }
    return list;
}
Also used : SequenceFile(org.apache.hadoop.io.SequenceFile) ArrayList(java.util.ArrayList) Writable(org.apache.hadoop.io.Writable) IOException(java.io.IOException)
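
The same pattern extends to materializing both sides of each record. The sketch below is a generic standalone variant, not a method from Cloud9; it follows the reader loop above but keeps (key, value) pairs.

import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Writable;

public class SequenceFilePairs {
    // reads up to max (key, value) pairs from a SequenceFile
    public static List<Writable[]> readPairs(Path path, FileSystem fs, int max) {
        List<Writable[]> pairs = new ArrayList<Writable[]>();
        try (SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, fs.getConf())) {
            Writable key = (Writable) reader.getKeyClass().newInstance();
            Writable value = (Writable) reader.getValueClass().newInstance();
            while (pairs.size() < max && reader.next(key, value)) {
                pairs.add(new Writable[] { key, value });
                // fresh instances so stored pairs are not overwritten by the next read
                key = (Writable) reader.getKeyClass().newInstance();
                value = (Writable) reader.getValueClass().newInstance();
            }
        } catch (Exception e) {
            throw new RuntimeException("Error reading SequenceFile " + path, e);
        }
        return pairs;
    }
}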

Example 13 with SequenceFile

use of org.apache.hadoop.io.SequenceFile in project Cloud9 by lintool.

the class ScanBlockCompressedSequenceFile method main.

public static void main(String[] args) throws IOException {
    if (args.length != 1) {
        System.out.println("usage: [SequenceFile]");
        System.exit(-1);
    }
    List<Long> seekPoints = Lists.newArrayList();
    long pos = -1;
    long prevPos = -1;
    int prevDocno = 0;
    Path path = new Path(args[0]);
    Configuration config = new Configuration();
    SequenceFile.Reader reader = new SequenceFile.Reader(config, SequenceFile.Reader.file(path));
    IntWritable key = new IntWritable();
    ClueWarcRecord value = new ClueWarcRecord();
    pos = reader.getPosition();
    int cnt = 0;
    while (reader.next(key, value)) {
        // getPosition() only advances when a new compressed block starts, so a
        // change in position since the previous record marks a block boundary
        if (prevPos != -1 && prevPos != pos) {
            System.out.println("## beginning of block at " + prevPos + ", docno:" + prevDocno);
            seekPoints.add(prevPos);
        }
        System.out.println("offset:" + pos + "\tdocno:" + key + "\tdocid:" + value.getDocid());
        prevPos = pos;
        pos = reader.getPosition();
        prevDocno = key.get();
        cnt++;
        // note: this guard can never fire, since an int cannot exceed Integer.MAX_VALUE
        if (cnt > Integer.MAX_VALUE)
            break;
    }
    reader.close();
    reader = new SequenceFile.Reader(config, SequenceFile.Reader.file(path));
    for (long p : seekPoints) {
        reader.seek(p);
        reader.next(key, value);
        System.out.println("seeking to pos " + p + "\tdocno:" + key + "\tdocid:" + value.getDocid());
    }
    reader.close();
}
Also used : Path(org.apache.hadoop.fs.Path) Configuration(org.apache.hadoop.conf.Configuration) SequenceFile(org.apache.hadoop.io.SequenceFile) IntWritable(org.apache.hadoop.io.IntWritable)
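
The block-boundary detection above relies on the file being written with block compression, where records are buffered and the reader's position jumps only at block starts. To produce such a file for the scanner to inspect, a sketch with the Options-based writer API follows; the output path and the IntWritable/Text record types are placeholders (the scanner above expects ClueWarcRecord values).

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.compress.DefaultCodec;

public class WriteBlockCompressed {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Path path = new Path("block-compressed.seq");  // placeholder output path
        try (SequenceFile.Writer writer = SequenceFile.createWriter(conf,
                SequenceFile.Writer.file(path),
                SequenceFile.Writer.keyClass(IntWritable.class),
                SequenceFile.Writer.valueClass(Text.class),
                SequenceFile.Writer.compression(SequenceFile.CompressionType.BLOCK, new DefaultCodec()))) {
            // many small records so the writer emits multiple compressed blocks
            for (int i = 0; i < 100000; i++) {
                writer.append(new IntWritable(i), new Text("record-" + i));
            }
        }
    }
}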

Example 14 with SequenceFile

use of org.apache.hadoop.io.SequenceFile in project Plume by tdunning.

the class MapRedSequenceFileTest method test.

@Test
public void test() throws Exception {
    /*
     * Create input which is SequenceFile<int,int> with data 1,2\n3,4
     */
    Configuration conf = new Configuration();
    Path p = new Path(inputPath);
    FileSystem localFS = FileSystem.getLocal(conf);
    if (localFS.exists(p)) {
        // wipe it if needed
        localFS.delete(p, true);
    }
    SequenceFile.Writer writer = SequenceFile.createWriter(localFS, conf, p, IntWritable.class, IntWritable.class);
    writer.append(new IntWritable(1), new IntWritable(2));
    writer.append(new IntWritable(3), new IntWritable(4));
    writer.close();
    String outputPath = "/tmp/output-plume-simpletest";
    // Prepare output location for the test (reuse the local FileSystem from above)
    localFS.delete(new Path(outputPath), true);
    // Prepare workflow
    OtherWorkflow workFlow = new OtherWorkflow();
    // Execute it
    MapRedExecutor executor = new MapRedExecutor();
    executor.execute(workFlow, outputPath);
    /*
     * Read output which is SequenceFile<int,int> and assert that it has data 2,3\n4,5
     */
    p = new Path(outputPath + "/1_1/1-r-00000");
    SequenceFile.Reader reader = new SequenceFile.Reader(localFS, p, conf);
    IntWritable key = new IntWritable(1);
    IntWritable value = new IntWritable(1);
    assertTrue(reader.next(key, value));
    // JUnit's assertEquals takes (expected, actual)
    assertEquals(2, key.get());
    assertEquals(3, value.get());
    assertTrue(reader.next(key, value));
    assertEquals(4, key.get());
    assertEquals(5, value.get());
    reader.close();
}
Also used : Path(org.apache.hadoop.fs.Path) Configuration(org.apache.hadoop.conf.Configuration) SequenceFile(org.apache.hadoop.io.SequenceFile) FileSystem(org.apache.hadoop.fs.FileSystem) IntWritable(org.apache.hadoop.io.IntWritable) Test(org.junit.Test)
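
The SequenceFile.Reader(fs, p, conf) and createWriter(fs, conf, ...) overloads used in this test are deprecated in current Hadoop releases. A minimal sketch of the same write-then-read round trip with the Options-based API (the path is a placeholder):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;

public class OptionsApiDemo {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Path p = new Path("/tmp/options-demo.seq");  // placeholder path
        // write two records with the non-deprecated Options-based writer
        try (SequenceFile.Writer writer = SequenceFile.createWriter(conf,
                SequenceFile.Writer.file(p),
                SequenceFile.Writer.keyClass(IntWritable.class),
                SequenceFile.Writer.valueClass(IntWritable.class))) {
            writer.append(new IntWritable(1), new IntWritable(2));
            writer.append(new IntWritable(3), new IntWritable(4));
        }
        // read them back with the non-deprecated Options-based reader
        IntWritable key = new IntWritable();
        IntWritable value = new IntWritable();
        try (SequenceFile.Reader reader = new SequenceFile.Reader(conf, SequenceFile.Reader.file(p))) {
            while (reader.next(key, value)) {
                System.out.println(key.get() + " -> " + value.get());
            }
        }
    }
}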

Example 15 with SequenceFile

use of org.apache.hadoop.io.SequenceFile in project incubator-gobblin by apache.

the class FsStateStore method putAll.

/**
 * See {@link StateStore#putAll(String, String, Collection)}.
 *
 * <p>
 *   This implementation does not support putting the state objects into an existing store,
 *   as appending to an existing Hadoop SequenceFile is not yet supported (HADOOP-7139).
 * </p>
 */
@Override
public void putAll(String storeName, String tableName, Collection<T> states) throws IOException {
    String tmpTableName = this.useTmpFileForPut ? TMP_FILE_PREFIX + tableName : tableName;
    Path tmpTablePath = new Path(new Path(this.storeRootDir, storeName), tmpTableName);
    if (!this.fs.exists(tmpTablePath) && !create(storeName, tmpTableName)) {
        throw new IOException("Failed to create a state file for table " + tmpTableName);
    }
    Closer closer = Closer.create();
    try {
        @SuppressWarnings("deprecation") SequenceFile.Writer writer = closer.register(SequenceFile.createWriter(this.fs, this.conf, tmpTablePath, Text.class, this.stateClass, SequenceFile.CompressionType.BLOCK, new DefaultCodec()));
        for (T state : states) {
            writer.append(new Text(Strings.nullToEmpty(state.getId())), state);
        }
    } catch (Throwable t) {
        throw closer.rethrow(t);
    } finally {
        closer.close();
    }
    if (this.useTmpFileForPut) {
        Path tablePath = new Path(new Path(this.storeRootDir, storeName), tableName);
        renamePath(tmpTablePath, tablePath);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) Closer(com.google.common.io.Closer) SequenceFile(org.apache.hadoop.io.SequenceFile) DefaultCodec(org.apache.hadoop.io.compress.DefaultCodec) Text(org.apache.hadoop.io.Text) IOException(java.io.IOException)
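
For symmetry, reading such a table back pairs each Text id with a state object deserialized via Hadoop's ReflectionUtils. The sketch below is a hypothetical read-back helper, not Gobblin's actual getAll implementation; stateClass stands in for the store's configured state type.

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.util.ReflectionUtils;

public class StateReadBack {
    // reads every (id, state) entry written by a putAll-style writer
    public static <T extends Writable> List<T> readAll(Configuration conf, Path tablePath,
            Class<T> stateClass) throws IOException {
        List<T> states = new ArrayList<T>();
        try (SequenceFile.Reader reader =
                new SequenceFile.Reader(conf, SequenceFile.Reader.file(tablePath))) {
            Text id = new Text();
            T state = ReflectionUtils.newInstance(stateClass, conf);
            while (reader.next(id, state)) {
                states.add(state);
                // fresh instance so stored states are not overwritten by the next read
                state = ReflectionUtils.newInstance(stateClass, conf);
            }
        }
        return states;
    }
}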

Aggregations

SequenceFile (org.apache.hadoop.io.SequenceFile)20 Path (org.apache.hadoop.fs.Path)13 FileSystem (org.apache.hadoop.fs.FileSystem)12 Text (org.apache.hadoop.io.Text)10 File (java.io.File)7 IOException (java.io.IOException)5 Configuration (org.apache.hadoop.conf.Configuration)5 IntWritable (org.apache.hadoop.io.IntWritable)4 LongWritable (org.apache.hadoop.io.LongWritable)4 NullWritable (org.apache.hadoop.io.NullWritable)4 TreeSet (java.util.TreeSet)3 HFileSystem (org.apache.hadoop.hbase.fs.HFileSystem)3 ImmutableBytesWritable (org.apache.hadoop.hbase.io.ImmutableBytesWritable)3 Writable (org.apache.hadoop.io.Writable)3 Closer (com.google.common.io.Closer)2 ArrayList (java.util.ArrayList)2 DefaultCodec (org.apache.hadoop.io.compress.DefaultCodec)2 Before (org.junit.Before)2 Test (org.junit.Test)2 BufferedReader (java.io.BufferedReader)1