Search in sources :

Example 16 with SequenceFile

use of org.apache.hadoop.io.SequenceFile in project incubator-gobblin by apache.

the class FsStateStore method put.

/**
 * See {@link StateStore#put(String, String, T)}.
 *
 * <p>
 *   This implementation does not support putting the state object into an existing store, as
 *   append is yet to be supported by the Hadoop SequenceFile (HADOOP-7139).
 * </p>
 */
public void put(String storeName, String tableName, T state) throws IOException {
    String tmpTableName = this.useTmpFileForPut ? TMP_FILE_PREFIX + tableName : tableName;
    Path tmpTablePath = new Path(new Path(this.storeRootDir, storeName), tmpTableName);
    if (!this.fs.exists(tmpTablePath) && !create(storeName, tmpTableName)) {
        throw new IOException("Failed to create a state file for table " + tmpTableName);
    Closer closer = Closer.create();
    try {
        @SuppressWarnings("deprecation") SequenceFile.Writer writer = closer.register(SequenceFile.createWriter(this.fs, this.conf, tmpTablePath, Text.class, this.stateClass, SequenceFile.CompressionType.BLOCK, new DefaultCodec()));
        writer.append(new Text(Strings.nullToEmpty(state.getId())), state);
    } catch (Throwable t) {
        throw closer.rethrow(t);
    } finally {
    if (this.useTmpFileForPut) {
        Path tablePath = new Path(new Path(this.storeRootDir, storeName), tableName);
        renamePath(tmpTablePath, tablePath);
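Also used : Path(org.apache.hadoop.fs.Path) Closer(com.google.common.io.Closer) SequenceFile(org.apache.hadoop.io.SequenceFile) DefaultCodec(org.apache.hadoop.io.compress.DefaultCodec) Text(org.apache.hadoop.io.Text) IOException(java.io.IOException)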

Example 17 with SequenceFile

use of org.apache.hadoop.io.SequenceFile in project hadoop by apache.

the class TestCodec method sequenceFileCodecTest.

private static void sequenceFileCodecTest(Configuration conf, int lines, String codecClass, int blockSize) throws IOException, ClassNotFoundException, InstantiationException, IllegalAccessException {
    Path filePath = new Path("SequenceFileCodecTest." + codecClass);
    // Configuration
    conf.setInt("io.seqfile.compress.blocksize", blockSize);
    // Create the SequenceFile
    FileSystem fs = FileSystem.get(conf);"Creating SequenceFile with codec \"" + codecClass + "\"");
    SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, filePath, Text.class, Text.class, CompressionType.BLOCK, (CompressionCodec) Class.forName(codecClass).newInstance());
    // Write some data"Writing to SequenceFile...");
    for (int i = 0; i < lines; i++) {
        Text key = new Text("key" + i);
        Text value = new Text("value" + i);
        writer.append(key, value);
    // Read the data back and check"Reading from the SequenceFile...");
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, filePath, conf);
    Writable key = (Writable) reader.getKeyClass().newInstance();
    Writable value = (Writable) reader.getValueClass().newInstance();
    int lc = 0;
    try {
        while (, value)) {
            assertEquals("key" + lc, key.toString());
            assertEquals("value" + lc, value.toString());
    } finally {
    assertEquals(lines, lc);
    // Delete temporary files
    fs.delete(filePath, false);"SUCCESS! Completed SequenceFileCodecTest with codec \"" + codecClass + "\"");
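Also used : Path(org.apache.hadoop.fs.Path) SequenceFile(org.apache.hadoop.io.SequenceFile) FileSystem(org.apache.hadoop.fs.FileSystem) LineReader(org.apache.hadoop.util.LineReader) InputStreamReader(java.io.InputStreamReader) BufferedReader(java.io.BufferedReader) Writable(org.apache.hadoop.io.Writable) Text(org.apache.hadoop.io.Text)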

Example 18 with SequenceFile

use of org.apache.hadoop.io.SequenceFile in project flink by splunk.

the class HadoopIOFormatsITCase method preSubmit.

protected void preSubmit() throws Exception {
    resultPath = new String[] { getTempDirPath("result0"), getTempDirPath("result1") };
    File sequenceFile = createAndRegisterTempFile("seqFile");
    sequenceFileInPath = sequenceFile.toURI().toString();
    // Create a sequence file
    org.apache.hadoop.conf.Configuration conf = new org.apache.hadoop.conf.Configuration();
    FileSystem fs = FileSystem.get(URI.create(sequenceFile.getAbsolutePath()), conf);
    Path path = new Path(sequenceFile.getAbsolutePath());
    // ------------------ Long / Text Key Value pair: ------------
    int kvCount = 4;
    LongWritable key = new LongWritable();
    Text value = new Text();
    SequenceFile.Writer writer = null;
    try {
        writer = SequenceFile.createWriter(fs, conf, path, key.getClass(), value.getClass());
        for (int i = 0; i < kvCount; i++) {
            if (i == 1) {
                // write key = 0 a bit more often.
                for (int a = 0; a < 15; a++) {
                    value.set(i + " - somestring");
                    writer.append(key, value);
            value.set(i + " - somestring");
            writer.append(key, value);
    } finally {
    // ------------------ Long / Text Key Value pair: ------------
    File sequenceFileNull = createAndRegisterTempFile("seqFileNullKey");
    sequenceFileInPathNull = sequenceFileNull.toURI().toString();
    path = new Path(sequenceFileInPathNull);
    LongWritable value1 = new LongWritable();
    SequenceFile.Writer writer1 = null;
    try {
        writer1 = SequenceFile.createWriter(fs, conf, path, NullWritable.class, value1.getClass());
        for (int i = 0; i < kvCount; i++) {
            writer1.append(NullWritable.get(), value1);
    } finally {
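Also used : Path(org.apache.hadoop.fs.Path) Text(org.apache.hadoop.io.Text) NullWritable(org.apache.hadoop.io.NullWritable) SequenceFile(org.apache.hadoop.io.SequenceFile) FileSystem(org.apache.hadoop.fs.FileSystem) LongWritable(org.apache.hadoop.io.LongWritable) SequenceFile(org.apache.hadoop.io.SequenceFile) File(java.io.File)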

Example 19 with SequenceFile

use of org.apache.hadoop.io.SequenceFile in project kylin by apache.

the class HFileOutputFormat3 method writePartitions.

/**
 * Write out a {@link SequenceFile} that can be read by
 * {@link TotalOrderPartitioner} that contains the split points in startKeys.
 */
private static void writePartitions(Configuration conf, Path partitionsPath, List<ImmutableBytesWritable> startKeys) throws IOException {"Writing partition information to " + partitionsPath);
    if (startKeys.isEmpty()) {
        throw new IllegalArgumentException("No regions passed");
    // We're generating a list of split points, and we don't ever
    // have keys < the first region (which has an empty start key)
    // so we need to remove it. Otherwise we would end up with an
    // empty reducer with index 0
    TreeSet<ImmutableBytesWritable> sorted = new TreeSet<ImmutableBytesWritable>(startKeys);
    ImmutableBytesWritable first = sorted.first();
    if (!Arrays.equals(first.get(), HConstants.EMPTY_BYTE_ARRAY)) {
        throw new IllegalArgumentException("First region of table should have empty start key. Instead has: " + Bytes.toStringBinary(first.get()));
    // Write the actual file
    FileSystem fs = partitionsPath.getFileSystem(conf);
    SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, partitionsPath, ImmutableBytesWritable.class, NullWritable.class);
    try {
        for (ImmutableBytesWritable startKey : sorted) {
            writer.append(startKey, NullWritable.get());
    } finally {
Also used : ImmutableBytesWritable(org.apache.hadoop.hbase.io.ImmutableBytesWritable) SequenceFile(org.apache.hadoop.io.SequenceFile) TreeSet(java.util.TreeSet) FileSystem(org.apache.hadoop.fs.FileSystem) HFileSystem(org.apache.hadoop.hbase.fs.HFileSystem)

Example 20 with SequenceFile

use of org.apache.hadoop.io.SequenceFile in project avro by apache.

the class TestSequenceFileReader method testNonAvroReducer.

public void testNonAvroReducer() throws Exception {
    JobConf job = new JobConf();
    Path outputPath = new Path(OUTPUT_DIR.getRoot().getPath());
    outputPath.getFileSystem(job).delete(outputPath, true);
    // configure input for Avro from sequence file
    AvroJob.setInputSchema(job, SCHEMA);
    FileInputFormat.setInputPaths(job, file().toURI().toString());
    // mapper is default, identity
    // use a hadoop reducer that consumes Avro input
    AvroJob.setMapOutputSchema(job, SCHEMA);
    // configure outputPath for non-Avro SequenceFile
    FileOutputFormat.setOutputPath(job, outputPath);
    // output key/value classes are default, LongWritable/Text
    checkFile(new SequenceFileReader<>(new File(outputPath.toString() + "/part-00000")));
Also used : Path(org.apache.hadoop.fs.Path) JobConf(org.apache.hadoop.mapred.JobConf) SequenceFile(org.apache.hadoop.io.SequenceFile) File(java.io.File) Test(org.junit.Test)


SequenceFile ( Path (org.apache.hadoop.fs.Path)13 FileSystem (org.apache.hadoop.fs.FileSystem)12 Text ( File ( IOException ( Configuration (org.apache.hadoop.conf.Configuration)5 IntWritable ( LongWritable ( NullWritable ( TreeSet (java.util.TreeSet)3 HFileSystem (org.apache.hadoop.hbase.fs.HFileSystem)3 ImmutableBytesWritable ( Writable ( Closer ( ArrayList (java.util.ArrayList)2 DefaultCodec ( Before (org.junit.Before)2 Test (org.junit.Test)2 BufferedReader (