Search in sources :

Example 1 with SequenceFile

use of org.apache.hadoop.io.SequenceFile in project hbase by apache.

the class HFileOutputFormat2 method writePartitions.

/**
 * Write out a {@link SequenceFile} that can be read by
 * {@link TotalOrderPartitioner} that contains the split points in startKeys.
 */
private static void writePartitions(Configuration conf, Path partitionsPath, List<ImmutableBytesWritable> startKeys) throws IOException {"Writing partition information to " + partitionsPath);
    if (startKeys.isEmpty()) {
        throw new IllegalArgumentException("No regions passed");
    // We're generating a list of split points, and we don't ever
    // have keys < the first region (which has an empty start key)
    // so we need to remove it. Otherwise we would end up with an
    // empty reducer with index 0
    TreeSet<ImmutableBytesWritable> sorted = new TreeSet<>(startKeys);
    ImmutableBytesWritable first = sorted.first();
    if (!first.equals(HConstants.EMPTY_BYTE_ARRAY)) {
        throw new IllegalArgumentException("First region of table should have empty start key. Instead has: " + Bytes.toStringBinary(first.get()));
    // Write the actual file
    FileSystem fs = partitionsPath.getFileSystem(conf);
    SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, partitionsPath, ImmutableBytesWritable.class, NullWritable.class);
    try {
        for (ImmutableBytesWritable startKey : sorted) {
            writer.append(startKey, NullWritable.get());
    } finally {
Also used : ImmutableBytesWritable(org.apache.hadoop.hbase.io.ImmutableBytesWritable) SequenceFile(org.apache.hadoop.io.SequenceFile) TreeSet(java.util.TreeSet) FileSystem(org.apache.hadoop.fs.FileSystem) HFileSystem(org.apache.hadoop.hbase.fs.HFileSystem)

Example 2 with SequenceFile

use of org.apache.hadoop.io.SequenceFile in project flink by apache.

the class HadoopIOFormatsITCase method preSubmit.

protected void preSubmit() throws Exception {
    resultPath = new String[] { getTempDirPath("result0"), getTempDirPath("result1") };
    File sequenceFile = createAndRegisterTempFile("seqFile");
    sequenceFileInPath = sequenceFile.toURI().toString();
    // Create a sequence file
    org.apache.hadoop.conf.Configuration conf = new org.apache.hadoop.conf.Configuration();
    FileSystem fs = FileSystem.get(URI.create(sequenceFile.getAbsolutePath()), conf);
    Path path = new Path(sequenceFile.getAbsolutePath());
    //  ------------------ Long / Text Key Value pair: ------------
    int kvCount = 4;
    LongWritable key = new LongWritable();
    Text value = new Text();
    SequenceFile.Writer writer = null;
    try {
        writer = SequenceFile.createWriter(fs, conf, path, key.getClass(), value.getClass());
        for (int i = 0; i < kvCount; i++) {
            if (i == 1) {
                // write key = 0 a bit more often.
                for (int a = 0; a < 15; a++) {
                    value.set(i + " - somestring");
                    writer.append(key, value);
            value.set(i + " - somestring");
            writer.append(key, value);
    } finally {
    //  ------------------ Null / Long Key Value pair: ------------
    File sequenceFileNull = createAndRegisterTempFile("seqFileNullKey");
    sequenceFileInPathNull = sequenceFileNull.toURI().toString();
    path = new Path(sequenceFileInPathNull);
    LongWritable value1 = new LongWritable();
    SequenceFile.Writer writer1 = null;
    try {
        writer1 = SequenceFile.createWriter(fs, conf, path, NullWritable.class, value1.getClass());
        for (int i = 0; i < kvCount; i++) {
            writer1.append(NullWritable.get(), value1);
    } finally {
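Also used : Path(org.apache.hadoop.fs.Path) Configuration(org.apache.flink.configuration.Configuration) Text(org.apache.hadoop.io.Text) NullWritable(org.apache.hadoop.io.NullWritable) SequenceFile(org.apache.hadoop.io.SequenceFile) FileSystem(org.apache.hadoop.fs.FileSystem) LongWritable(org.apache.hadoop.io.LongWritable) SequenceFile(org.apache.hadoop.io.SequenceFile) File(java.io.File)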

Example 3 with SequenceFile

use of org.apache.hadoop.io.SequenceFile in project nifi by apache.

the class KeyValueReader method readSequenceFile.

public Set<FlowFile> readSequenceFile(Path file, Configuration configuration, FileSystem fileSystem) throws IOException {
    final SequenceFile.Reader reader;
    Set<FlowFile> flowFiles = new HashSet<>();
    reader = new SequenceFile.Reader(configuration, Reader.file(fileSystem.makeQualified(file)));
    final Text key = new Text();
    final KeyValueWriterCallback callback = new KeyValueWriterCallback(reader);
    final String inputfileName = file.getName() + "." + System.nanoTime() + ".";
    int counter = 0;
    LOG.debug("Read from SequenceFile: {} ", new Object[] { file });
    try {
        while ( {
            String fileName = key.toString();
            // the key may be a file name, and may not
            if (LOOKS_LIKE_FILENAME.matcher(fileName).matches()) {
                if (fileName.contains(File.separator)) {
                    fileName = StringUtils.substringAfterLast(fileName, File.separator);
                fileName = fileName + "." + System.nanoTime();
            } else {
                fileName = inputfileName + ++counter;
            FlowFile flowFile = session.create();
            flowFile = session.putAttribute(flowFile, CoreAttributes.FILENAME.key(), fileName);
            callback.key = key;
            try {
                flowFile = session.write(flowFile, callback);
            } catch (ProcessException e) {
                LOG.error("Could not write to flowfile {}", new Object[] { flowFile }, e);
    } finally {
    return flowFiles;
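Also used : FlowFile(org.apache.nifi.flowfile.FlowFile) ProcessException(org.apache.nifi.processor.exception.ProcessException) Reader(org.apache.hadoop.io.SequenceFile.Reader) SequenceFile(org.apache.hadoop.io.SequenceFile) Text(org.apache.hadoop.io.Text) HashSet(java.util.HashSet)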

Example 4 with SequenceFile

use of org.apache.hadoop.io.SequenceFile in project elephant-bird by twitter.

the class TestSequenceFileStorage method setUp.

public void setUp() throws Exception {
    // create local Pig server
    pigServer = PigTestUtil.makePigServer();
    // create temp SequenceFile
    File tempFile = File.createTempFile("test", ".txt");
    tempFilename = tempFile.getAbsolutePath();
    Path path = new Path("file:///" + tempFilename);
    Configuration conf = new Configuration();
    FileSystem fs = path.getFileSystem(conf);
    IntWritable key = new IntWritable();
    Text value = new Text();
    SequenceFile.Writer writer = null;
    try {
        writer = SequenceFile.createWriter(fs, conf, path, key.getClass(), value.getClass());
        for (int i = 0; i < DATA.length; ++i) {
            writer.append(key, value);
    } finally {
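Also used : Path(org.apache.hadoop.fs.Path) Configuration(org.apache.hadoop.conf.Configuration) SequenceFile(org.apache.hadoop.io.SequenceFile) FileSystem(org.apache.hadoop.fs.FileSystem) Text(org.apache.hadoop.io.Text) SequenceFile(org.apache.hadoop.io.SequenceFile) File(java.io.File) IntWritable(org.apache.hadoop.io.IntWritable) Before(org.junit.Before)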

Example 5 with SequenceFile

use of org.apache.hadoop.io.SequenceFile in project elephant-bird by twitter.

the class AbstractTestWritableConverter method setup.

public void setup() throws IOException {
    // create local Pig server
    pigServer = PigTestUtil.makePigServer();
    // create temp SequenceFile
    final File tempFile = File.createTempFile("test", ".txt");
    tempFilename = tempFile.getAbsolutePath();
    final Path path = new Path("file:///" + tempFilename);
    final Configuration conf = new Configuration();
    final FileSystem fs = path.getFileSystem(conf);
    final IntWritable key = new IntWritable();
    SequenceFile.Writer writer = null;
    try {
        writer = SequenceFile.createWriter(fs, conf, path, key.getClass(), writableClass);
        for (int i = 0; i < data.length; ++i) {
            writer.append(key, data[i]);
    } finally {
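Also used : Path(org.apache.hadoop.fs.Path) Configuration(org.apache.hadoop.conf.Configuration) SequenceFile(org.apache.hadoop.io.SequenceFile) FileSystem(org.apache.hadoop.fs.FileSystem) SequenceFile(org.apache.hadoop.io.SequenceFile) File(java.io.File) IntWritable(org.apache.hadoop.io.IntWritable) Before(org.junit.Before)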


SequenceFile ( Path (org.apache.hadoop.fs.Path)10 FileSystem (org.apache.hadoop.fs.FileSystem)9 Text ( IOException ( Configuration (org.apache.hadoop.conf.Configuration)5 File ( IntWritable ( Writable ( Closer ( ArrayList (java.util.ArrayList)2 TreeSet (java.util.TreeSet)2 HFileSystem (org.apache.hadoop.hbase.fs.HFileSystem)2 ImmutableBytesWritable ( LongWritable ( NullWritable ( DefaultCodec ( Before (org.junit.Before)2 BufferedReader ( InputStreamReader (