
Example 6 with ImmutableBytesWritable

Use of org.apache.hadoop.hbase.io.ImmutableBytesWritable in project hbase by apache.

From class HFileOutputFormat2, method writePartitions:

/**
   * Write out a {@link SequenceFile} that can be read by
   * {@link TotalOrderPartitioner} that contains the split points in startKeys.
   */
@SuppressWarnings("deprecation")
private static void writePartitions(Configuration conf, Path partitionsPath, List<ImmutableBytesWritable> startKeys) throws IOException {
    LOG.info("Writing partition information to " + partitionsPath);
    if (startKeys.isEmpty()) {
        throw new IllegalArgumentException("No regions passed");
    }
    // We're generating a list of split points, and we don't ever
    // have keys < the first region (which has an empty start key)
    // so we need to remove it. Otherwise we would end up with an
    // empty reducer with index 0
    TreeSet<ImmutableBytesWritable> sorted = new TreeSet<>(startKeys);
    ImmutableBytesWritable first = sorted.first();
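    // ImmutableBytesWritable.equals accepts a raw byte[] and compares contents,
    // so the check below against HConstants.EMPTY_BYTE_ARRAY tests whether the
    // first region's start key is empty.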
    if (!first.equals(HConstants.EMPTY_BYTE_ARRAY)) {
        throw new IllegalArgumentException("First region of table should have empty start key. Instead has: " + Bytes.toStringBinary(first.get()));
    }
    sorted.remove(first);
    // Write the actual file
    FileSystem fs = partitionsPath.getFileSystem(conf);
    SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, partitionsPath, ImmutableBytesWritable.class, NullWritable.class);
    try {
        for (ImmutableBytesWritable startKey : sorted) {
            writer.append(startKey, NullWritable.get());
        }
    } finally {
        writer.close();
    }
}
Also used : ImmutableBytesWritable(org.apache.hadoop.hbase.io.ImmutableBytesWritable) SequenceFile(org.apache.hadoop.io.SequenceFile) TreeSet(java.util.TreeSet) FileSystem(org.apache.hadoop.fs.FileSystem) HFileSystem(org.apache.hadoop.hbase.fs.HFileSystem)
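
The partitions file written above is exactly what TotalOrderPartitioner consumes at job setup. Below is a minimal wiring sketch, assuming partitionsPath is the Path that writePartitions just produced; the method name and job name are hypothetical.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.partition.TotalOrderPartitioner;

static Job configurePartitionedJob(Configuration conf, Path partitionsPath) throws IOException {
    Job job = Job.getInstance(conf, "hfile-load");
    // Point TotalOrderPartitioner at the SequenceFile of split points.
    TotalOrderPartitioner.setPartitionFile(job.getConfiguration(), partitionsPath);
    job.setPartitionerClass(TotalOrderPartitioner.class);
    // Map output keys must be ImmutableBytesWritable so they sort and compare
    // against the split points written by writePartitions.
    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    return job;
}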

Example 7 with ImmutableBytesWritable

Use of org.apache.hadoop.hbase.io.ImmutableBytesWritable in project hive by apache.

From class HiveHBaseTableInputFormat, method getRecordReader:

@Override
public RecordReader<ImmutableBytesWritable, ResultWritable> getRecordReader(InputSplit split, JobConf jobConf, final Reporter reporter) throws IOException {
    HBaseSplit hbaseSplit = (HBaseSplit) split;
    TableSplit tableSplit = hbaseSplit.getTableSplit();
    if (conn == null) {
        conn = ConnectionFactory.createConnection(HBaseConfiguration.create(jobConf));
    }
    initializeTable(conn, tableSplit.getTable());
    setScan(HiveHBaseInputFormatUtil.getScan(jobConf));
    Job job = new Job(jobConf);
    TaskAttemptContext tac = ShimLoader.getHadoopShims().newTaskAttemptContext(job.getConfiguration(), reporter);
    final org.apache.hadoop.mapreduce.RecordReader<ImmutableBytesWritable, Result> recordReader = createRecordReader(tableSplit, tac);
    try {
        recordReader.initialize(tableSplit, tac);
    } catch (InterruptedException e) {
        // Free up the HTable connections
        closeTable();
        if (conn != null) {
            conn.close();
            conn = null;
        }
        throw new IOException("Failed to initialize RecordReader", e);
    }
    return new RecordReader<ImmutableBytesWritable, ResultWritable>() {

        @Override
        public void close() throws IOException {
            recordReader.close();
            closeTable();
            if (conn != null) {
                conn.close();
                conn = null;
            }
        }

        @Override
        public ImmutableBytesWritable createKey() {
            return new ImmutableBytesWritable();
        }

        @Override
        public ResultWritable createValue() {
            return new ResultWritable(new Result());
        }

        @Override
        public long getPos() throws IOException {
            return 0;
        }

        @Override
        public float getProgress() throws IOException {
            float progress = 0.0F;
            try {
                progress = recordReader.getProgress();
            } catch (InterruptedException e) {
                throw new IOException(e);
            }
            return progress;
        }

        @Override
        public boolean next(ImmutableBytesWritable rowKey, ResultWritable value) throws IOException {
            boolean next = false;
            try {
                next = recordReader.nextKeyValue();
                if (next) {
                    rowKey.set(recordReader.getCurrentValue().getRow());
                    value.setResult(recordReader.getCurrentValue());
                }
            } catch (InterruptedException e) {
                throw new IOException(e);
            }
            return next;
        }
    };
}
Also used : ImmutableBytesWritable(org.apache.hadoop.hbase.io.ImmutableBytesWritable) RecordReader(org.apache.hadoop.mapred.RecordReader) TaskAttemptContext(org.apache.hadoop.mapreduce.TaskAttemptContext) IOException(java.io.IOException) Result(org.apache.hadoop.hbase.client.Result) TableSplit(org.apache.hadoop.hbase.mapreduce.TableSplit) Job(org.apache.hadoop.mapreduce.Job)
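
A short sketch of draining the reader returned above through the old mapred API; inputFormat, split, and jobConf are assumed to already exist, and Reporter.NULL stands in for a real reporter.

RecordReader<ImmutableBytesWritable, ResultWritable> reader =
    inputFormat.getRecordReader(split, jobConf, Reporter.NULL);
try {
    ImmutableBytesWritable key = reader.createKey();
    ResultWritable value = reader.createValue();
    while (reader.next(key, value)) {
        // key holds the HBase row key; value wraps the scan Result.
        System.out.println(Bytes.toStringBinary(key.get()));
    }
} finally {
    reader.close();
}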

Example 8 with ImmutableBytesWritable

Use of org.apache.hadoop.hbase.io.ImmutableBytesWritable in project hive by apache.

From class HiveHFileOutputFormat, method getHiveRecordWriter:

@Override
public RecordWriter getHiveRecordWriter(final JobConf jc, final Path finalOutPath, Class<? extends Writable> valueClass, boolean isCompressed, Properties tableProperties, final Progressable progressable) throws IOException {
    // Read configuration for the target path, first from jobconf, then from table properties
    String hfilePath = getFamilyPath(jc, tableProperties);
    if (hfilePath == null) {
        throw new RuntimeException("Please set " + HFILE_FAMILY_PATH + " to target location for HFiles");
    }
    // Target path's last component is also the column family name.
    final Path columnFamilyPath = new Path(hfilePath);
    final String columnFamilyName = columnFamilyPath.getName();
    final byte[] columnFamilyNameBytes = Bytes.toBytes(columnFamilyName);
    final Job job = new Job(jc);
    setCompressOutput(job, isCompressed);
    setOutputPath(job, finalOutPath);
    // Create the HFile writer
    final org.apache.hadoop.mapreduce.TaskAttemptContext tac = ShimLoader.getHadoopShims().newTaskAttemptContext(job.getConfiguration(), progressable);
    final Path outputdir = FileOutputFormat.getOutputPath(tac);
    final Path taskAttemptOutputdir = new FileOutputCommitter(outputdir, tac).getWorkPath();
    final org.apache.hadoop.mapreduce.RecordWriter<ImmutableBytesWritable, KeyValue> fileWriter = getFileWriter(tac);
    // Individual columns are going to be pivoted to HBase cells,
    // and for each row, they need to be written out in order
    // of column name, so sort the column names now, creating a
    // mapping to their column position.  However, the first
    // column is interpreted as the row key.
    String columnList = tableProperties.getProperty("columns");
    String[] columnArray = columnList.split(",");
    final SortedMap<byte[], Integer> columnMap = new TreeMap<byte[], Integer>(Bytes.BYTES_COMPARATOR);
    int i = 0;
    for (String columnName : columnArray) {
        if (i != 0) {
            columnMap.put(Bytes.toBytes(columnName), i);
        }
        ++i;
    }
    return new RecordWriter() {

        @Override
        public void close(boolean abort) throws IOException {
            try {
                fileWriter.close(null);
                if (abort) {
                    return;
                }
                // Move the hfiles file(s) from the task output directory to the
                // location specified by the user.
                FileSystem fs = outputdir.getFileSystem(jc);
                fs.mkdirs(columnFamilyPath);
                Path srcDir = taskAttemptOutputdir;
                for (; ; ) {
                    FileStatus[] files = fs.listStatus(srcDir, FileUtils.STAGING_DIR_PATH_FILTER);
                    if ((files == null) || (files.length == 0)) {
                        throw new IOException("No family directories found in " + srcDir);
                    }
                    if (files.length != 1) {
                        throw new IOException("Multiple family directories found in " + srcDir);
                    }
                    srcDir = files[0].getPath();
                    if (srcDir.getName().equals(columnFamilyName)) {
                        break;
                    }
                    if (files[0].isFile()) {
                        throw new IOException("No family directories found in " + taskAttemptOutputdir + ". " + "The last component in hfile path should match column family name " + columnFamilyName);
                    }
                }
                for (FileStatus regionFile : fs.listStatus(srcDir, FileUtils.STAGING_DIR_PATH_FILTER)) {
                    fs.rename(regionFile.getPath(), new Path(columnFamilyPath, regionFile.getPath().getName()));
                }
                // Hive actually wants a file as task output (not a directory), so
                // replace the empty directory with an empty file to keep it happy.
                fs.delete(taskAttemptOutputdir, true);
                fs.createNewFile(taskAttemptOutputdir);
            } catch (InterruptedException ex) {
                throw new IOException(ex);
            }
        }

        private void writeText(Text text) throws IOException {
            // Decompose the incoming text row into fields. Hive's default
            // field delimiter is Ctrl-A (\u0001).
            String s = text.toString();
            String[] fields = s.split("\u0001");
            assert (fields.length <= (columnMap.size() + 1));
            // First field is the row key.
            byte[] rowKeyBytes = Bytes.toBytes(fields[0]);
            // Remaining fields are cells addressed by column name within row.
            for (Map.Entry<byte[], Integer> entry : columnMap.entrySet()) {
                byte[] columnNameBytes = entry.getKey();
                int iColumn = entry.getValue();
                String val;
                if (iColumn >= fields.length) {
                    // trailing blank field
                    val = "";
                } else {
                    val = fields[iColumn];
                    if ("\\N".equals(val)) {
                        // omit nulls
                        continue;
                    }
                }
                byte[] valBytes = Bytes.toBytes(val);
                KeyValue kv = new KeyValue(rowKeyBytes, columnFamilyNameBytes, columnNameBytes, valBytes);
                try {
                    fileWriter.write(null, kv);
                } catch (IOException e) {
                    LOG.error("Failed while writing row: " + s);
                    throw e;
                } catch (InterruptedException ex) {
                    throw new IOException(ex);
                }
            }
        }

        private void writePut(PutWritable put) throws IOException {
            ImmutableBytesWritable row = new ImmutableBytesWritable(put.getPut().getRow());
            SortedMap<byte[], List<Cell>> cells = put.getPut().getFamilyCellMap();
            for (Map.Entry<byte[], List<Cell>> entry : cells.entrySet()) {
                Collections.sort(entry.getValue(), new CellComparator());
                for (Cell c : entry.getValue()) {
                    try {
                        fileWriter.write(row, KeyValueUtil.copyToNewKeyValue(c));
                    } catch (InterruptedException e) {
                        throw (InterruptedIOException) new InterruptedIOException().initCause(e);
                    }
                }
            }
        }

        @Override
        public void write(Writable w) throws IOException {
            if (w instanceof Text) {
                writeText((Text) w);
            } else if (w instanceof PutWritable) {
                writePut((PutWritable) w);
            } else {
                throw new IOException("Unexpected writable " + w);
            }
        }
    };
}
Also used : InterruptedIOException(java.io.InterruptedIOException) KeyValue(org.apache.hadoop.hbase.KeyValue) FileStatus(org.apache.hadoop.fs.FileStatus) Writable(org.apache.hadoop.io.Writable) ImmutableBytesWritable(org.apache.hadoop.hbase.io.ImmutableBytesWritable) RecordWriter(org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter) FileSystem(org.apache.hadoop.fs.FileSystem) List(java.util.List) CellComparator(org.apache.hadoop.hbase.CellComparator) Job(org.apache.hadoop.mapreduce.Job) Cell(org.apache.hadoop.hbase.Cell) Path(org.apache.hadoop.fs.Path) FileOutputCommitter(org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter) Text(org.apache.hadoop.io.Text) IOException(java.io.IOException) TreeMap(java.util.TreeMap) Map(java.util.Map) SortedMap(java.util.SortedMap)
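
The writer above throws unless a family path is configured. Here is a minimal sketch of supplying it through the table properties, assuming the property key hfile.family.path read by getFamilyPath and an example column family named cf; per the close() logic above, the last path component must match the column family name.

import java.util.Properties;

Properties tableProperties = new Properties();
// Last path component ("cf") must equal the target HBase column family.
tableProperties.setProperty("hfile.family.path", "/tmp/hfile_staging/cf");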

Example 9 with ImmutableBytesWritable

Use of org.apache.hadoop.hbase.io.ImmutableBytesWritable in project hbase by apache.

From class TestGroupingTableMap, method shouldNotCallCollectonSinceFindUniqueKeyValueMoreThanOnes:

@Test
@SuppressWarnings({ "deprecation", "unchecked" })
public void shouldNotCallCollectonSinceFindUniqueKeyValueMoreThanOnes() throws Exception {
    GroupingTableMap gTableMap = null;
    try {
        Result result = mock(Result.class);
        Reporter reporter = mock(Reporter.class);
        gTableMap = new GroupingTableMap();
        Configuration cfg = new Configuration();
        cfg.set(GroupingTableMap.GROUP_COLUMNS, "familyA:qualifierA familyB:qualifierB");
        JobConf jobConf = new JobConf(cfg);
        gTableMap.configure(jobConf);
        byte[] row = {};
        List<Cell> keyValues = ImmutableList.<Cell>of(new KeyValue(row, "familyA".getBytes(), "qualifierA".getBytes(), Bytes.toBytes("1111")), new KeyValue(row, "familyA".getBytes(), "qualifierA".getBytes(), Bytes.toBytes("2222")), new KeyValue(row, "familyB".getBytes(), "qualifierB".getBytes(), Bytes.toBytes("3333")));
        when(result.listCells()).thenReturn(keyValues);
        OutputCollector<ImmutableBytesWritable, Result> outputCollectorMock = mock(OutputCollector.class);
        gTableMap.map(null, result, outputCollectorMock, reporter);
        verify(result).listCells();
        verifyZeroInteractions(outputCollectorMock);
    } finally {
        if (gTableMap != null)
            gTableMap.close();
    }
}
Also used : KeyValue(org.apache.hadoop.hbase.KeyValue) ImmutableBytesWritable(org.apache.hadoop.hbase.io.ImmutableBytesWritable) Configuration(org.apache.hadoop.conf.Configuration) Reporter(org.apache.hadoop.mapred.Reporter) JobConf(org.apache.hadoop.mapred.JobConf) Cell(org.apache.hadoop.hbase.Cell) Result(org.apache.hadoop.hbase.client.Result) Test(org.junit.Test)

Example 10 with ImmutableBytesWritable

Use of org.apache.hadoop.hbase.io.ImmutableBytesWritable in project hbase by apache.

From class TestGroupingTableMap, method shouldCreateNewKeyAlthoughExtraKey:

@Test
@SuppressWarnings({ "deprecation", "unchecked" })
public void shouldCreateNewKeyAlthoughExtraKey() throws Exception {
    GroupingTableMap gTableMap = null;
    try {
        Result result = mock(Result.class);
        Reporter reporter = mock(Reporter.class);
        gTableMap = new GroupingTableMap();
        Configuration cfg = new Configuration();
        cfg.set(GroupingTableMap.GROUP_COLUMNS, "familyA:qualifierA familyB:qualifierB");
        JobConf jobConf = new JobConf(cfg);
        gTableMap.configure(jobConf);
        byte[] row = {};
        List<Cell> keyValues = ImmutableList.<Cell>of(new KeyValue(row, "familyA".getBytes(), "qualifierA".getBytes(), Bytes.toBytes("1111")), new KeyValue(row, "familyB".getBytes(), "qualifierB".getBytes(), Bytes.toBytes("2222")), new KeyValue(row, "familyC".getBytes(), "qualifierC".getBytes(), Bytes.toBytes("3333")));
        when(result.listCells()).thenReturn(keyValues);
        OutputCollector<ImmutableBytesWritable, Result> outputCollectorMock = mock(OutputCollector.class);
        gTableMap.map(null, result, outputCollectorMock, reporter);
        verify(result).listCells();
        verify(outputCollectorMock, times(1)).collect(any(ImmutableBytesWritable.class), any(Result.class));
        verifyNoMoreInteractions(outputCollectorMock);
    } finally {
        if (gTableMap != null)
            gTableMap.close();
    }
}
Also used : KeyValue(org.apache.hadoop.hbase.KeyValue) ImmutableBytesWritable(org.apache.hadoop.hbase.io.ImmutableBytesWritable) Configuration(org.apache.hadoop.conf.Configuration) Reporter(org.apache.hadoop.mapred.Reporter) JobConf(org.apache.hadoop.mapred.JobConf) Cell(org.apache.hadoop.hbase.Cell) Result(org.apache.hadoop.hbase.client.Result) Test(org.junit.Test)
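
Both tests hinge on the same extraction rule: GroupingTableMap builds a grouped key only when the matching cells line up one-to-one with the configured group columns. The following is a paraphrased sketch of that check, not the HBase source itself; matchesAnyGroupColumn is a hypothetical helper standing in for the family:qualifier comparison against GROUP_COLUMNS.

List<byte[]> found = new ArrayList<>();
for (Cell cell : result.listCells()) {
    if (matchesAnyGroupColumn(cell)) {
        found.add(CellUtil.cloneValue(cell));
    }
}
// Example 9: familyA:qualifierA matches twice, so found.size() == 3, not 2: no key, no collect.
// Example 10: exactly one match per configured column, so found.size() == 2: key built, one collect.
byte[][] groupKey = (found.size() == numGroupColumns)
    ? found.toArray(new byte[numGroupColumns][])
    : null;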

Aggregations

ImmutableBytesWritable (org.apache.hadoop.hbase.io.ImmutableBytesWritable): 262
Test (org.junit.Test): 79
Expression (org.apache.phoenix.expression.Expression): 32
IOException (java.io.IOException): 26
PSmallint (org.apache.phoenix.schema.types.PSmallint): 25
Result (org.apache.hadoop.hbase.client.Result): 24
PTable (org.apache.phoenix.schema.PTable): 24
ArrayList (java.util.ArrayList): 23
Cell (org.apache.hadoop.hbase.Cell): 23
KeyValue (org.apache.hadoop.hbase.KeyValue): 23
LiteralExpression (org.apache.phoenix.expression.LiteralExpression): 23
PTinyint (org.apache.phoenix.schema.types.PTinyint): 23
PhoenixArray (org.apache.phoenix.schema.types.PhoenixArray): 23
PDataType (org.apache.phoenix.schema.types.PDataType): 20
PUnsignedSmallint (org.apache.phoenix.schema.types.PUnsignedSmallint): 20
PUnsignedTinyint (org.apache.phoenix.schema.types.PUnsignedTinyint): 20
List (java.util.List): 19
Configuration (org.apache.hadoop.conf.Configuration): 19
SQLException (java.sql.SQLException): 18
Put (org.apache.hadoop.hbase.client.Put): 17