Example 1 with InvalidLabelException

Use of org.apache.hadoop.hbase.security.visibility.InvalidLabelException in project hbase by apache.

From class TextSortReducer, method reduce().

@Override
protected void reduce(ImmutableBytesWritable rowKey, java.lang.Iterable<Text> lines, Reducer<ImmutableBytesWritable, Text, ImmutableBytesWritable, KeyValue>.Context context) throws java.io.IOException, InterruptedException {
    // although reduce() is called per-row, handle pathological case
    long threshold = context.getConfiguration().getLong("reducer.row.threshold", 1L * (1 << 30));
    Iterator<Text> iter = lines.iterator();
    while (iter.hasNext()) {
        Set<KeyValue> kvs = new TreeSet<>(CellComparator.COMPARATOR);
        long curSize = 0;
        // stop at the end or the RAM threshold
        while (iter.hasNext() && curSize < threshold) {
            Text line = iter.next();
            byte[] lineBytes = line.getBytes();
            try {
                ImportTsv.TsvParser.ParsedLine parsed = parser.parse(lineBytes, line.getLength());
                // Retrieve timestamp if exists
                ts = parsed.getTimestamp(ts);
                cellVisibilityExpr = parsed.getCellVisibility();
                ttl = parsed.getCellTTL();
                // create tags for the parsed line
                List<Tag> tags = new ArrayList<>();
                if (cellVisibilityExpr != null) {
                    tags.addAll(kvCreator.getVisibilityExpressionResolver().createVisibilityExpTags(cellVisibilityExpr));
                }
                // into puts
                if (ttl > 0) {
                    tags.add(new ArrayBackedTag(TagType.TTL_TAG_TYPE, Bytes.toBytes(ttl)));
                }
                for (int i = 0; i < parsed.getColumnCount(); i++) {
                    if (i == parser.getRowKeyColumnIndex() || i == parser.getTimestampKeyColumnIndex() || i == parser.getAttributesKeyColumnIndex() || i == parser.getCellVisibilityColumnIndex() || i == parser.getCellTTLColumnIndex()) {
                        continue;
                    }
                    // Creating the KV which needs to be directly written to HFiles. Using the Facade
                    // KVCreator for creation of kvs.
                    Cell cell = this.kvCreator.create(lineBytes, parsed.getRowKeyOffset(), parsed.getRowKeyLength(), parser.getFamily(i), 0, parser.getFamily(i).length, parser.getQualifier(i), 0, parser.getQualifier(i).length, ts, lineBytes, parsed.getColumnOffset(i), parsed.getColumnLength(i), tags);
                    KeyValue kv = KeyValueUtil.ensureKeyValue(cell);
                    kvs.add(kv);
                    curSize += kv.heapSize();
                }
            } catch (ImportTsv.TsvParser.BadTsvLineException | IllegalArgumentException | InvalidLabelException badLine) {
                if (skipBadLines) {
                    System.err.println("Bad line." + badLine.getMessage());
                    incrementBadLineCount(1);
                    continue;
                }
                throw new IOException(badLine);
            }
        }
        context.setStatus("Read " + kvs.size() + " entries of " + kvs.getClass() + "(" + StringUtils.humanReadableInt(curSize) + ")");
        int index = 0;
        for (KeyValue kv : kvs) {
            context.write(rowKey, kv);
            if (++index > 0 && index % 100 == 0)
                context.setStatus("Wrote " + index + " key values.");
        }
        // if we have more entries to process
        if (iter.hasNext()) {
            // force flush because we cannot guarantee intra-row sorted order
            context.write(null, null);
        }
    }
}
Also used: KeyValue(org.apache.hadoop.hbase.KeyValue) ArrayList(java.util.ArrayList) Text(org.apache.hadoop.io.Text) IOException(java.io.IOException) ArrayBackedTag(org.apache.hadoop.hbase.ArrayBackedTag) InvalidLabelException(org.apache.hadoop.hbase.security.visibility.InvalidLabelException) TreeSet(java.util.TreeSet) Tag(org.apache.hadoop.hbase.Tag) Cell(org.apache.hadoop.hbase.Cell)
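
Aside from the InvalidLabelException handling, the part of this reducer worth calling out is the RAM threshold: cells for a single row are accumulated in the TreeSet only until their combined heap size crosses reducer.row.threshold, so one pathologically wide row cannot exhaust reducer memory. Below is a minimal, self-contained sketch of that batching pattern under stated assumptions: Item and its heapSize() method are hypothetical stand-ins for KeyValue and KeyValue.heapSize(), and the threshold and sizes are made up.

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

// Sketch of the "accumulate until a heap-size threshold, then flush" pattern
// used by TextSortReducer.reduce(). Item is a hypothetical stand-in for KeyValue.
public class ThresholdBatchingSketch {

    static final class Item {
        private final long heapSize;
        Item(long heapSize) { this.heapSize = heapSize; }
        long heapSize() { return heapSize; }
    }

    static void process(Iterator<Item> iter, long threshold) {
        while (iter.hasNext()) {
            List<Item> batch = new ArrayList<>();
            long curSize = 0;
            // Stop at the end of the input or once the batch crosses the threshold.
            while (iter.hasNext() && curSize < threshold) {
                Item item = iter.next();
                batch.add(item);
                curSize += item.heapSize();
            }
            // In the real reducer this is where the sorted KeyValues are written
            // to the context, followed by a forced flush if more input remains.
            System.out.println("Flushing " + batch.size() + " items (" + curSize + " bytes)");
        }
    }

    public static void main(String[] args) {
        List<Item> items = new ArrayList<>();
        for (int i = 0; i < 10; i++) {
            items.add(new Item(400L));
        }
        process(items.iterator(), 1024L);
    }
}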

Example 2 with InvalidLabelException

Use of org.apache.hadoop.hbase.security.visibility.InvalidLabelException in project hbase by apache.

From class TextSortReducer, method reduce().

@Override
protected void reduce(ImmutableBytesWritable rowKey, java.lang.Iterable<Text> lines, Reducer<ImmutableBytesWritable, Text, ImmutableBytesWritable, KeyValue>.Context context) throws java.io.IOException, InterruptedException {
    // although reduce() is called per-row, handle pathological case
    long threshold = context.getConfiguration().getLong("reducer.row.threshold", 1L * (1 << 30));
    Iterator<Text> iter = lines.iterator();
    while (iter.hasNext()) {
        Set<KeyValue> kvs = new TreeSet<>(CellComparator.getInstance());
        long curSize = 0;
        // stop at the end or the RAM threshold
        while (iter.hasNext() && curSize < threshold) {
            Text line = iter.next();
            byte[] lineBytes = line.getBytes();
            try {
                ImportTsv.TsvParser.ParsedLine parsed = parser.parse(lineBytes, line.getLength());
                // Retrieve timestamp if exists
                ts = parsed.getTimestamp(ts);
                cellVisibilityExpr = parsed.getCellVisibility();
                ttl = parsed.getCellTTL();
                // create tags for the parsed line
                List<Tag> tags = new ArrayList<>();
                if (cellVisibilityExpr != null) {
                    tags.addAll(kvCreator.getVisibilityExpressionResolver().createVisibilityExpTags(cellVisibilityExpr));
                }
                // into puts
                if (ttl > 0) {
                    tags.add(new ArrayBackedTag(TagType.TTL_TAG_TYPE, Bytes.toBytes(ttl)));
                }
                for (int i = 0; i < parsed.getColumnCount(); i++) {
                    if (i == parser.getRowKeyColumnIndex() || i == parser.getTimestampKeyColumnIndex() || i == parser.getAttributesKeyColumnIndex() || i == parser.getCellVisibilityColumnIndex() || i == parser.getCellTTLColumnIndex()) {
                        continue;
                    }
                    // Creating the KV which needs to be directly written to HFiles. Using the Facade
                    // KVCreator for creation of kvs.
                    Cell cell = this.kvCreator.create(lineBytes, parsed.getRowKeyOffset(), parsed.getRowKeyLength(), parser.getFamily(i), 0, parser.getFamily(i).length, parser.getQualifier(i), 0, parser.getQualifier(i).length, ts, lineBytes, parsed.getColumnOffset(i), parsed.getColumnLength(i), tags);
                    KeyValue kv = KeyValueUtil.ensureKeyValue(cell);
                    kvs.add(kv);
                    curSize += kv.heapSize();
                }
            } catch (ImportTsv.TsvParser.BadTsvLineException | IllegalArgumentException | InvalidLabelException badLine) {
                if (skipBadLines) {
                    System.err.println("Bad line." + badLine.getMessage());
                    incrementBadLineCount(1);
                    continue;
                }
                throw new IOException(badLine);
            }
        }
        context.setStatus("Read " + kvs.size() + " entries of " + kvs.getClass() + "(" + StringUtils.humanReadableInt(curSize) + ")");
        int index = 0;
        for (KeyValue kv : kvs) {
            context.write(rowKey, kv);
            if (++index > 0 && index % 100 == 0)
                context.setStatus("Wrote " + index + " key values.");
        }
        // if we have more entries to process
        if (iter.hasNext()) {
            // force flush because we cannot guarantee intra-row sorted order
            context.write(null, null);
        }
    }
}
Also used: KeyValue(org.apache.hadoop.hbase.KeyValue) ArrayList(java.util.ArrayList) Text(org.apache.hadoop.io.Text) IOException(java.io.IOException) ArrayBackedTag(org.apache.hadoop.hbase.ArrayBackedTag) InvalidLabelException(org.apache.hadoop.hbase.security.visibility.InvalidLabelException) TreeSet(java.util.TreeSet) Tag(org.apache.hadoop.hbase.Tag) Cell(org.apache.hadoop.hbase.Cell)
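
This second example is the same reduce() method against a newer HBase API, where the comparator comes from CellComparator.getInstance() rather than the older CellComparator.COMPARATOR constant used in Example 1. A TreeSet keyed by that comparator is used because HFiles must be written in cell order; the sketch below illustrates the idea in isolation. It is only an illustration, not part of the original reducer, and it assumes an HBase 2.x client jar on the classpath.

import java.util.TreeSet;

import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellComparator;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.util.Bytes;

// Illustration only: cells added out of order come back in the order an HFile expects,
// because the TreeSet sorts them with the same comparator the reducer uses.
public class SortedCellsSketch {
    public static void main(String[] args) {
        TreeSet<Cell> cells = new TreeSet<>(CellComparator.getInstance());
        byte[] row = Bytes.toBytes("row1");
        byte[] family = Bytes.toBytes("d");
        cells.add(new KeyValue(row, family, Bytes.toBytes("q2"), Bytes.toBytes("v2")));
        cells.add(new KeyValue(row, family, Bytes.toBytes("q1"), Bytes.toBytes("v1")));
        for (Cell cell : cells) {
            System.out.println(cell); // q1 prints before q2
        }
    }
}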

Example 3 with InvalidLabelException

Use of org.apache.hadoop.hbase.security.visibility.InvalidLabelException in project hbase by apache.

From class TsvImporterMapper, method map().

/**
 * Convert a line of TSV text into an HBase table row.
 */
@Override
public void map(LongWritable offset, Text value, Context context) throws IOException {
    byte[] lineBytes = value.getBytes();
    try {
        ImportTsv.TsvParser.ParsedLine parsed = parser.parse(lineBytes, value.getLength());
        ImmutableBytesWritable rowKey = new ImmutableBytesWritable(lineBytes, parsed.getRowKeyOffset(), parsed.getRowKeyLength());
        // Retrieve timestamp if exists
        ts = parsed.getTimestamp(ts);
        cellVisibilityExpr = parsed.getCellVisibility();
        ttl = parsed.getCellTTL();
        // create tags for the parsed line
        if (hfileOutPath != null) {
            tags.clear();
            if (cellVisibilityExpr != null) {
                tags.addAll(kvCreator.getVisibilityExpressionResolver().createVisibilityExpTags(cellVisibilityExpr));
            }
            // into puts
            if (ttl > 0) {
                tags.add(new ArrayBackedTag(TagType.TTL_TAG_TYPE, Bytes.toBytes(ttl)));
            }
        }
        Put put = new Put(rowKey.copyBytes());
        for (int i = 0; i < parsed.getColumnCount(); i++) {
            if (i == parser.getRowKeyColumnIndex() || i == parser.getTimestampKeyColumnIndex() || i == parser.getAttributesKeyColumnIndex() || i == parser.getCellVisibilityColumnIndex() || i == parser.getCellTTLColumnIndex() || (skipEmptyColumns && parsed.getColumnLength(i) == 0)) {
                continue;
            }
            populatePut(lineBytes, parsed, put, i);
        }
        context.write(rowKey, put);
    } catch (ImportTsv.TsvParser.BadTsvLineException | IllegalArgumentException | InvalidLabelException badLine) {
        if (logBadLines) {
            System.err.println(value);
        }
        System.err.println("Bad line at offset: " + offset.get() + ":\n" + badLine.getMessage());
        if (skipBadLines) {
            incrementBadLineCount(1);
            return;
        }
        throw new IOException(badLine);
    } catch (InterruptedException e) {
        LOG.error("Interrupted while emitting put", e);
        Thread.currentThread().interrupt();
    }
}
Also used: ImmutableBytesWritable(org.apache.hadoop.hbase.io.ImmutableBytesWritable) IOException(java.io.IOException) ArrayBackedTag(org.apache.hadoop.hbase.ArrayBackedTag) Put(org.apache.hadoop.hbase.client.Put) BadTsvLineException(org.apache.hadoop.hbase.mapreduce.ImportTsv.TsvParser.BadTsvLineException) InvalidLabelException(org.apache.hadoop.hbase.security.visibility.InvalidLabelException)
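
All three examples catch InvalidLabelException at the point where the cell visibility expression parsed from the TSV line is resolved into tags; the exception surfaces when the expression references a label the cluster does not know about. A hedged sketch of the usual remedy follows: define the labels up front with VisibilityClient before running the import. The Connection-based addLabels signature shown is the one in recent HBase releases as far as I know, so check it against your version, and run it as a user allowed to administer labels.

import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.security.visibility.VisibilityClient;

// Sketch: register the visibility labels an import will reference, so that
// resolving cell visibility expressions does not hit InvalidLabelException.
public class DefineVisibilityLabels {
    public static void main(String[] args) throws Throwable {
        try (Connection connection = ConnectionFactory.createConnection(HBaseConfiguration.create())) {
            // Label names here are examples; use the ones your visibility expressions reference.
            VisibilityClient.addLabels(connection, new String[] { "secret", "confidential" });
        }
    }
}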

Aggregations

IOException (java.io.IOException): 3 uses
ArrayBackedTag (org.apache.hadoop.hbase.ArrayBackedTag): 3 uses
InvalidLabelException (org.apache.hadoop.hbase.security.visibility.InvalidLabelException): 3 uses
ArrayList (java.util.ArrayList): 2 uses
TreeSet (java.util.TreeSet): 2 uses
Cell (org.apache.hadoop.hbase.Cell): 2 uses
KeyValue (org.apache.hadoop.hbase.KeyValue): 2 uses
Tag (org.apache.hadoop.hbase.Tag): 2 uses
Text (org.apache.hadoop.io.Text): 2 uses
Put (org.apache.hadoop.hbase.client.Put): 1 use
ImmutableBytesWritable (org.apache.hadoop.hbase.io.ImmutableBytesWritable): 1 use
BadTsvLineException (org.apache.hadoop.hbase.mapreduce.ImportTsv.TsvParser.BadTsvLineException): 1 use