
Example 6 with SortedMap

Use of java.util.SortedMap in the Apache Hive project.

Class HiveHFileOutputFormat, method getHiveRecordWriter.

@Override
public RecordWriter getHiveRecordWriter(final JobConf jc, final Path finalOutPath, Class<? extends Writable> valueClass, boolean isCompressed, Properties tableProperties, final Progressable progressable) throws IOException {
    // Read configuration for the target path, first from jobconf, then from table properties
    String hfilePath = getFamilyPath(jc, tableProperties);
    if (hfilePath == null) {
        throw new RuntimeException("Please set " + HFILE_FAMILY_PATH + " to target location for HFiles");
    }
    // Target path's last component is also the column family name.
    final Path columnFamilyPath = new Path(hfilePath);
    final String columnFamilyName = columnFamilyPath.getName();
    final byte[] columnFamilyNameBytes = Bytes.toBytes(columnFamilyName);
    final Job job = new Job(jc);
    setCompressOutput(job, isCompressed);
    setOutputPath(job, finalOutPath);
    // Create the HFile writer
    final org.apache.hadoop.mapreduce.TaskAttemptContext tac = ShimLoader.getHadoopShims().newTaskAttemptContext(job.getConfiguration(), progressable);
    final Path outputdir = FileOutputFormat.getOutputPath(tac);
    final Path taskAttemptOutputdir = new FileOutputCommitter(outputdir, tac).getWorkPath();
    final org.apache.hadoop.mapreduce.RecordWriter<ImmutableBytesWritable, KeyValue> fileWriter = getFileWriter(tac);
    // Individual columns are going to be pivoted to HBase cells,
    // and for each row, they need to be written out in order
    // of column name, so sort the column names now, creating a
    // mapping to their column position.  However, the first
    // column is interpreted as the row key.
    String columnList = tableProperties.getProperty("columns");
    String[] columnArray = columnList.split(",");
    final SortedMap<byte[], Integer> columnMap = new TreeMap<byte[], Integer>(Bytes.BYTES_COMPARATOR);
    int i = 0;
    for (String columnName : columnArray) {
        if (i != 0) {
            columnMap.put(Bytes.toBytes(columnName), i);
        }
        ++i;
    }
    return new RecordWriter() {

        @Override
        public void close(boolean abort) throws IOException {
            try {
                fileWriter.close(null);
                if (abort) {
                    return;
                }
                // Move the hfiles file(s) from the task output directory to the
                // location specified by the user.
                FileSystem fs = outputdir.getFileSystem(jc);
                fs.mkdirs(columnFamilyPath);
                Path srcDir = taskAttemptOutputdir;
                for (; ; ) {
                    FileStatus[] files = fs.listStatus(srcDir, FileUtils.STAGING_DIR_PATH_FILTER);
                    if ((files == null) || (files.length == 0)) {
                        throw new IOException("No family directories found in " + srcDir);
                    }
                    if (files.length != 1) {
                        throw new IOException("Multiple family directories found in " + srcDir);
                    }
                    srcDir = files[0].getPath();
                    if (srcDir.getName().equals(columnFamilyName)) {
                        break;
                    }
                    if (files[0].isFile()) {
                        throw new IOException("No family directories found in " + taskAttemptOutputdir + ". " + "The last component in hfile path should match column family name " + columnFamilyName);
                    }
                }
                for (FileStatus regionFile : fs.listStatus(srcDir, FileUtils.STAGING_DIR_PATH_FILTER)) {
                    fs.rename(regionFile.getPath(), new Path(columnFamilyPath, regionFile.getPath().getName()));
                }
                // Hive actually wants a file as task output (not a directory), so
                // replace the empty directory with an empty file to keep it happy.
                fs.delete(taskAttemptOutputdir, true);
                fs.createNewFile(taskAttemptOutputdir);
            } catch (InterruptedException ex) {
                throw new IOException(ex);
            }
        }

        private void writeText(Text text) throws IOException {
            // Decompose the incoming text row into fields. Hive's default field
            // delimiter is Ctrl-A (\u0001), which is what the split expects here.
            String s = text.toString();
            String[] fields = s.split("\u0001");
            assert (fields.length <= (columnMap.size() + 1));
            // First field is the row key.
            byte[] rowKeyBytes = Bytes.toBytes(fields[0]);
            // Remaining fields are cells addressed by column name within row.
            for (Map.Entry<byte[], Integer> entry : columnMap.entrySet()) {
                byte[] columnNameBytes = entry.getKey();
                int iColumn = entry.getValue();
                String val;
                if (iColumn >= fields.length) {
                    // trailing blank field
                    val = "";
                } else {
                    val = fields[iColumn];
                    if ("\\N".equals(val)) {
                        // omit nulls
                        continue;
                    }
                }
                byte[] valBytes = Bytes.toBytes(val);
                KeyValue kv = new KeyValue(rowKeyBytes, columnFamilyNameBytes, columnNameBytes, valBytes);
                try {
                    fileWriter.write(null, kv);
                } catch (IOException e) {
                    LOG.error("Failed while writing row: " + s);
                    throw e;
                } catch (InterruptedException ex) {
                    throw new IOException(ex);
                }
            }
        }

        private void writePut(PutWritable put) throws IOException {
            ImmutableBytesWritable row = new ImmutableBytesWritable(put.getPut().getRow());
            SortedMap<byte[], List<Cell>> cells = put.getPut().getFamilyCellMap();
            for (Map.Entry<byte[], List<Cell>> entry : cells.entrySet()) {
                Collections.sort(entry.getValue(), new CellComparator());
                for (Cell c : entry.getValue()) {
                    try {
                        fileWriter.write(row, KeyValueUtil.copyToNewKeyValue(c));
                    } catch (InterruptedException e) {
                        throw (InterruptedIOException) new InterruptedIOException().initCause(e);
                    }
                }
            }
        }

        @Override
        public void write(Writable w) throws IOException {
            if (w instanceof Text) {
                writeText((Text) w);
            } else if (w instanceof PutWritable) {
                writePut((PutWritable) w);
            } else {
                throw new IOException("Unexpected writable " + w);
            }
        }
    };
}
Also used: IOException (java.io.IOException), InterruptedIOException (java.io.InterruptedIOException), List (java.util.List), Map (java.util.Map), SortedMap (java.util.SortedMap), TreeMap (java.util.TreeMap), FileStatus (org.apache.hadoop.fs.FileStatus), FileSystem (org.apache.hadoop.fs.FileSystem), Path (org.apache.hadoop.fs.Path), Cell (org.apache.hadoop.hbase.Cell), CellComparator (org.apache.hadoop.hbase.CellComparator), KeyValue (org.apache.hadoop.hbase.KeyValue), ImmutableBytesWritable (org.apache.hadoop.hbase.io.ImmutableBytesWritable), RecordWriter (org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter), Text (org.apache.hadoop.io.Text), Writable (org.apache.hadoop.io.Writable), Job (org.apache.hadoop.mapreduce.Job), FileOutputCommitter (org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter)
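The heart of this example is the byte[]-keyed TreeMap built with Bytes.BYTES_COMPARATOR, which guarantees that the cells of each row are emitted in the qualifier order HFiles require. Below is a minimal, self-contained sketch of that idea using a plain-JDK comparator; ColumnOrderSketch and BYTES_LEXICO are hypothetical names for illustration, not part of Hive or HBase.

import java.nio.charset.StandardCharsets;
import java.util.Comparator;
import java.util.Map;
import java.util.SortedMap;
import java.util.TreeMap;

public class ColumnOrderSketch {

    // Unsigned, byte-by-byte lexicographic comparator; a plain-JDK stand-in
    // for HBase's Bytes.BYTES_COMPARATOR used in the example above.
    static final Comparator<byte[]> BYTES_LEXICO = (a, b) -> {
        int n = Math.min(a.length, b.length);
        for (int i = 0; i < n; i++) {
            int cmp = (a[i] & 0xff) - (b[i] & 0xff);
            if (cmp != 0) {
                return cmp;
            }
        }
        return a.length - b.length;
    };

    public static void main(String[] args) {
        // byte[] has no natural ordering, so a TreeMap keyed on byte[] needs an
        // explicit comparator; without one, put() fails with ClassCastException.
        SortedMap<byte[], Integer> columnMap = new TreeMap<>(BYTES_LEXICO);
        String[] columns = { "rowkey", "zip", "city", "age" };
        for (int i = 1; i < columns.length; i++) {
            // skip column 0: it is the row key, not an HBase cell
            columnMap.put(columns[i].getBytes(StandardCharsets.UTF_8), i);
        }
        // Iteration order is byte-lexicographic (age, city, zip), which is the
        // order cells must be written within a row when building HFiles.
        for (Map.Entry<byte[], Integer> e : columnMap.entrySet()) {
            System.out.println(new String(e.getKey(), StandardCharsets.UTF_8) + " -> " + e.getValue());
        }
    }
}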

Example 7 with SortedMap

Use of java.util.SortedMap in the Apache Hive project.

Class MockUtils, method init.

static HBaseStore init(Configuration conf, HTableInterface htable, final SortedMap<String, Cell> rows) throws IOException {
    ((HiveConf) conf).setVar(ConfVars.METASTORE_EXPRESSION_PROXY_CLASS, NOOPProxy.class.getName());
    Mockito.when(htable.get(Mockito.any(Get.class))).thenAnswer(new Answer<Result>() {

        @Override
        public Result answer(InvocationOnMock invocation) throws Throwable {
            Get get = (Get) invocation.getArguments()[0];
            Cell cell = rows.get(new String(get.getRow()));
            if (cell == null) {
                return new Result();
            } else {
                return Result.create(new Cell[] { cell });
            }
        }
    });
    Mockito.when(htable.get(Mockito.anyListOf(Get.class))).thenAnswer(new Answer<Result[]>() {

        @Override
        public Result[] answer(InvocationOnMock invocation) throws Throwable {
            @SuppressWarnings("unchecked") List<Get> gets = (List<Get>) invocation.getArguments()[0];
            Result[] results = new Result[gets.size()];
            for (int i = 0; i < gets.size(); i++) {
                Cell cell = rows.get(new String(gets.get(i).getRow()));
                Result result;
                if (cell == null) {
                    result = new Result();
                } else {
                    result = Result.create(new Cell[] { cell });
                }
                results[i] = result;
            }
            return results;
        }
    });
    Mockito.when(htable.getScanner(Mockito.any(Scan.class))).thenAnswer(new Answer<ResultScanner>() {

        @Override
        public ResultScanner answer(InvocationOnMock invocation) throws Throwable {
            Scan scan = (Scan) invocation.getArguments()[0];
            List<Result> results = new ArrayList<Result>();
            String start = new String(scan.getStartRow());
            String stop = new String(scan.getStopRow());
            SortedMap<String, Cell> sub = rows.subMap(start, stop);
            for (Map.Entry<String, Cell> e : sub.entrySet()) {
                results.add(Result.create(new Cell[] { e.getValue() }));
            }
            final Iterator<Result> iter = results.iterator();
            return new ResultScanner() {

                @Override
                public Result next() throws IOException {
                    return null;
                }

                @Override
                public Result[] next(int nbRows) throws IOException {
                    return new Result[0];
                }

                @Override
                public void close() {
                }

                @Override
                public Iterator<Result> iterator() {
                    return iter;
                }
            };
        }
    });
    Mockito.doAnswer(new Answer<Void>() {

        @Override
        public Void answer(InvocationOnMock invocation) throws Throwable {
            Put put = (Put) invocation.getArguments()[0];
            rows.put(new String(put.getRow()), put.getFamilyCellMap().firstEntry().getValue().get(0));
            return null;
        }
    }).when(htable).put(Mockito.any(Put.class));
    Mockito.when(htable.checkAndPut(Mockito.any(byte[].class), Mockito.any(byte[].class), Mockito.any(byte[].class), Mockito.any(byte[].class), Mockito.any(Put.class))).thenAnswer(new Answer<Boolean>() {

        @Override
        public Boolean answer(InvocationOnMock invocation) throws Throwable {
            // Always say it succeeded and overwrite
            Put put = (Put) invocation.getArguments()[4];
            rows.put(new String(put.getRow()), put.getFamilyCellMap().firstEntry().getValue().get(0));
            return true;
        }
    });
    Mockito.doAnswer(new Answer<Void>() {

        @Override
        public Void answer(InvocationOnMock invocation) throws Throwable {
            Delete del = (Delete) invocation.getArguments()[0];
            rows.remove(new String(del.getRow()));
            return null;
        }
    }).when(htable).delete(Mockito.any(Delete.class));
    Mockito.when(htable.checkAndDelete(Mockito.any(byte[].class), Mockito.any(byte[].class), Mockito.any(byte[].class), Mockito.any(byte[].class), Mockito.any(Delete.class))).thenAnswer(new Answer<Boolean>() {

        @Override
        public Boolean answer(InvocationOnMock invocation) throws Throwable {
            // Always say it succeeded
            Delete del = (Delete) invocation.getArguments()[4];
            rows.remove(new String(del.getRow()));
            return true;
        }
    });
    // Mock connection
    HBaseConnection hconn = Mockito.mock(HBaseConnection.class);
    Mockito.when(hconn.getHBaseTable(Mockito.anyString())).thenReturn(htable);
    HiveConf.setVar(conf, HiveConf.ConfVars.METASTORE_HBASE_CONNECTION_CLASS, HBaseReadWrite.TEST_CONN);
    HBaseReadWrite.setTestConnection(hconn);
    HBaseReadWrite.setConf(conf);
    HBaseStore store = new HBaseStore();
    store.setConf(conf);
    return store;
}
Also used: IOException (java.io.IOException), ArrayList (java.util.ArrayList), Iterator (java.util.Iterator), List (java.util.List), SortedMap (java.util.SortedMap), Cell (org.apache.hadoop.hbase.Cell), Delete (org.apache.hadoop.hbase.client.Delete), Get (org.apache.hadoop.hbase.client.Get), Put (org.apache.hadoop.hbase.client.Put), Result (org.apache.hadoop.hbase.client.Result), ResultScanner (org.apache.hadoop.hbase.client.ResultScanner), Scan (org.apache.hadoop.hbase.client.Scan), HiveConf (org.apache.hadoop.hive.conf.HiveConf), InvocationOnMock (org.mockito.invocation.InvocationOnMock)
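What makes a SortedMap a good stand-in for an HBase table in this mock is subMap(start, stop), which returns exactly the half-open row range an HBase Scan would. A minimal sketch of that behavior follows; the row keys and values are invented, and plain Strings replace HBase Cells to keep it dependency-free.

import java.util.SortedMap;
import java.util.TreeMap;

public class SubMapScanSketch {

    public static void main(String[] args) {
        // Hypothetical row keys; the mock above stores HBase Cells, plain
        // Strings keep this sketch self-contained.
        SortedMap<String, String> rows = new TreeMap<>();
        rows.put("db1.tbl1", "cell-a");
        rows.put("db1.tbl2", "cell-b");
        rows.put("db2.tbl1", "cell-c");

        // subMap(start, stop) is half-open, [start, stop), which matches the
        // semantics of an HBase Scan with a start row and a stop row.
        SortedMap<String, String> scanned = rows.subMap("db1.", "db1/");
        scanned.forEach((k, v) -> System.out.println(k + " -> " + v));
        // prints db1.tbl1 and db1.tbl2; db2.tbl1 falls outside the range
    }
}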

Example 8 with SortedMap

Use of java.util.SortedMap in the Apache Hive project.

Class HBaseUtils, method hashStorageDescriptor.

/**
   * Produce a hash for the storage descriptor
   * @param sd storage descriptor to hash
   * @param md message digest to use to generate the hash
   * @return the hash as a byte array
   */
static byte[] hashStorageDescriptor(StorageDescriptor sd, MessageDigest md) {
    // Note all maps and lists have to be absolutely sorted.  Otherwise we'll produce different
    // results for hashes based on the OS or JVM being used.
    md.reset();
    for (FieldSchema fs : sd.getCols()) {
        md.update(fs.getName().getBytes(ENCODING));
        md.update(fs.getType().getBytes(ENCODING));
        if (fs.getComment() != null)
            md.update(fs.getComment().getBytes(ENCODING));
    }
    if (sd.getInputFormat() != null) {
        md.update(sd.getInputFormat().getBytes(ENCODING));
    }
    if (sd.getOutputFormat() != null) {
        md.update(sd.getOutputFormat().getBytes(ENCODING));
    }
    md.update(sd.isCompressed() ? "true".getBytes(ENCODING) : "false".getBytes(ENCODING));
    md.update(Integer.toString(sd.getNumBuckets()).getBytes(ENCODING));
    if (sd.getSerdeInfo() != null) {
        SerDeInfo serde = sd.getSerdeInfo();
        if (serde.getName() != null) {
            md.update(serde.getName().getBytes(ENCODING));
        }
        if (serde.getSerializationLib() != null) {
            md.update(serde.getSerializationLib().getBytes(ENCODING));
        }
        if (serde.getParameters() != null) {
            SortedMap<String, String> params = new TreeMap<>(serde.getParameters());
            for (Map.Entry<String, String> param : params.entrySet()) {
                md.update(param.getKey().getBytes(ENCODING));
                md.update(param.getValue().getBytes(ENCODING));
            }
        }
    }
    if (sd.getBucketCols() != null) {
        SortedSet<String> bucketCols = new TreeSet<>(sd.getBucketCols());
        for (String bucket : bucketCols) md.update(bucket.getBytes(ENCODING));
    }
    if (sd.getSortCols() != null) {
        SortedSet<Order> orders = new TreeSet<>(sd.getSortCols());
        for (Order order : orders) {
            md.update(order.getCol().getBytes(ENCODING));
            md.update(Integer.toString(order.getOrder()).getBytes(ENCODING));
        }
    }
    if (sd.getSkewedInfo() != null) {
        SkewedInfo skewed = sd.getSkewedInfo();
        if (skewed.getSkewedColNames() != null) {
            SortedSet<String> colnames = new TreeSet<>(skewed.getSkewedColNames());
            for (String colname : colnames) md.update(colname.getBytes(ENCODING));
        }
        if (skewed.getSkewedColValues() != null) {
            SortedSet<String> sortedOuterList = new TreeSet<>();
            for (List<String> innerList : skewed.getSkewedColValues()) {
                SortedSet<String> sortedInnerList = new TreeSet<>(innerList);
                sortedOuterList.add(StringUtils.join(sortedInnerList, "."));
            }
            for (String colval : sortedOuterList) md.update(colval.getBytes(ENCODING));
        }
        if (skewed.getSkewedColValueLocationMaps() != null) {
            SortedMap<String, String> sortedMap = new TreeMap<>();
            for (Map.Entry<List<String>, String> smap : skewed.getSkewedColValueLocationMaps().entrySet()) {
                SortedSet<String> sortedKey = new TreeSet<>(smap.getKey());
                sortedMap.put(StringUtils.join(sortedKey, "."), smap.getValue());
            }
            for (Map.Entry<String, String> e : sortedMap.entrySet()) {
                md.update(e.getKey().getBytes(ENCODING));
                md.update(e.getValue().getBytes(ENCODING));
            }
        }
    }
    return md.digest();
}
Also used: ByteString (com.google.protobuf.ByteString), ArrayList (java.util.ArrayList), HashMap (java.util.HashMap), List (java.util.List), Map (java.util.Map), SortedMap (java.util.SortedMap), TreeMap (java.util.TreeMap), TreeSet (java.util.TreeSet), FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema), Order (org.apache.hadoop.hive.metastore.api.Order), SerDeInfo (org.apache.hadoop.hive.metastore.api.SerDeInfo), SkewedInfo (org.apache.hadoop.hive.metastore.api.SkewedInfo)
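The comment at the top of hashStorageDescriptor is the whole point: copying each Map into a TreeMap (and each List into a TreeSet) before feeding the MessageDigest makes the hash independent of iteration order and therefore of the OS and JVM. A small sketch of that property follows, assuming MD5 and UTF-8 purely for illustration; hashParams is a hypothetical helper, not a Hive API.

import java.nio.charset.StandardCharsets;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.Arrays;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.SortedMap;
import java.util.TreeMap;

public class DeterministicHashSketch {

    // Hypothetical helper: hash map entries in key order so the digest does not
    // depend on the iteration order of whatever Map implementation arrives.
    static byte[] hashParams(Map<String, String> params) throws NoSuchAlgorithmException {
        MessageDigest md = MessageDigest.getInstance("MD5");
        SortedMap<String, String> sorted = new TreeMap<>(params);
        for (Map.Entry<String, String> e : sorted.entrySet()) {
            md.update(e.getKey().getBytes(StandardCharsets.UTF_8));
            md.update(e.getValue().getBytes(StandardCharsets.UTF_8));
        }
        return md.digest();
    }

    public static void main(String[] args) throws NoSuchAlgorithmException {
        Map<String, String> a = new HashMap<>();
        a.put("serialization.format", "1");
        a.put("field.delim", ",");

        Map<String, String> b = new LinkedHashMap<>();
        b.put("field.delim", ",");
        b.put("serialization.format", "1");

        // Same entries, different insertion and iteration order, identical digest.
        System.out.println(Arrays.equals(hashParams(a), hashParams(b)));   // true
    }
}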

Example 9 with SortedMap

Use of java.util.SortedMap in the Apache Hive project.

Class HiveRelDecorrelator, method decorrelateRel.

/**
   * Rewrite LogicalProject.
   *
   * @param rel the project rel to rewrite
   */
public Frame decorrelateRel(LogicalProject rel) throws SemanticException {
    //
    // Rewrite logic:
    //
    // 1. Pass along any correlated variables coming from the input.
    //
    final RelNode oldInput = rel.getInput();
    Frame frame = getInvoke(oldInput, rel);
    if (frame == null) {
        // If input has not been rewritten, do not rewrite this rel.
        return null;
    }
    final List<RexNode> oldProjects = rel.getProjects();
    final List<RelDataTypeField> relOutput = rel.getRowType().getFieldList();
    // LogicalProject projects the original expressions,
    // plus any correlated variables the input wants to pass along.
    final List<Pair<RexNode, String>> projects = Lists.newArrayList();
    // If this LogicalProject has correlated references, create a value generator
    // and produce the correlated variables in the new output.
    if (cm.mapRefRelToCorRef.containsKey(rel)) {
        frame = decorrelateInputWithValueGenerator(rel);
    }
    // LogicalProject projects the original expressions
    final Map<Integer, Integer> mapOldToNewOutputs = new HashMap<>();
    int newPos;
    for (newPos = 0; newPos < oldProjects.size(); newPos++) {
        projects.add(newPos, Pair.of(decorrelateExpr(oldProjects.get(newPos)), relOutput.get(newPos).getName()));
        mapOldToNewOutputs.put(newPos, newPos);
    }
    // Project any correlated variables the input wants to pass along.
    final SortedMap<CorDef, Integer> corDefOutputs = new TreeMap<>();
    for (Map.Entry<CorDef, Integer> entry : frame.corDefOutputs.entrySet()) {
        projects.add(RexInputRef.of2(entry.getValue(), frame.r.getRowType().getFieldList()));
        corDefOutputs.put(entry.getKey(), newPos);
        newPos++;
    }
    RelNode newProject = HiveProject.create(frame.r, Pair.left(projects), Pair.right(projects));
    return register(rel, newProject, mapOldToNewOutputs, corDefOutputs);
}
Also used: ImmutableMap (com.google.common.collect.ImmutableMap), ImmutableSortedMap (com.google.common.collect.ImmutableSortedMap), HashMap (java.util.HashMap), Map (java.util.Map), NavigableMap (java.util.NavigableMap), SortedMap (java.util.SortedMap), TreeMap (java.util.TreeMap), RelNode (org.apache.calcite.rel.RelNode), RelDataTypeField (org.apache.calcite.rel.type.RelDataTypeField), RexNode (org.apache.calcite.rex.RexNode), Pair (org.apache.calcite.util.Pair)
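The SortedMap here, corDefOutputs keyed by CorDef, records where each correlated variable ends up in the rewritten project's output: original expressions keep their positions and the correlated variables are appended after them. The sketch below mirrors that bookkeeping in isolation; CorKey is a hypothetical stand-in for Calcite's CorDef and plain strings stand in for RexNodes.

import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.SortedMap;
import java.util.TreeMap;

public class CorDefOutputSketch {

    // Hypothetical stand-in for Calcite's CorDef: a correlation id plus a field
    // index, made Comparable so TreeMap iteration is deterministic.
    static final class CorKey implements Comparable<CorKey> {
        final int corrId;
        final int field;

        CorKey(int corrId, int field) {
            this.corrId = corrId;
            this.field = field;
        }

        @Override
        public int compareTo(CorKey o) {
            int c = Integer.compare(corrId, o.corrId);
            return c != 0 ? c : Integer.compare(field, o.field);
        }

        @Override
        public String toString() {
            return "cor" + corrId + "." + field;
        }
    }

    public static void main(String[] args) {
        List<String> projects = new ArrayList<>(List.of("deptno", "name"));

        // Correlated variables the (rewritten) input exposes, at these input positions.
        SortedMap<CorKey, Integer> inputCorOutputs = new TreeMap<>();
        inputCorOutputs.put(new CorKey(1, 3), 7);
        inputCorOutputs.put(new CorKey(0, 2), 5);

        // Append each correlated variable after the original projections and
        // record its new output position, mirroring the loop in the rewrite above.
        SortedMap<CorKey, Integer> corDefOutputs = new TreeMap<>();
        int newPos = projects.size();
        for (Map.Entry<CorKey, Integer> e : inputCorOutputs.entrySet()) {
            // project the input field that carries the correlated variable ...
            projects.add("input#" + e.getValue());
            // ... and remember where it now sits in this node's output
            corDefOutputs.put(e.getKey(), newPos++);
        }

        System.out.println(projects);       // [deptno, name, input#5, input#7]
        System.out.println(corDefOutputs);  // {cor0.2=2, cor1.3=3}
    }
}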

Example 10 with SortedMap

Use of java.util.SortedMap in the Apache Hive project.

Class HiveRelDecorrelator, method decorrelateRel.

public Frame decorrelateRel(HiveJoin rel) throws SemanticException {
    //
    // Rewrite logic:
    //
    // 1. rewrite join condition.
    // 2. map output positions and produce cor vars if any.
    //
    final RelNode oldLeft = rel.getInput(0);
    final RelNode oldRight = rel.getInput(1);
    final Frame leftFrame = getInvoke(oldLeft, rel);
    final Frame rightFrame = getInvoke(oldRight, rel);
    if (leftFrame == null || rightFrame == null) {
        // If any input has not been rewritten, do not rewrite this rel.
        return null;
    }
    final RelNode newJoin = HiveJoin.getJoin(rel.getCluster(), leftFrame.r, rightFrame.r, decorrelateExpr(rel.getCondition()), rel.getJoinType());
    // Create the mapping between the output of the old correlation rel
    // and the new join rel
    Map<Integer, Integer> mapOldToNewOutputs = Maps.newHashMap();
    int oldLeftFieldCount = oldLeft.getRowType().getFieldCount();
    int newLeftFieldCount = leftFrame.r.getRowType().getFieldCount();
    int oldRightFieldCount = oldRight.getRowType().getFieldCount();
    assert rel.getRowType().getFieldCount() == oldLeftFieldCount + oldRightFieldCount;
    // Left input positions are not changed.
    mapOldToNewOutputs.putAll(leftFrame.oldToNewOutputs);
    // Right input positions are shifted by newLeftFieldCount.
    for (int i = 0; i < oldRightFieldCount; i++) {
        mapOldToNewOutputs.put(i + oldLeftFieldCount, rightFrame.oldToNewOutputs.get(i) + newLeftFieldCount);
    }
    final SortedMap<CorDef, Integer> corDefOutputs = new TreeMap<>(leftFrame.corDefOutputs);
    // Right input positions are shifted by newLeftFieldCount.
    for (Map.Entry<CorDef, Integer> entry : rightFrame.corDefOutputs.entrySet()) {
        corDefOutputs.put(entry.getKey(), entry.getValue() + newLeftFieldCount);
    }
    return register(rel, newJoin, mapOldToNewOutputs, corDefOutputs);
}
Also used: ImmutableMap (com.google.common.collect.ImmutableMap), ImmutableSortedMap (com.google.common.collect.ImmutableSortedMap), HashMap (java.util.HashMap), Map (java.util.Map), NavigableMap (java.util.NavigableMap), SortedMap (java.util.SortedMap), TreeMap (java.util.TreeMap), RelNode (org.apache.calcite.rel.RelNode)
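The join rewrite merges the two inputs' correlated-variable maps into one SortedMap: left positions are copied unchanged, right positions are shifted by the new left field count. A compact sketch of just that merge follows; the names and positions are invented and String keys stand in for CorDef.

import java.util.Map;
import java.util.SortedMap;
import java.util.TreeMap;

public class JoinCorOutputSketch {

    public static void main(String[] args) {
        // Hypothetical correlated-variable positions, keyed by a String name
        // instead of Calcite's CorDef to keep the sketch self-contained.
        SortedMap<String, Integer> leftCorOutputs = new TreeMap<>();
        leftCorOutputs.put("cor0.deptno", 2);

        SortedMap<String, Integer> rightCorOutputs = new TreeMap<>();
        rightCorOutputs.put("cor1.empno", 0);

        int newLeftFieldCount = 3;   // fields contributed by the rewritten left input

        // Left positions are unchanged, so seed the result with the left map;
        // right positions shift past the left input's fields, as in the join rewrite.
        SortedMap<String, Integer> corDefOutputs = new TreeMap<>(leftCorOutputs);
        for (Map.Entry<String, Integer> e : rightCorOutputs.entrySet()) {
            corDefOutputs.put(e.getKey(), e.getValue() + newLeftFieldCount);
        }

        System.out.println(corDefOutputs);   // {cor0.deptno=2, cor1.empno=3}
    }
}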

Aggregations

SortedMap (java.util.SortedMap): 216 usages
Map (java.util.Map): 162
TreeMap (java.util.TreeMap): 108
HashMap (java.util.HashMap): 59
Iterator (java.util.Iterator): 31
NavigableMap (java.util.NavigableMap): 31
ArrayList (java.util.ArrayList): 27
Test (org.junit.Test): 25
IOException (java.io.IOException): 20
ImmutableMap (com.google.common.collect.ImmutableMap): 19
JASIExpr (org.matheclipse.core.convert.JASIExpr): 16
IExpr (org.matheclipse.core.interfaces.IExpr): 16
List (java.util.List): 15
File (java.io.File): 14
ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap): 14
ImmutableSortedMap (com.google.common.collect.ImmutableSortedMap): 12
Entry (java.util.Map.Entry): 12
ConcurrentNavigableMap (java.util.concurrent.ConcurrentNavigableMap): 12
ConcurrentMap (java.util.concurrent.ConcurrentMap): 11
LinkedHashMap (java.util.LinkedHashMap): 9