
Example 26 with Entry

use of java.util.Map.Entry in project hive by apache.

The class TestHiveAccumuloTableOutputFormat, method testBinarySerializationOnStringFallsBackToUtf8. The test writes a mutation through the output format, then scans the table back and checks each Entry<Key, Value> returned by the scanner.

@Test
public void testBinarySerializationOnStringFallsBackToUtf8() throws Exception {
    Instance inst = new MockInstance(test.getMethodName());
    Connector conn = inst.getConnector("root", new PasswordToken(""));
    HiveAccumuloTableOutputFormat outputFormat = new HiveAccumuloTableOutputFormat();
    String table = test.getMethodName();
    conn.tableOperations().create(table);
    JobConf conf = new JobConf();
    conf.set(AccumuloConnectionParameters.INSTANCE_NAME, inst.getInstanceName());
    conf.set(AccumuloConnectionParameters.USER_NAME, "root");
    conf.set(AccumuloConnectionParameters.USER_PASS, "");
    conf.setBoolean(AccumuloConnectionParameters.USE_MOCK_INSTANCE, true);
    conf.set(AccumuloConnectionParameters.TABLE_NAME, test.getMethodName());
    FileSystem local = FileSystem.getLocal(conf);
    outputFormat.checkOutputSpecs(local, conf);
    RecordWriter<Text, Mutation> recordWriter = outputFormat.getRecordWriter(local, conf, null, null);
    List<String> names = Arrays.asList("row", "col1", "col2");
    List<TypeInfo> types = Arrays.<TypeInfo>asList(TypeInfoFactory.stringTypeInfo, TypeInfoFactory.stringTypeInfo, TypeInfoFactory.stringTypeInfo);
    Properties tableProperties = new Properties();
    tableProperties.setProperty(AccumuloSerDeParameters.COLUMN_MAPPINGS, ":rowID,cf:cq1,cf:cq2");
    tableProperties.setProperty(serdeConstants.FIELD_DELIM, " ");
    tableProperties.setProperty(serdeConstants.LIST_COLUMNS, Joiner.on(',').join(names));
    tableProperties.setProperty(serdeConstants.LIST_COLUMN_TYPES, Joiner.on(',').join(types));
    tableProperties.setProperty(AccumuloSerDeParameters.DEFAULT_STORAGE_TYPE, ColumnEncoding.BINARY.getName());
    AccumuloSerDeParameters accumuloSerDeParams = new AccumuloSerDeParameters(new Configuration(), tableProperties, AccumuloSerDe.class.getSimpleName());
    LazySerDeParameters serDeParams = accumuloSerDeParams.getSerDeParameters();
    AccumuloRowSerializer serializer = new AccumuloRowSerializer(0, serDeParams, accumuloSerDeParams.getColumnMappings(), AccumuloSerDeParameters.DEFAULT_VISIBILITY_LABEL, accumuloSerDeParams.getRowIdFactory());
    TypeInfo stringTypeInfo = TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.STRING_TYPE_NAME);
    LazySimpleStructObjectInspector structOI = (LazySimpleStructObjectInspector) LazyFactory.createLazyStructInspector(Arrays.asList("row", "cq1", "cq2"), Arrays.asList(stringTypeInfo, stringTypeInfo, stringTypeInfo), serDeParams.getSeparators(), serDeParams.getNullSequence(), serDeParams.isLastColumnTakesRest(), serDeParams.isEscaped(), serDeParams.getEscapeChar());
    LazyStruct struct = (LazyStruct) LazyFactory.createLazyObject(structOI);
    ByteArrayRef bytes = new ByteArrayRef();
    bytes.setData("row value1 value2".getBytes());
    struct.init(bytes, 0, bytes.getData().length);
    // Serialize the struct into a mutation
    Mutation m = serializer.serialize(struct, structOI);
    // Write the mutation
    recordWriter.write(new Text(table), m);
    // Close the writer
    recordWriter.close(null);
    Iterator<Entry<Key, Value>> iter = conn.createScanner(table, new Authorizations()).iterator();
    Assert.assertTrue("Iterator did not have an element as expected", iter.hasNext());
    Entry<Key, Value> entry = iter.next();
    Key k = entry.getKey();
    Value v = entry.getValue();
    Assert.assertEquals("row", k.getRow().toString());
    Assert.assertEquals("cf", k.getColumnFamily().toString());
    Assert.assertEquals("cq1", k.getColumnQualifier().toString());
    Assert.assertEquals("", k.getColumnVisibility().toString());
    Assert.assertEquals("value1", new String(v.get()));
    Assert.assertTrue("Iterator did not have an element as expected", iter.hasNext());
    entry = iter.next();
    k = entry.getKey();
    v = entry.getValue();
    Assert.assertEquals("row", k.getRow().toString());
    Assert.assertEquals("cf", k.getColumnFamily().toString());
    Assert.assertEquals("cq2", k.getColumnQualifier().toString());
    Assert.assertEquals("", k.getColumnVisibility().toString());
    Assert.assertEquals("value2", new String(v.get()));
    Assert.assertFalse("Iterator unexpectedly had more data", iter.hasNext());
}
Also used : Connector(org.apache.accumulo.core.client.Connector) Configuration(org.apache.hadoop.conf.Configuration) MockInstance(org.apache.accumulo.core.client.mock.MockInstance) Instance(org.apache.accumulo.core.client.Instance) LazySerDeParameters(org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters) Properties(java.util.Properties) PasswordToken(org.apache.accumulo.core.client.security.tokens.PasswordToken) Entry(java.util.Map.Entry) MockInstance(org.apache.accumulo.core.client.mock.MockInstance) FileSystem(org.apache.hadoop.fs.FileSystem) JobConf(org.apache.hadoop.mapred.JobConf) AccumuloRowSerializer(org.apache.hadoop.hive.accumulo.serde.AccumuloRowSerializer) LazyStruct(org.apache.hadoop.hive.serde2.lazy.LazyStruct) Authorizations(org.apache.accumulo.core.security.Authorizations) LazySimpleStructObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector) Text(org.apache.hadoop.io.Text) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) AccumuloSerDe(org.apache.hadoop.hive.accumulo.serde.AccumuloSerDe) AccumuloSerDeParameters(org.apache.hadoop.hive.accumulo.serde.AccumuloSerDeParameters) ByteArrayRef(org.apache.hadoop.hive.serde2.lazy.ByteArrayRef) Value(org.apache.accumulo.core.data.Value) Mutation(org.apache.accumulo.core.data.Mutation) Key(org.apache.accumulo.core.data.Key) Test(org.junit.Test)
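
The assertions above hinge on Accumulo handing scan results back as Entry<Key, Value>. A minimal sketch of the same pattern in isolation (the ScanEntries class and dump method are hypothetical names; the Accumulo calls mirror the ones in the test): the scanner can also be walked with an enhanced for loop, since it is Iterable over Entry<Key, Value>.

import java.util.Map.Entry;

import org.apache.accumulo.core.client.Connector;
import org.apache.accumulo.core.client.Scanner;
import org.apache.accumulo.core.data.Key;
import org.apache.accumulo.core.data.Value;
import org.apache.accumulo.core.security.Authorizations;

public class ScanEntries {
    // Prints every key/value pair in the given table; same Entry<Key, Value> shape
    // the test asserts on, just without the explicit iterator.
    static void dump(Connector conn, String table) throws Exception {
        Scanner scanner = conn.createScanner(table, new Authorizations());
        for (Entry<Key, Value> entry : scanner) {
            System.out.println(entry.getKey() + " -> " + new String(entry.getValue().get()));
        }
    }
}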

Example 27 with Entry

use of java.util.Map.Entry in project hive by apache.

The class ReaderWriter, method writeDatum. The method serializes a single value to a DataOutput, recursing over each Entry of a Map for the MAP case.

public static void writeDatum(DataOutput out, Object val) throws IOException {
    // write the data type
    byte type = DataType.findType(val);
    out.write(type);
    switch(type) {
        case DataType.LIST:
            List<?> list = (List<?>) val;
            int sz = list.size();
            out.writeInt(sz);
            for (int i = 0; i < sz; i++) {
                writeDatum(out, list.get(i));
            }
            return;
        case DataType.MAP:
            Map<?, ?> m = (Map<?, ?>) val;
            out.writeInt(m.size());
            Iterator<?> i = m.entrySet().iterator();
            while (i.hasNext()) {
                Entry<?, ?> entry = (Entry<?, ?>) i.next();
                writeDatum(out, entry.getKey());
                writeDatum(out, entry.getValue());
            }
            return;
        case DataType.INTEGER:
            new VIntWritable((Integer) val).write(out);
            return;
        case DataType.LONG:
            new VLongWritable((Long) val).write(out);
            return;
        case DataType.FLOAT:
            out.writeFloat((Float) val);
            return;
        case DataType.DOUBLE:
            out.writeDouble((Double) val);
            return;
        case DataType.BOOLEAN:
            out.writeBoolean((Boolean) val);
            return;
        case DataType.BYTE:
            out.writeByte((Byte) val);
            return;
        case DataType.SHORT:
            out.writeShort((Short) val);
            return;
        case DataType.STRING:
            String s = (String) val;
            byte[] utfBytes = s.getBytes(ReaderWriter.UTF8);
            out.writeInt(utfBytes.length);
            out.write(utfBytes);
            return;
        case DataType.BINARY:
            byte[] ba = (byte[]) val;
            out.writeInt(ba.length);
            out.write(ba);
            return;
        case DataType.NULL:
            // for NULL we just write out the type
            return;
        case DataType.CHAR:
            new HiveCharWritable((HiveChar) val).write(out);
            return;
        case DataType.VARCHAR:
            new HiveVarcharWritable((HiveVarchar) val).write(out);
            return;
        case DataType.DECIMAL:
            new HiveDecimalWritable((HiveDecimal) val).write(out);
            return;
        case DataType.DATE:
            new DateWritable((Date) val).write(out);
            return;
        case DataType.TIMESTAMP:
            new TimestampWritable((java.sql.Timestamp) val).write(out);
            return;
        default:
            throw new IOException("Unexpected data type " + type + " found in stream.");
    }
}
Also used : VIntWritable(org.apache.hadoop.io.VIntWritable) HiveChar(org.apache.hadoop.hive.common.type.HiveChar) TimestampWritable(org.apache.hadoop.hive.serde2.io.TimestampWritable) Entry(java.util.Map.Entry) HiveDecimal(org.apache.hadoop.hive.common.type.HiveDecimal) ArrayList(java.util.ArrayList) List(java.util.List) HiveDecimalWritable(org.apache.hadoop.hive.serde2.io.HiveDecimalWritable) DateWritable(org.apache.hadoop.hive.serde2.io.DateWritable) HiveCharWritable(org.apache.hadoop.hive.serde2.io.HiveCharWritable) HiveVarcharWritable(org.apache.hadoop.hive.serde2.io.HiveVarcharWritable) HiveVarchar(org.apache.hadoop.hive.common.type.HiveVarchar) IOException(java.io.IOException) Date(java.sql.Date) VLongWritable(org.apache.hadoop.io.VLongWritable) HashMap(java.util.HashMap) Map(java.util.Map)
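
For the MAP branch above, the on-stream layout is the type byte, an int entry count, then alternating key and value datums. Below is a hypothetical sketch of the matching decode step; it is not the real ReaderWriter.readDatum, and the readDatum stub only stands in for the recursive single-datum reader.

import java.io.DataInput;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;

public class MapDatumReaderSketch {
    // Decodes the payload written by the DataType.MAP case: an int entry count
    // followed by alternating key and value datums.
    static Map<Object, Object> readMapDatum(DataInput in) throws IOException {
        int size = in.readInt();
        Map<Object, Object> m = new HashMap<Object, Object>(size);
        for (int i = 0; i < size; i++) {
            Object key = readDatum(in);
            Object value = readDatum(in);
            m.put(key, value);
        }
        return m;
    }

    // Stub for the recursive single-datum reader; the real one dispatches on the
    // leading type byte, mirroring the switch in writeDatum.
    static Object readDatum(DataInput in) throws IOException {
        throw new UnsupportedOperationException("illustrative stub");
    }
}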

Example 28 with Entry

use of java.util.Map.Entry in project hive by apache.

The class LlapTaskSchedulerService, method schedulePendingTasks. The method walks the pending-task map by priority through its entrySet iterator, scheduling tasks or requesting preemption, and removing a priority's entry once it is drained.

@VisibleForTesting
protected void schedulePendingTasks() {
    writeLock.lock();
    try {
        if (LOG.isDebugEnabled()) {
            LOG.debug("ScheduleRun: {}", constructPendingTaskCountsLogMessage());
        }
        Iterator<Entry<Priority, List<TaskInfo>>> pendingIterator = pendingTasks.entrySet().iterator();
        Resource totalResource = getTotalResources();
        while (pendingIterator.hasNext()) {
            Entry<Priority, List<TaskInfo>> entry = pendingIterator.next();
            List<TaskInfo> taskListAtPriority = entry.getValue();
            Iterator<TaskInfo> taskIter = taskListAtPriority.iterator();
            boolean scheduledAllAtPriority = true;
            while (taskIter.hasNext()) {
                // TODO Optimization: Add a check to see if there's any capacity available. No point in
                // walking through all active nodes, if they don't have potential capacity.
                TaskInfo taskInfo = taskIter.next();
                if (taskInfo.getNumPreviousAssignAttempts() == 1) {
                    dagStats.registerDelayedAllocation();
                }
                taskInfo.triedAssigningTask();
                ScheduleResult scheduleResult = scheduleTask(taskInfo, totalResource);
                if (LOG.isDebugEnabled()) {
                    LOG.debug("ScheduleResult for Task: {} = {}", taskInfo, scheduleResult);
                }
                if (scheduleResult == ScheduleResult.SCHEDULED) {
                    taskIter.remove();
                } else {
                    if (scheduleResult == ScheduleResult.INADEQUATE_TOTAL_RESOURCES) {
                        LOG.info("Inadequate total resources before scheduling pending tasks." + " Signalling scheduler timeout monitor thread to start timer.");
                        startTimeoutMonitor();
                    // TODO Nothing else should be done for this task. Move on.
                    }
                    // Try preempting a task so that a higher priority task can take its place.
                    // Preempt only if there are no pending preemptions, to avoid preempting twice for a task.
                    String[] potentialHosts;
                    if (scheduleResult == ScheduleResult.DELAYED_LOCALITY) {
                        // Add the task to the delayed task queue if it does not already exist.
                        maybeAddToDelayedTaskQueue(taskInfo);
                        // Try preempting a lower priority task in any case.
                        // preempt only on specific hosts, if no preemptions already exist on those.
                        potentialHosts = taskInfo.requestedHosts;
                        // Protect against a bad location being requested.
                        if (potentialHosts == null || potentialHosts.length == 0) {
                            potentialHosts = null;
                        }
                    } else {
                        // preempt on any host.
                        potentialHosts = null;
                    }
                    // At this point we're dealing with all return types, except ScheduleResult.SCHEDULED.
                    if (potentialHosts != null) {
                        if (LOG.isDebugEnabled()) {
                            LOG.debug("Attempting to preempt on requested host for task={}, potentialHosts={}", taskInfo, Arrays.toString(potentialHosts));
                        }
                        // Preempt on specific host
                        boolean shouldPreempt = true;
                        for (String host : potentialHosts) {
                            // Preempt only if there are no pending preemptions on the same host
                            // When the preemption registers, the request at the highest priority will be given the slot,
                            // even if the initial preemption was caused by some other task.
                            // TODO Maybe register which task the preemption was for, to avoid a bad non-local allocation.
                            MutableInt pendingHostPreemptions = pendingPreemptionsPerHost.get(host);
                            if (pendingHostPreemptions != null && pendingHostPreemptions.intValue() > 0) {
                                shouldPreempt = false;
                                LOG.debug("Not preempting for task={}. Found an existing preemption request on host={}, pendingPreemptionCount={}", taskInfo.task, host, pendingHostPreemptions.intValue());
                                break;
                            }
                        }
                        if (shouldPreempt) {
                            if (LOG.isDebugEnabled()) {
                                LOG.debug("Attempting to preempt for {} on potential hosts={}. TotalPendingPreemptions={}", taskInfo.task, Arrays.toString(potentialHosts), pendingPreemptions.get());
                            }
                            preemptTasks(entry.getKey().getPriority(), 1, potentialHosts);
                        } else {
                            if (LOG.isDebugEnabled()) {
                                LOG.debug("Not preempting for {} on potential hosts={}. An existing preemption request exists", taskInfo.task, Arrays.toString(potentialHosts));
                            }
                        }
                    } else {
                        // Either DELAYED_RESOURCES or DELAYED_LOCALITY with an unknown requested host.
                        // Request for a preemption if there's none pending. If a single preemption is pending,
                        // and this is the next task to be assigned, it will be assigned once that slot becomes available.
                        LOG.debug("Attempting to preempt on any host for task={}, pendingPreemptions={}", taskInfo.task, pendingPreemptions.get());
                        if (pendingPreemptions.get() == 0) {
                            if (LOG.isDebugEnabled()) {
                                LOG.debug("Attempting to preempt for task={}, priority={} on any available host", taskInfo.task, taskInfo.priority);
                            }
                            preemptTasks(entry.getKey().getPriority(), 1, null);
                        } else {
                            if (LOG.isDebugEnabled()) {
                                LOG.debug("Skipping preemption since there are {} pending preemption request. For task={}", pendingPreemptions.get(), taskInfo);
                            }
                        }
                    }
                    // Since there was an allocation failure - don't try assigning tasks at the next priority.
                    scheduledAllAtPriority = false;
                    // Don't break if this allocation failure was a result of a LOCALITY_DELAY. Others could still be allocated.
                    if (scheduleResult != ScheduleResult.DELAYED_LOCALITY) {
                        break;
                    }
                }
            // end of else - i.e. could not allocate
            }
            // end of loop over pending tasks
            if (taskListAtPriority.isEmpty()) {
                // Remove the entry, if there's nothing left at the specific priority level
                pendingIterator.remove();
            }
            if (!scheduledAllAtPriority) {
                LOG.debug("Unable to schedule all requests at priority={}. Skipping subsequent priority levels", entry.getKey());
                // Don't attempt scheduling for additional priorities
                break;
            }
        }
    } finally {
        writeLock.unlock();
    }
}
Also used : Priority(org.apache.hadoop.yarn.api.records.Priority) Resource(org.apache.hadoop.yarn.api.records.Resource) Entry(java.util.Map.Entry) MutableInt(org.apache.commons.lang.mutable.MutableInt) List(java.util.List) ArrayList(java.util.ArrayList) LinkedList(java.util.LinkedList) VisibleForTesting(com.google.common.annotations.VisibleForTesting)
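
The Entry-specific part of this method is the walk over pendingTasks.entrySet(): the iterator lets the scheduler drop a priority bucket in place once it is drained. A stripped-down sketch of just that pattern, with made-up types (Integer priorities and String tasks) standing in for Priority and TaskInfo:

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.Map.Entry;
import java.util.TreeMap;

public class DrainByPrioritySketch {
    // Walks the map in priority order, drains each bucket, and removes the map
    // entry through the entry-set iterator once its list is empty.
    static void drain(TreeMap<Integer, List<String>> pending) {
        Iterator<Entry<Integer, List<String>>> pendingIterator = pending.entrySet().iterator();
        while (pendingIterator.hasNext()) {
            Entry<Integer, List<String>> entry = pendingIterator.next();
            Iterator<String> taskIter = entry.getValue().iterator();
            while (taskIter.hasNext()) {
                String task = taskIter.next();
                System.out.println("scheduling priority=" + entry.getKey() + " task=" + task);
                taskIter.remove();
            }
            if (entry.getValue().isEmpty()) {
                // Removing via the iterator is the only safe way to drop the key
                // while the entry set is still being traversed.
                pendingIterator.remove();
            }
        }
    }

    public static void main(String[] args) {
        TreeMap<Integer, List<String>> pending = new TreeMap<Integer, List<String>>();
        pending.put(1, new ArrayList<String>(Arrays.asList("a", "b")));
        pending.put(2, new ArrayList<String>(Arrays.asList("c")));
        drain(pending);
    }
}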

Example 29 with Entry

use of java.util.Map.Entry in project hive by apache.

The class TypedBytesOutput, method writeMap. The method writes a map header followed by each Entry's key and value.

/**
   * Writes a map as a typed bytes sequence.
   *
   * @param map
   *          the map to be written
   * @throws IOException
   */
@SuppressWarnings("unchecked")
public void writeMap(Map map) throws IOException {
    writeMapHeader(map.size());
    Set<Entry> entries = map.entrySet();
    for (Entry entry : entries) {
        write(entry.getKey());
        write(entry.getValue());
    }
}
Also used : Entry(java.util.Map.Entry)
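
Because the method accepts a raw Map, the loop iterates raw Entry objects and needs @SuppressWarnings("unchecked"). A sketch of the same loop with a wildcard-typed map follows; the writeMapHeader and write helpers below are placeholders, not the real TypedBytesOutput encoding.

import java.io.DataOutput;
import java.io.IOException;
import java.util.Map;
import java.util.Map.Entry;

public class TypedMapWriterSketch {
    private final DataOutput out;

    TypedMapWriterSketch(DataOutput out) {
        this.out = out;
    }

    // Same loop as writeMap above, but the wildcard-typed parameter removes the
    // raw Entry and the unchecked-cast warning.
    public void writeMap(Map<?, ?> map) throws IOException {
        writeMapHeader(map.size());
        for (Entry<?, ?> entry : map.entrySet()) {
            write(entry.getKey());
            write(entry.getValue());
        }
    }

    private void writeMapHeader(int size) throws IOException {
        // Placeholder: the real typed-bytes map header is written by TypedBytesOutput.
        out.writeInt(size);
    }

    private void write(Object datum) throws IOException {
        // Placeholder for the per-datum typed-bytes encoding.
    }
}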

Example 30 with Entry

use of java.util.Map.Entry in project hive by apache.

The class TestReaderWriter, method test. The test copies the HiveConf entries into a Map, then round-trips HCatalog writer and reader contexts through Java serialization.

@Test
public void test() throws MetaException, CommandNeedRetryException, IOException, ClassNotFoundException {
    driver.run("drop table mytbl");
    driver.run("create table mytbl (a string, b int)");
    Iterator<Entry<String, String>> itr = hiveConf.iterator();
    Map<String, String> map = new HashMap<String, String>();
    while (itr.hasNext()) {
        Entry<String, String> kv = itr.next();
        map.put(kv.getKey(), kv.getValue());
    }
    WriterContext cntxt = runsInMaster(map);
    File writeCntxtFile = File.createTempFile("hcat-write", "temp");
    writeCntxtFile.deleteOnExit();
    // Serialize context.
    ObjectOutputStream oos = new ObjectOutputStream(new FileOutputStream(writeCntxtFile));
    oos.writeObject(cntxt);
    oos.flush();
    oos.close();
    // Now, deserialize it.
    ObjectInputStream ois = new ObjectInputStream(new FileInputStream(writeCntxtFile));
    cntxt = (WriterContext) ois.readObject();
    ois.close();
    runsInSlave(cntxt);
    commit(map, true, cntxt);
    ReaderContext readCntxt = runsInMaster(map, false);
    File readCntxtFile = File.createTempFile("hcat-read", "temp");
    readCntxtFile.deleteOnExit();
    oos = new ObjectOutputStream(new FileOutputStream(readCntxtFile));
    oos.writeObject(readCntxt);
    oos.flush();
    oos.close();
    ois = new ObjectInputStream(new FileInputStream(readCntxtFile));
    readCntxt = (ReaderContext) ois.readObject();
    ois.close();
    for (int i = 0; i < readCntxt.numSplits(); i++) {
        runsInSlave(readCntxt, i);
    }
}
Also used : HashMap(java.util.HashMap) ObjectOutputStream(java.io.ObjectOutputStream) FileInputStream(java.io.FileInputStream) WriterContext(org.apache.hive.hcatalog.data.transfer.WriterContext) Entry(java.util.Map.Entry) FileOutputStream(java.io.FileOutputStream) ReaderContext(org.apache.hive.hcatalog.data.transfer.ReaderContext) File(java.io.File) ObjectInputStream(java.io.ObjectInputStream) Test(org.junit.Test) HCatBaseTest(org.apache.hive.hcatalog.mapreduce.HCatBaseTest)
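
The only Entry usage in this test is the copy of the HiveConf into a plain Map. Since Hadoop's Configuration implements Iterable<Map.Entry<String, String>>, the explicit iterator can be folded into an enhanced for loop, sketched here as a hypothetical helper class:

import java.util.HashMap;
import java.util.Map;
import java.util.Map.Entry;

import org.apache.hadoop.conf.Configuration;

public class ConfToMapSketch {
    // Collects every configuration property into a HashMap, same as the
    // while/iterator pair at the top of the test.
    static Map<String, String> toMap(Configuration conf) {
        Map<String, String> map = new HashMap<String, String>();
        for (Entry<String, String> kv : conf) {
            map.put(kv.getKey(), kv.getValue());
        }
        return map;
    }
}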
