Example 36 with LinkedList

use of java.util.LinkedList in project hive by apache.

the class Hive method trashFiles.

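  /**
   * Trashes or deletes all files under a directory. Leaves the directory as is.
   * @param fs FileSystem to use
   * @param statuses fileStatuses of files to be deleted
   * @param conf hive configuration
   * @return true if every file was trashed or deleted successfully; false if any of them failed, or if statuses is null or empty
   * @throws IOException if a deletion task fails or the wait for its result is interrupted
   */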
public static boolean trashFiles(final FileSystem fs, final FileStatus[] statuses, final Configuration conf) throws IOException {
    // NOTE(review): this excerpt has lost its closing braces (and possibly whole statements);
    // the comments below describe only what the visible lines do.
    boolean result = true;
    // Nothing to delete: a null or empty input is reported as false, not true.
    if (statuses == null || statuses.length == 0) {
        return false;
    // One Future per submitted deletion, collected so the results can be gathered afterwards.
    final List<Future<Boolean>> futures = new LinkedList<>();
    // A positive HIVE_MOVE_FILES_THREAD_COUNT (default 25) selects a fixed pool of daemon threads named
    // "Delete-Thread-%d"; zero or a negative value leaves pool null, which selects the inline path below.
    final ExecutorService pool = conf.getInt(ConfVars.HIVE_MOVE_FILES_THREAD_COUNT.varname, 25) > 0 ? Executors.newFixedThreadPool(conf.getInt(ConfVars.HIVE_MOVE_FILES_THREAD_COUNT.varname, 25), new ThreadFactoryBuilder().setDaemon(true).setNameFormat("Delete-Thread-%d").build()) : null;
    // NOTE(review): parentSession is captured here but never read in the visible lines; presumably the
    // callable handed it to the worker thread and that statement was dropped — confirm against upstream.
    final SessionState parentSession = SessionState.get();
    for (final FileStatus status : statuses) {
        if (null == pool) {
            // No pool: trash the file on the calling thread and fold the outcome into result.
            result &= FileUtils.moveToTrash(fs, status.getPath(), conf);
        } else {
            // Pool available: submit the same call as a task and keep its Future.
            futures.add(pool.submit(new Callable<Boolean>() {

                public Boolean call() throws Exception {
                    return FileUtils.moveToTrash(fs, status.getPath(), conf);
    // Gather the task outcomes; a single false result makes the overall result false.
    // NOTE(review): no pool.shutdown() is visible in this excerpt — verify the pool is shut down upstream.
    if (null != pool) {
        for (Future<Boolean> future : futures) {
            try {
                result &= future.get();
            } catch (InterruptedException | ExecutionException e) {
                // Any failed or interrupted task is logged and surfaced to the caller as an IOException.
                // NOTE(review): the interrupt flag is not restored here when e is an InterruptedException.
                LOG.error("Failed to delete: ", e);
                throw new IOException(e);
    return result;
Also used : SessionState(org.apache.hadoop.hive.ql.session.SessionState) FileStatus(org.apache.hadoop.fs.FileStatus) IOException(java.io.IOException) LinkedList(java.util.LinkedList) Callable(java.util.concurrent.Callable) ExecutorService(java.util.concurrent.ExecutorService) Future(java.util.concurrent.Future) ThreadFactoryBuilder(com.google.common.util.concurrent.ThreadFactoryBuilder) ExecutionException(java.util.concurrent.ExecutionException)

Example 37 with LinkedList

use of java.util.LinkedList in project hive by apache.

the class DecimalColumnStatsAggregator method extrapolate.

// Computes extrapolated decimal column statistics (low value, high value, null count and NDV) for
// numParts partitions from the per-partition stats in adjustedStatsMap.
//
// extrapolateData       - not read or written in the visible lines (see the note at the end)
// numParts              - total partition count; also used as the right-border index
// numPartsWithStats     - number of partitions that have stats; divisor when scaling the null count
// adjustedIndexMap      - maps each stats key to a numeric index (read as a double)
// adjustedStatsMap      - maps each stats key to its ColumnStatisticsData; only getDecimalStats() is used
// densityAvg            - divisor for the density-based NDV estimate; ignored when 0.0
//
// NOTE(review): this excerpt has lost its closing braces and appears to be truncated at the end;
// the comments below describe only what the visible lines do.
public void extrapolate(ColumnStatisticsData extrapolateData, int numParts, int numPartsWithStats, Map<String, Double> adjustedIndexMap, Map<String, ColumnStatisticsData> adjustedStatsMap, double densityAvg) {
    int rightBorderInd = numParts;
    // NOTE(review): extrapolateDecimalData is created but never populated in the visible lines.
    DecimalColumnStatsData extrapolateDecimalData = new DecimalColumnStatsData();
    // Narrow every entry to its decimal stats so the sorts below can compare them directly.
    Map<String, DecimalColumnStatsData> extractedAdjustedStatsMap = new HashMap<>();
    for (Map.Entry<String, ColumnStatisticsData> entry : adjustedStatsMap.entrySet()) {
        extractedAdjustedStatsMap.put(entry.getKey(), entry.getValue().getDecimalStats());
    // The same list is re-sorted three times below (by low value, by high value, by NDV).
    // NOTE(review): list.get(0) below throws IndexOutOfBoundsException when adjustedStatsMap is empty, and
    // unboxing adjustedIndexMap.get(...) throws NullPointerException if a key is missing from that map.
    List<Map.Entry<String, DecimalColumnStatsData>> list = new LinkedList<Map.Entry<String, DecimalColumnStatsData>>(extractedAdjustedStatsMap.entrySet());
    // get the lowValue
    Collections.sort(list, new Comparator<Map.Entry<String, DecimalColumnStatsData>>() {

        public int compare(Map.Entry<String, DecimalColumnStatsData> o1, Map.Entry<String, DecimalColumnStatsData> o2) {
            return o1.getValue().getLowValue().compareTo(o2.getValue().getLowValue());
    // minInd / maxInd are the indexes of the entries holding the smallest and the largest low value.
    double minInd = adjustedIndexMap.get(list.get(0).getKey());
    double maxInd = adjustedIndexMap.get(list.get(list.size() - 1).getKey());
    double lowValue = 0;
    double min = HBaseUtils.getDoubleValue(list.get(0).getValue().getLowValue());
    double max = HBaseUtils.getDoubleValue(list.get(list.size() - 1).getValue().getLowValue());
    if (minInd == maxInd) {
        lowValue = min;
    } else if (minInd < maxInd) {
        // left border is the min
        // Evaluates the straight line through (minInd, min) and (maxInd, max) at index 0.
        lowValue = (max - (max - min) * maxInd / (maxInd - minInd));
    } else {
        // right border is the min
        // Evaluates the same straight line at index rightBorderInd.
        lowValue = (max - (max - min) * (rightBorderInd - maxInd) / (minInd - maxInd));
    // get the highValue
    Collections.sort(list, new Comparator<Map.Entry<String, DecimalColumnStatsData>>() {

        public int compare(Map.Entry<String, DecimalColumnStatsData> o1, Map.Entry<String, DecimalColumnStatsData> o2) {
            return o1.getValue().getHighValue().compareTo(o2.getValue().getHighValue());
    // Same procedure as for the low value, now over the high values.
    minInd = adjustedIndexMap.get(list.get(0).getKey());
    maxInd = adjustedIndexMap.get(list.get(list.size() - 1).getKey());
    double highValue = 0;
    min = HBaseUtils.getDoubleValue(list.get(0).getValue().getHighValue());
    max = HBaseUtils.getDoubleValue(list.get(list.size() - 1).getValue().getHighValue());
    if (minInd == maxInd) {
        highValue = min;
    } else if (minInd < maxInd) {
        // right border is the max
        // Evaluates the straight line through (minInd, min) and (maxInd, max) at index rightBorderInd.
        highValue = (min + (max - min) * (rightBorderInd - minInd) / (maxInd - minInd));
    } else {
        // left border is the max
        // Evaluates the same straight line at index 0.
        highValue = (min + (max - min) * minInd / (minInd - maxInd));
    // get the #nulls
    long numNulls = 0;
    for (Map.Entry<String, DecimalColumnStatsData> entry : extractedAdjustedStatsMap.entrySet()) {
        numNulls += entry.getValue().getNumNulls();
    // we scale up sumNulls based on the number of partitions
    // NOTE(review): this is integer arithmetic (the quotient truncates) and it throws ArithmeticException
    // when numPartsWithStats is 0.
    numNulls = numNulls * numParts / numPartsWithStats;
    // get the ndv
    long ndv = 0;
    long ndvMin = 0;
    long ndvMax = 0;
    // NOTE(review): this comparator never returns 0, so two entries with equal NDVs each compare as
    // greater than the other; that breaks the Comparator contract. Long.compare(a, b) would conform.
    Collections.sort(list, new Comparator<Map.Entry<String, DecimalColumnStatsData>>() {

        public int compare(Map.Entry<String, DecimalColumnStatsData> o1, Map.Entry<String, DecimalColumnStatsData> o2) {
            return o1.getValue().getNumDVs() < o2.getValue().getNumDVs() ? -1 : 1;
    // Bounds for the density-based estimate: the last entry's NDV after the sort above, and the sum of all NDVs.
    long lowerBound = list.get(list.size() - 1).getValue().getNumDVs();
    long higherBound = 0;
    for (Map.Entry<String, DecimalColumnStatsData> entry : list) {
        higherBound += entry.getValue().getNumDVs();
    // useDensityFunctionForNDVEstimation is declared outside this excerpt.
    if (useDensityFunctionForNDVEstimation && densityAvg != 0.0) {
        // Density-based estimate: value range divided by average density, clamped to [lowerBound, higherBound].
        ndv = (long) ((highValue - lowValue) / densityAvg);
        if (ndv < lowerBound) {
            ndv = lowerBound;
        } else if (ndv > higherBound) {
            ndv = higherBound;
    } else {
        // Index-based estimate: the same linear extrapolation as for the high value, applied to the NDVs.
        minInd = adjustedIndexMap.get(list.get(0).getKey());
        maxInd = adjustedIndexMap.get(list.get(list.size() - 1).getKey());
        ndvMin = list.get(0).getValue().getNumDVs();
        ndvMax = list.get(list.size() - 1).getValue().getNumDVs();
        if (minInd == maxInd) {
            ndv = ndvMin;
        } else if (minInd < maxInd) {
            // right border is the max
            // The indexes are doubles, so the expression is evaluated in floating point and then truncated to long.
            ndv = (long) (ndvMin + (ndvMax - ndvMin) * (rightBorderInd - minInd) / (maxInd - minInd));
        } else {
            // left border is the max
            ndv = (long) (ndvMin + (ndvMax - ndvMin) * minInd / (minInd - maxInd));
    // NOTE(review): nothing in the visible lines stores lowValue, highValue, numNulls or ndv into
    // extrapolateDecimalData or extrapolateData; presumably the tail of the method is missing here.
Also used : HashMap(java.util.HashMap) LinkedList(java.util.LinkedList) DecimalColumnStatsData(org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData) HashMap(java.util.HashMap) Map(java.util.Map) ColumnStatisticsData(org.apache.hadoop.hive.metastore.api.ColumnStatisticsData)

Example 38 with LinkedList

use of java.util.LinkedList in project hive by apache.

the class TestDateWritable method testDaylightSavingsTime.

// For every time zone ID known to the JVM, runs a DateTestCallable on a single-thread executor and
// collects the mismatches it reports into `bad`; throws if any mismatch was recorded.
// NOTE(review): this excerpt has lost its closing braces and apparently some statements as well;
// the comments below describe only what the visible lines do.
public void testDaylightSavingsTime() throws Exception {
    LinkedList<DtMismatch> bad = new LinkedList<>();
    for (String timeZone : TimeZone.getAvailableIDs()) {
        // NOTE(review): previousDefault is saved but never restored in the visible lines, and no
        // TimeZone.setDefault(...) call is visible before the assertion below — presumably both were dropped.
        TimeZone previousDefault = TimeZone.getDefault();
        assertEquals("Default timezone should now be " + timeZone, timeZone, TimeZone.getDefault().getID());
        ExecutorService threadPool = Executors.newFixedThreadPool(1);
        try {
            // TODO: pointless
            // Blocks until the callable finishes; any failure inside it surfaces from get().
            threadPool.submit(new DateTestCallable(bad, timeZone)).get();
        // NOTE(review): the finally body is empty in this excerpt, so no executor shutdown is visible.
        } finally {
    // Builds a one-line-per-mismatch report of expected vs. found values.
    // NOTE(review): `errors` is never logged or printed in the visible lines, although the exception
    // message below says "see logs".
    StringBuilder errors = new StringBuilder("\nDATE MISMATCH:\n");
    for (DtMismatch dm : bad) {
        errors.append("E ").append(": ").append(dm.expected).append(" != ").append(dm.found).append("\n");
    if (!bad.isEmpty())
        throw new Exception(bad.size() + " mismatches, see logs");
Also used : TimeZone(java.util.TimeZone) ExecutorService(java.util.concurrent.ExecutorService) LinkedList(java.util.LinkedList) ExecutionException(java.util.concurrent.ExecutionException)

Example 39 with LinkedList

use of java.util.LinkedList in project storm by apache.

the class IsolationScheduler method hostAssignableSlots.

// Groups the cluster's assignable slots by host and returns one HostAssignableSlots per host,
// ordered by descending number of worker slots.
// NOTE(review): this excerpt has lost its closing braces and possibly some statements;
// the comments below describe only what the visible lines do.
private LinkedList<HostAssignableSlots> hostAssignableSlots(Cluster cluster) {
    List<WorkerSlot> assignableSlots = cluster.getAssignableSlots();
    // host -> slots on that host
    Map<String, List<WorkerSlot>> hostAssignableSlots = new HashMap<String, List<WorkerSlot>>();
    for (WorkerSlot slot : assignableSlots) {
        String host = cluster.getHost(slot.getNodeId());
        List<WorkerSlot> slots = hostAssignableSlots.get(host);
        // First slot seen for this host: create and register its list.
        if (slots == null) {
            slots = new ArrayList<WorkerSlot>();
            hostAssignableSlots.put(host, slots);
    // NOTE(review): no statement adding `slot` to `slots` is visible, so as shown every per-host list
    // stays empty; presumably a slots.add(slot) line was dropped — confirm against upstream.
    List<HostAssignableSlots> sortHostAssignSlots = new ArrayList<HostAssignableSlots>();
    for (Map.Entry<String, List<WorkerSlot>> entry : hostAssignableSlots.entrySet()) {
        sortHostAssignSlots.add(new HostAssignableSlots(entry.getKey(), entry.getValue()));
    // Descending by slot count (o2 minus o1).
    Collections.sort(sortHostAssignSlots, new Comparator<HostAssignableSlots>() {

        public int compare(HostAssignableSlots o1, HostAssignableSlots o2) {
            return o2.getWorkerSlots().size() - o1.getWorkerSlots().size();
    // The sorted result is copied into a LinkedList to match the declared return type.
    return new LinkedList<HostAssignableSlots>(sortHostAssignSlots);
Also used : HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) LinkedList(java.util.LinkedList) WorkerSlot(org.apache.storm.scheduler.WorkerSlot) ArrayList(java.util.ArrayList) List(java.util.List) LinkedList(java.util.LinkedList) HashMap(java.util.HashMap) TreeMap(java.util.TreeMap) Map(java.util.Map)

Example 40 with LinkedList

use of java.util.LinkedList in project storm by apache.

the class DefaultResourceAwareStrategy method sortRacks.

    /**
     * Sort racks
     * @param topoId                topology id
     * @param scheduleAssignmentMap calculated assignments so far
     * @return a sorted list of racks
     * Racks are sorted by two criteria. 1) The number of executors of the topology being scheduled that are already on the rack, in descending order.
     * The reasoning behind sorting on criterion 1 is that we schedule the rest of a topology on the same rack as the existing executors of the topology.
     * 2) The subordinate/subservient resource availability percentage of a rack, in descending order.
     * We calculate the resource availability percentage by dividing the resource availability on the rack by the resource availability of the entire cluster.
     * By doing this calculation, racks that have exhausted, or have little of, one of the resources mentioned above will be ranked after racks that have more balanced resource availability.
     * So we will be less likely to pick a rack that has a lot of one resource but a low amount of another.
     */
// NOTE(review): this excerpt has lost its closing braces and apparently several statements;
// the comments below describe only what the visible lines do.
TreeSet<ObjectResources> sortRacks(final String topoId, final Map<WorkerSlot, Collection<ExecutorDetails>> scheduleAssignmentMap) {
    AllResources allResources = new AllResources("Cluster");
    // NOTE(review): `racks` is never used in the visible lines, and nothing visible adds a rack to it.
    List<ObjectResources> racks = allResources.objectResources;
    // NOTE(review): nodeIdToRackId is never populated in the visible lines although the callback below
    // looks hostnames up in it; presumably the put(...) calls were dropped — confirm against upstream.
    final Map<String, String> nodeIdToRackId = new HashMap<String, String>();
    // _clusterInfo maps each rack id to the list of node identifiers on that rack.
    for (Map.Entry<String, List<String>> entry : _clusterInfo.entrySet()) {
        String rackId = entry.getKey();
        List<String> nodeIds = entry.getValue();
        ObjectResources rack = new ObjectResources(rackId);
        for (String nodeId : nodeIds) {
            RAS_Node node = _nodes.getNodeById(this.NodeHostnameToId(nodeId));
            double availMem = node.getAvailableMemoryResources();
            double availCpu = node.getAvailableCpuResources();
            double totalMem = node.getTotalMemoryResources();
            double totalCpu = node.getTotalCpuResources();
            // Accumulate the node's resources into its rack's totals...
            rack.availMem += availMem;
            rack.totalMem += totalMem;
            rack.availCpu += availCpu;
            rack.totalCpu += totalCpu;
            // ...and into the cluster-wide totals.
            allResources.availMemResourcesOverall += availMem;
            allResources.availCpuResourcesOverall += availCpu;
            allResources.totalMemResourcesOverall += totalMem;
            allResources.totalCpuResourcesOverall += totalCpu;
    LOG.debug("Cluster Overall Avail [ CPU {} MEM {} ] Total [ CPU {} MEM {} ]", allResources.availCpuResourcesOverall, allResources.availMemResourcesOverall, allResources.totalCpuResourcesOverall, allResources.totalMemResourcesOverall);
    // The ordering itself is delegated to sortObjectResources; the callback supplies, for a given rack id,
    // the size of the `execs` collection built below.
    return sortObjectResources(allResources, new ExistingScheduleFunc() {

        public int getNumExistingSchedule(String objectId) {
            String rackId = objectId;
            //Get execs already assigned in rack
            // NOTE(review): no statement adding to `execs` is visible in either loop below, so as shown
            // this callback always returns 0; presumably the add(...) calls were dropped.
            Collection<ExecutorDetails> execs = new LinkedList<ExecutorDetails>();
            if (_cluster.getAssignmentById(topoId) != null) {
                for (Map.Entry<ExecutorDetails, WorkerSlot> entry : _cluster.getAssignmentById(topoId).getExecutorToSlot().entrySet()) {
                    String nodeId = entry.getValue().getNodeId();
                    String hostname = idToNode(nodeId).getHostname();
                    ExecutorDetails exec = entry.getKey();
                    // The lookup is null-guarded here, unlike the matching lookup in the second loop.
                    if (nodeIdToRackId.get(hostname) != null && nodeIdToRackId.get(hostname).equals(rackId)) {
            // get execs already scheduled in the current scheduling
            for (Map.Entry<WorkerSlot, Collection<ExecutorDetails>> entry : scheduleAssignmentMap.entrySet()) {
                WorkerSlot workerSlot = entry.getKey();
                String nodeId = workerSlot.getNodeId();
                String hostname = idToNode(nodeId).getHostname();
                // NOTE(review): no null guard here; this throws NullPointerException when the hostname
                // has no entry in nodeIdToRackId.
                if (nodeIdToRackId.get(hostname).equals(rackId)) {
            return execs.size();
Also used : ExecutorDetails(org.apache.storm.scheduler.ExecutorDetails) HashMap(java.util.HashMap) RAS_Node(org.apache.storm.scheduler.resource.RAS_Node) WorkerSlot(org.apache.storm.scheduler.WorkerSlot) Collection(java.util.Collection) ArrayList(java.util.ArrayList) LinkedList(java.util.LinkedList) List(java.util.List) HashMap(java.util.HashMap) Map(java.util.Map)


