Example 61 with MutablePair

use of org.apache.commons.lang3.tuple.MutablePair in project shifu by ShifuML.

the class TreeModel method getFeatureImportances.

/**
 * Get feature importance of current model.
 *
 * @return map of feature importance, key is column index.
 */
public Map<Integer, MutablePair<String, Double>> getFeatureImportances() {
    Map<Integer, MutablePair<String, Double>> importancesSum = new HashMap<Integer, MutablePair<String, Double>>();
    Map<Integer, String> nameMapping = this.getIndependentTreeModel().getNumNameMapping();
    // only a single-element tree list is supported here; bagging models are rejected
    if (this.getIndependentTreeModel().getTrees().size() != 1) {
        throw new RuntimeException("Bagging models are not supported when computing feature importance on a one-element tree model.");
    }
    // number of trees in the single bag, used to average importance across trees
    int treeSize = this.getIndependentTreeModel().getTrees().get(0).size();
    for (TreeNode tree : this.getIndependentTreeModel().getTrees().get(0)) {
        // compute feature importance for the current tree first
        Map<Integer, Double> subImportances = tree.computeFeatureImportance();
        // merge feature importance from different trees
        for (Entry<Integer, Double> entry : subImportances.entrySet()) {
            String featureName = nameMapping.get(entry.getKey());
            MutablePair<String, Double> importance = MutablePair.of(featureName, entry.getValue());
            if (!importancesSum.containsKey(entry.getKey())) {
                // first occurrence of this feature: store its averaged contribution
                importance.setValue(importance.getValue() / treeSize);
                importancesSum.put(entry.getKey(), importance);
            } else {
                // the pair is already in the map, so updating it in place is enough
                MutablePair<String, Double> current = importancesSum.get(entry.getKey());
                current.setValue(current.getValue() + importance.getValue() / treeSize);
            }
        }
    }
    return importancesSum;
}
Also used : HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) MutablePair(org.apache.commons.lang3.tuple.MutablePair) TreeNode(ml.shifu.shifu.core.dtrain.dt.TreeNode)
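
A minimal consumption sketch (not part of the project source): assuming `model` is a loaded TreeModel instance (the variable name is illustrative), the returned map can be sorted by importance like this.

Map<Integer, MutablePair<String, Double>> importances = model.getFeatureImportances();
importances.values().stream()
        // MutablePair implements Map.Entry, so getKey() is the feature name and getValue() the score
        .sorted((a, b) -> Double.compare(b.getValue(), a.getValue()))
        .forEach(p -> System.out.println(p.getKey() + " -> " + p.getValue()));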

Example 62 with MutablePair

use of org.apache.commons.lang3.tuple.MutablePair in project openlmis-stockmanagement by OpenLMIS.

the class Resource2Db method resourceCsvToBatchedPair.

/*
   Converts a Resource containing CSV into a Pair, where Pair.left is the list of SQL column
   names and Pair.right is the rows of data that go into those columns (each row is an array
   whose elements match the order of the columns).
   */
Pair<List<String>, List<Object[]>> resourceCsvToBatchedPair(final Resource resource) throws IOException {
    XLOGGER.entry(resource.getDescription());
    // parse CSV
    try (InputStreamReader isReader = new InputStreamReader(resource.getInputStream())) {
        CSVParser parser = CSVFormat.DEFAULT.withHeader().withNullString("").parse(isReader);
        // read header row
        MutablePair<List<String>, List<Object[]>> readData = new MutablePair<>();
        readData.setLeft(new ArrayList<>(parser.getHeaderMap().keySet()));
        XLOGGER.info("Read header: " + readData.getLeft());
        // read data rows
        List<Object[]> rows = new ArrayList<>();
        for (CSVRecord record : parser.getRecords()) {
            if (!record.isConsistent()) {
                throw new IllegalArgumentException("CSV record inconsistent: " + record);
            }
            List<String> theRow = IteratorUtils.toList(record.iterator());
            rows.add(theRow.toArray());
        }
        readData.setRight(rows);
        XLOGGER.exit("Records read: " + readData.getRight().size());
        return readData;
    }
}
Also used : MutablePair(org.apache.commons.lang3.tuple.MutablePair) InputStreamReader(java.io.InputStreamReader) CSVParser(org.apache.commons.csv.CSVParser) ArrayList(java.util.ArrayList) ArrayList(java.util.ArrayList) List(java.util.List) CSVRecord(org.apache.commons.csv.CSVRecord)
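
A follow-up sketch (hypothetical, imports elided; `resource` and the table name are illustrative): the returned pair maps naturally onto a parameterized batch INSERT.

Pair<List<String>, List<Object[]>> toDb = resourceCsvToBatchedPair(resource);
String columns = String.join(", ", toDb.getLeft());
String placeholders = String.join(", ", Collections.nCopies(toDb.getLeft().size(), "?"));
String sql = "INSERT INTO some_table (" + columns + ") VALUES (" + placeholders + ")";
// each Object[] in toDb.getRight() supplies the bind values for one row, in column order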

Example 63 with MutablePair

use of org.apache.commons.lang3.tuple.MutablePair in project DataX by alibaba.

the class HdfsHelper method transportOneRecord.

public static MutablePair<Text, Boolean> transportOneRecord(Record record, char fieldDelimiter, List<Configuration> columnsConfiguration, TaskPluginCollector taskPluginCollector) {
    MutablePair<List<Object>, Boolean> transportResultList = transportOneRecord(record, columnsConfiguration, taskPluginCollector);
    // holds <converted data, whether the record is dirty>
    MutablePair<Text, Boolean> transportResult = new MutablePair<Text, Boolean>();
    // default to "not dirty" until the conversion result says otherwise
    transportResult.setRight(false);
    if (null != transportResultList) {
        Text recordResult = new Text(StringUtils.join(transportResultList.getLeft(), fieldDelimiter));
        transportResult.setRight(transportResultList.getRight());
        transportResult.setLeft(recordResult);
    }
    return transportResult;
}
Also used : MutablePair(org.apache.commons.lang3.tuple.MutablePair) Text(org.apache.hadoop.io.Text)
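
A hypothetical caller sketch (the `writer` variable and its write call are illustrative, not the project's API): the Boolean flag tells the caller whether the converted line is a dirty record.

MutablePair<Text, Boolean> result = HdfsHelper.transportOneRecord(record, ',', columnsConfiguration, taskPluginCollector);
if (Boolean.TRUE.equals(result.getRight())) {
    // dirty record: skip writing it; the converter is assumed to have reported it already
} else {
    writer.write(result.getLeft()); // write the delimited Text line
}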

Example 64 with MutablePair

use of org.apache.commons.lang3.tuple.MutablePair in project hive by apache.

the class DateIntervalSplitter method getIntervals.

@Override
public List<MutablePair<String, String>> getIntervals(String lowerBound, String upperBound, int numPartitions, TypeInfo typeInfo) {
    List<MutablePair<String, String>> intervals = new ArrayList<>();
    Date dateLower = Date.valueOf(lowerBound);
    Date dateUpper = Date.valueOf(upperBound);
    double dateInterval = (dateUpper.getTime() - dateLower.getTime()) / (double) numPartitions;
    Date splitDateLower, splitDateUpper;
    for (int i = 0; i < numPartitions; i++) {
        splitDateLower = new Date(Math.round(dateLower.getTime() + dateInterval * i));
        splitDateUpper = new Date(Math.round(dateLower.getTime() + dateInterval * (i + 1)));
        // skip degenerate intervals where rounding makes the two bounds equal
        if (splitDateLower.compareTo(splitDateUpper) < 0) {
            intervals.add(new MutablePair<String, String>(splitDateLower.toString(), splitDateUpper.toString()));
        }
    }
    return intervals;
}
Also used : MutablePair(org.apache.commons.lang3.tuple.MutablePair) ArrayList(java.util.ArrayList) Date(java.sql.Date)
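
An illustrative call (a sketch only; the typeInfo argument is unused by this splitter, so null is passed here):

List<MutablePair<String, String>> parts =
        new DateIntervalSplitter().getIntervals("2020-01-01", "2021-01-01", 4, null);
// yields four (lower, upper) date-string pairs covering the year in roughly equal spans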

Example 65 with MutablePair

use of org.apache.commons.lang3.tuple.MutablePair in project hive by apache.

the class JdbcInputFormat method getSplits.

/**
 * {@inheritDoc}
 */
@Override
public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
    try {
        String partitionColumn = job.get(Constants.JDBC_PARTITION_COLUMN);
        int numPartitions = job.getInt(Constants.JDBC_NUM_PARTITIONS, -1);
        String lowerBound = job.get(Constants.JDBC_LOW_BOUND);
        String upperBound = job.get(Constants.JDBC_UPPER_BOUND);
        InputSplit[] splits;
        if (!job.getBoolean(Constants.JDBC_SPLIT_QUERY, true) || numPartitions <= 1) {
            // We will not split this query if:
            // 1. hive.sql.query.split is set to false (either manually or automatically by calcite), or
            // 2. numPartitions <= 1
            splits = new InputSplit[1];
            splits[0] = new JdbcInputSplit(FileInputFormat.getInputPaths(job)[0]);
            LOGGER.info("Creating 1 input split " + splits[0]);
            return splits;
        }
        dbAccessor = DatabaseAccessorFactory.getAccessor(job);
        Path[] tablePaths = FileInputFormat.getInputPaths(job);
        // We will split this query into n splits
        LOGGER.debug("Creating {} input splits", numPartitions);
        if (partitionColumn != null) {
            List<String> columnNames = dbAccessor.getColumnNames(job);
            if (!columnNames.contains(partitionColumn)) {
                throw new IOException("Cannot find partitionColumn:" + partitionColumn + " in " + columnNames);
            }
            List<TypeInfo> hiveColumnTypesList = TypeInfoUtils.getTypeInfosFromTypeString(job.get(serdeConstants.LIST_COLUMN_TYPES));
            TypeInfo typeInfo = hiveColumnTypesList.get(columnNames.indexOf(partitionColumn));
            if (!(typeInfo instanceof PrimitiveTypeInfo)) {
                throw new IOException(partitionColumn + " is a complex type, only primitive type can be a partition column");
            }
            if (lowerBound == null || upperBound == null) {
                Pair<String, String> boundary = dbAccessor.getBounds(job, partitionColumn, lowerBound == null, upperBound == null);
                if (lowerBound == null) {
                    lowerBound = boundary.getLeft();
                }
                if (upperBound == null) {
                    upperBound = boundary.getRight();
                }
            }
            if (lowerBound == null) {
                throw new IOException("lowerBound of " + partitionColumn + " cannot be null");
            }
            if (upperBound == null) {
                throw new IOException("upperBound of " + partitionColumn + " cannot be null");
            }
            IntervalSplitter intervalSplitter = IntervalSplitterFactory.newIntervalSpitter(typeInfo);
            List<MutablePair<String, String>> intervals = intervalSplitter.getIntervals(lowerBound, upperBound, numPartitions, typeInfo);
            if (intervals.size() <= 1) {
                LOGGER.debug("Creating 1 input splits");
                splits = new InputSplit[1];
                splits[0] = new JdbcInputSplit(FileInputFormat.getInputPaths(job)[0]);
                return splits;
            }
            // open up the first and last intervals so rows at or beyond the computed bounds are still covered
            intervals.get(0).setLeft(null);
            intervals.get(intervals.size() - 1).setRight(null);
            splits = new InputSplit[intervals.size()];
            for (int i = 0; i < intervals.size(); i++) {
                splits[i] = new JdbcInputSplit(partitionColumn, intervals.get(i).getLeft(), intervals.get(i).getRight(), tablePaths[0]);
            }
        } else {
            int numRecords = dbAccessor.getTotalNumberOfRecords(job);
            if (numRecords < numPartitions) {
                numPartitions = numRecords;
            }
            int numRecordsPerSplit = numRecords / numPartitions;
            int numSplitsWithExtraRecords = numRecords % numPartitions;
            LOGGER.debug("Num records = {}", numRecords);
            splits = new InputSplit[numPartitions];
            int offset = 0;
            for (int i = 0; i < numPartitions; i++) {
                int numRecordsInThisSplit = numRecordsPerSplit;
                if (i < numSplitsWithExtraRecords) {
                    numRecordsInThisSplit++;
                }
                splits[i] = new JdbcInputSplit(numRecordsInThisSplit, offset, tablePaths[0]);
                offset += numRecordsInThisSplit;
            }
        }
        dbAccessor = null;
        LOGGER.info("Num input splits created {}", splits.length);
        for (InputSplit split : splits) {
            LOGGER.info("split:" + split.toString());
        }
        return splits;
    } catch (Exception e) {
        LOGGER.error("Error while splitting input data.", e);
        throw new IOException(e);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) IntervalSplitter(org.apache.hive.storage.jdbc.spitter.IntervalSplitter) IOException(java.io.IOException) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) IOException(java.io.IOException) MutablePair(org.apache.commons.lang3.tuple.MutablePair) InputSplit(org.apache.hadoop.mapred.InputSplit)
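
A standalone sketch of the offset arithmetic used in the no-partition-column branch: the remainder is spread one extra record per split, front-loaded.

int numRecords = 10, numPartitions = 3;
int perSplit = numRecords / numPartitions;  // 3
int extra = numRecords % numPartitions;     // 1, so the first split gets one extra record
int offset = 0;
for (int i = 0; i < numPartitions; i++) {
    int count = perSplit + (i < extra ? 1 : 0);
    System.out.println("split " + i + ": offset=" + offset + ", records=" + count);
    offset += count;
}
// prints offsets 0/4/7 with record counts 4/3/3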

Aggregations

MutablePair (org.apache.commons.lang3.tuple.MutablePair): 67 usages
Pair (org.apache.commons.lang3.tuple.Pair): 33 usages
Test (org.junit.Test): 28 usages
Message (com.microsoft.azure.sdk.iot.device.Message): 27 usages
IotHubTransportMessage (com.microsoft.azure.sdk.iot.device.transport.IotHubTransportMessage): 27 usages
ConcurrentLinkedQueue (java.util.concurrent.ConcurrentLinkedQueue): 23 usages
HashMap (java.util.HashMap): 18 usages
MqttDeviceTwin (com.microsoft.azure.sdk.iot.device.transport.mqtt.MqttDeviceTwin): 17 usages
ArrayList (java.util.ArrayList): 17 usages
IOException (java.io.IOException): 9 usages
DeviceOperations (com.microsoft.azure.sdk.iot.device.DeviceTwin.DeviceOperations): 8 usages
AreaId (bwem.area.typedef.AreaId): 7 usages
List (java.util.List): 7 usages
Map (java.util.Map): 6 usages
MqttDeviceMethod (com.microsoft.azure.sdk.iot.device.transport.mqtt.MqttDeviceMethod): 5 usages
MiniTile (bwem.tile.MiniTile): 4 usages
Altitude (bwem.typedef.Altitude): 4 usages
ContainerStartRequest (com.datatorrent.stram.StreamingContainerAgent.ContainerStartRequest): 4 usages
ContainerRequest (org.apache.hadoop.yarn.client.api.AMRMClient.ContainerRequest): 4 usages
WalkPosition (org.openbw.bwapi4j.WalkPosition): 4 usages