Use of org.apache.hadoop.hive.ql.exec.persistence.MapJoinPersistableTableContainer in project hive by apache.
The class SparkHashTableSinkOperator, method closeOp.
@Override
public void closeOp(boolean abort) throws HiveException {
  try {
    MapJoinPersistableTableContainer[] mapJoinTables = htsOperator.mapJoinTables;
    byte tag = conf.getTag();
    // guard: the tag must index a non-null side-table
    if (mapJoinTables == null || tag >= mapJoinTables.length || mapJoinTables[tag] == null) {
      LOG.debug("mapJoinTable is null");
    } else if (abort) {
      if (LOG.isDebugEnabled()) {
        LOG.debug("Aborting, skip dumping side-table for tag: " + tag);
      }
    } else {
      // time the flush so it shows up in the Spark perf log
      String method = PerfLogger.SPARK_FLUSH_HASHTABLE + getName();
      perfLogger.PerfLogBegin(CLASS_NAME, method);
      try {
        flushToFile(mapJoinTables[tag], tag);
      } finally {
        perfLogger.PerfLogEnd(CLASS_NAME, method);
      }
    }
    super.closeOp(abort);
  } catch (HiveException e) {
    throw e;
  } catch (Exception e) {
    throw new HiveException(e);
  }
}
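The same guard-then-flush shape can be reproduced outside Hive. Below is a minimal JDK-only sketch, assuming a hypothetical SideTable type and flush() helper in place of MapJoinPersistableTableContainer and flushToFile; the printf stands in for PerfLogBegin/PerfLogEnd.

import java.io.IOException;
import java.util.Map;

public class CloseOpSketch {
  // hypothetical stand-in for Hive's table container
  interface SideTable {
    Map<Object, Object> rows();
  }

  static void flush(SideTable table) throws IOException {
    // stand-in for flushToFile: serialize table.rows() to disk
  }

  static void close(SideTable[] tables, int tag, boolean abort) throws IOException {
    // guard: the tag must index a non-null table
    if (tables == null || tag >= tables.length || tables[tag] == null) {
      return;
    }
    if (abort) {
      return; // on abort, skip dumping the side-table entirely
    }
    long start = System.nanoTime();
    try {
      flush(tables[tag]);
    } finally {
      // mirror PerfLogBegin/PerfLogEnd: always record the elapsed time
      System.out.printf("flush took %d ms%n", (System.nanoTime() - start) / 1_000_000);
    }
  }
}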
Use of org.apache.hadoop.hive.ql.exec.persistence.MapJoinPersistableTableContainer in project hive by apache.
The class HashTableSinkOperator, method flushToFile.
protected void flushToFile() throws IOException, HiveException {
  // get tmp file URI
  Path tmpURI = getExecContext().getLocalWork().getTmpPath();
  LOG.info("Temp URI for side table: {}", tmpURI);
  for (byte tag = 0; tag < mapJoinTables.length; tag++) {
    // get the key and value
    MapJoinPersistableTableContainer tableContainer = mapJoinTables[tag];
    if (tableContainer == null) {
      continue;
    }
    // get current input file name
    String bigBucketFileName = getExecContext().getCurrentBigBucketFile();
    String fileName = getExecContext().getLocalWork().getBucketFileName(bigBucketFileName);
    // get the tmp URI path; it will be a hdfs path if not local mode
    // TODO [MM gap?]: this doesn't work, however this is MR only.
    // The path for writer and reader mismatch:
    // Dump the side-table for tag ... -local-10004/HashTable-Stage-1/MapJoin-a-00-(ds%3D2008-04-08)mm_2.hashtable
    // Load back 1 hashtable file -local-10004/HashTable-Stage-1/MapJoin-a-00-srcsortbucket3outof4.txt.hashtable
    // Hive3 probably won't support MR so do we really care?
    String dumpFilePrefix = conf.getDumpFilePrefix();
    Path path = Utilities.generatePath(tmpURI, dumpFilePrefix, tag, fileName);
    console.printInfo(Utilities.now() + "\tDump the side-table for tag: " + tag
        + " with group count: " + tableContainer.size() + " into file: " + path);
    // get the hashtable file and path
    FileSystem fs = path.getFileSystem(hconf);
    ObjectOutputStream out = new ObjectOutputStream(new BufferedOutputStream(fs.create(path)));
    try {
      mapJoinTableSerdes[tag].persist(out, tableContainer);
    } finally {
      out.close();
    }
    tableContainer.clear();
    FileStatus status = fs.getFileStatus(path);
    console.printInfo(Utilities.now() + "\tUploaded 1 File to: " + path
        + " (" + status.getLen() + " bytes)");
  }
}
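The matching reader reverses the same stream stacking. Here is a JDK-only round-trip sketch, using a plain HashMap and a local temp file instead of Hive's table container, serde, and HDFS FileSystem:

import java.io.*;
import java.nio.file.*;
import java.util.HashMap;

public class DumpLoadSketch {
  public static void main(String[] args) throws IOException, ClassNotFoundException {
    Path path = Files.createTempFile("hashtable", ".bin");

    HashMap<String, Integer> table = new HashMap<>();
    table.put("key-1", 42);

    // dump: buffer the stream, then object-serialize, closing in finally
    ObjectOutputStream out = new ObjectOutputStream(
        new BufferedOutputStream(Files.newOutputStream(path)));
    try {
      out.writeObject(table);
    } finally {
      out.close();
    }

    // load: mirror image of the writer's stream stack
    try (ObjectInputStream in = new ObjectInputStream(
        new BufferedInputStream(Files.newInputStream(path)))) {
      @SuppressWarnings("unchecked")
      HashMap<String, Integer> loaded = (HashMap<String, Integer>) in.readObject();
      System.out.println("loaded " + loaded.size() + " entries");
    }
  }
}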
Use of org.apache.hadoop.hive.ql.exec.persistence.MapJoinPersistableTableContainer in project hive by apache.
The class HashTableSinkOperator, method process.
/*
 * This operator only processes small tables: it reads the key/value pairs
 * and loads them into the hashtable.
 */
@Override
public void process(Object row, int tag) throws HiveException {
  byte alias = (byte) tag;
  // compute keys and values as StandardObjects. Use non-optimized key (MR).
  Object[] currentKey = new Object[joinKeys[alias].size()];
  for (int keyIndex = 0; keyIndex < joinKeys[alias].size(); ++keyIndex) {
    currentKey[keyIndex] = joinKeys[alias].get(keyIndex).evaluate(row);
  }
  MapJoinKeyObject key = new MapJoinKeyObject();
  key.readFromRow(currentKey, joinKeysObjectInspectors[alias]);
  Object[] value = emptyObjectArray;
  if ((hasFilter(alias) && filterMaps[alias].length > 0) || joinValues[alias].size() > 0) {
    value = JoinUtil.computeMapJoinValues(row, joinValues[alias], joinValuesObjectInspectors[alias],
        joinFilters[alias], joinFilterObjectInspectors[alias],
        filterMaps == null ? null : filterMaps[alias]);
  }
  MapJoinPersistableTableContainer tableContainer = mapJoinTables[alias];
  MapJoinRowContainer rowContainer = tableContainer.get(key);
  if (rowContainer == null) {
    if (value.length != 0) {
      rowContainer = new MapJoinEagerRowContainer();
      rowContainer.addRow(value);
    } else {
      // share a single empty container for keys that carry no values
      rowContainer = emptyRowContainer;
    }
    rowNumber++;
    if (rowNumber > hashTableScale && rowNumber % hashTableScale == 0) {
      memoryExhaustionHandler.checkMemoryStatus(tableContainer.size(), rowNumber);
    }
    tableContainer.put(key, rowContainer);
  } else if (rowContainer == emptyRowContainer) {
    // copy-on-write: never mutate the shared empty container
    rowContainer = rowContainer.copy();
    rowContainer.addRow(value);
    tableContainer.put(key, rowContainer);
  } else {
    rowContainer.addRow(value);
  }
}
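The put pattern above (a shared empty container, copied on its first real append) is easy to misuse. A JDK-only sketch with hypothetical names, using List&lt;Object[]&gt; in place of MapJoinRowContainer:

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class RowContainerSketch {
  // shared immutable sentinel for keys seen without values
  static final List<Object[]> EMPTY = List.of();

  static void put(Map<String, List<Object[]>> table, String key, Object[] value) {
    List<Object[]> rows = table.get(key);
    if (rows == null) {
      if (value.length == 0) {
        rows = EMPTY; // nothing to store yet: share the sentinel
      } else {
        rows = new ArrayList<>();
        rows.add(value);
      }
      table.put(key, rows);
    } else if (rows == EMPTY) {
      // copy-on-write: never mutate the shared sentinel
      rows = new ArrayList<>();
      rows.add(value);
      table.put(key, rows);
    } else {
      rows.add(value);
    }
  }

  public static void main(String[] args) {
    Map<String, List<Object[]>> table = new HashMap<>();
    put(table, "k", new Object[] { "v1" });
    put(table, "k", new Object[] { "v2" });
    System.out.println(table.get("k").size()); // prints 2
  }
}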