Use of java.io.ObjectOutputStream in project hive by apache.
The class HashTableSinkOperator, method flushToFile.
protected void flushToFile() throws IOException, HiveException {
  // get tmp file URI
  Path tmpURI = getExecContext().getLocalWork().getTmpPath();
  if (isLogInfoEnabled) {
    LOG.info("Temp URI for side table: " + tmpURI);
  }
  for (byte tag = 0; tag < mapJoinTables.length; tag++) {
    // get the key and value
    MapJoinPersistableTableContainer tableContainer = mapJoinTables[tag];
    if (tableContainer == null) {
      continue;
    }
    // get current input file name
    String bigBucketFileName = getExecContext().getCurrentBigBucketFile();
    String fileName = getExecContext().getLocalWork().getBucketFileName(bigBucketFileName);
    // get the tmp URI path; it will be a hdfs path if not local mode
    String dumpFilePrefix = conf.getDumpFilePrefix();
    Path path = Utilities.generatePath(tmpURI, dumpFilePrefix, tag, fileName);
    console.printInfo(Utilities.now() + "\tDump the side-table for tag: " + tag
        + " with group count: " + tableContainer.size() + " into file: " + path);
    // get the hashtable file and path
    FileSystem fs = path.getFileSystem(hconf);
    ObjectOutputStream out = new ObjectOutputStream(new BufferedOutputStream(fs.create(path), 4096));
    try {
      mapJoinTableSerdes[tag].persist(out, tableContainer);
    } finally {
      out.close();
    }
    tableContainer.clear();
    FileStatus status = fs.getFileStatus(path);
    console.printInfo(Utilities.now() + "\tUploaded 1 File to: " + path + " (" + status.getLen() + " bytes)");
  }
}
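For context, the consumer side of this dump reads the file back through an ObjectInputStream over the same path. A minimal sketch, assuming MapJoinTableContainerSerDe exposes a load(ObjectInputStream) counterpart to persist(); that method is not part of this listing:

// Hedged sketch: read one dumped side-table back into memory on the consumer side.
// load(ObjectInputStream) is assumed to be the counterpart of persist().
FileSystem fs = path.getFileSystem(hconf);
ObjectInputStream in = new ObjectInputStream(new BufferedInputStream(fs.open(path), 4096));
try {
  MapJoinPersistableTableContainer restored = mapJoinTableSerdes[tag].load(in);
  // ... hand "restored" to the map-join operator ...
} finally {
  in.close();
}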
Use of java.io.ObjectOutputStream in project hive by apache.
The class SparkHashTableSinkOperator, method flushToFile.
protected void flushToFile(MapJoinPersistableTableContainer tableContainer, byte tag) throws Exception {
  MapredLocalWork localWork = getExecContext().getLocalWork();
  BucketMapJoinContext mapJoinCtx = localWork.getBucketMapjoinContext();
  Path inputPath = getExecContext().getCurrentInputPath();
  String bigInputPath = null;
  if (inputPath != null && mapJoinCtx != null) {
    Set<String> aliases = ((SparkBucketMapJoinContext) mapJoinCtx).getPosToAliasMap().get((int) tag);
    bigInputPath = mapJoinCtx.getMappingBigFile(aliases.iterator().next(), inputPath.toString());
  }
  // get tmp file URI
  Path tmpURI = localWork.getTmpHDFSPath();
  LOG.info("Temp URI for side table: " + tmpURI);
  // get current bucket file name
  String fileName = localWork.getBucketFileName(bigInputPath);
  // get the tmp URI path; it will be a hdfs path if not local mode
  String dumpFilePrefix = conf.getDumpFilePrefix();
  Path path = Utilities.generatePath(tmpURI, dumpFilePrefix, tag, fileName);
  FileSystem fs = path.getFileSystem(htsOperator.getConfiguration());
  short replication = fs.getDefaultReplication(path);
  // Create the folder and its parents if not there
  fs.mkdirs(path);
  while (true) {
    path = new Path(path, getOperatorId() + "-" + Math.abs(Utilities.randGen.nextInt()));
    try {
      // This will guarantee file name uniqueness.
      if (fs.createNewFile(path)) {
        break;
      }
    } catch (FileExistsException e) {
      // No problem, use a new name
    }
  }
  // TODO find out numOfPartitions for the big table
  int numOfPartitions = replication;
  replication = (short) Math.max(minReplication, numOfPartitions);
  htsOperator.console.printInfo(Utilities.now() + "\tDump the side-table for tag: " + tag
      + " with group count: " + tableContainer.size() + " into file: " + path);
  try {
    // get the hashtable file and path
    OutputStream os = null;
    ObjectOutputStream out = null;
    MapJoinTableContainerSerDe mapJoinTableSerde = htsOperator.mapJoinTableSerdes[tag];
    try {
      os = fs.create(path, replication);
      out = new ObjectOutputStream(new BufferedOutputStream(os, 4096));
      mapJoinTableSerde.persist(out, tableContainer);
    } finally {
      if (out != null) {
        out.close();
      } else if (os != null) {
        os.close();
      }
    }
    FileStatus status = fs.getFileStatus(path);
    htsOperator.console.printInfo(Utilities.now() + "\tUploaded 1 File to: " + path + " (" + status.getLen() + " bytes)");
  } catch (Exception e) {
    // Failed to dump the side-table, remove the partial file
    try {
      fs.delete(path, false);
    } catch (Exception ex) {
      LOG.warn("Got exception in deleting partial side-table dump for tag: " + tag + ", file " + path, ex);
    }
    throw e;
  }
  tableContainer.clear();
}
Use of java.io.ObjectOutputStream in project hive by apache.
The class InputJobInfo, method writeObject.
/**
 * Serialize this object, compressing the partitions which can exceed the
 * allowed jobConf size.
 * @see <a href="https://issues.apache.org/jira/browse/HCATALOG-453">HCATALOG-453</a>
 */
private void writeObject(ObjectOutputStream oos) throws IOException {
  oos.defaultWriteObject();
  Deflater def = new Deflater(Deflater.BEST_COMPRESSION);
  ObjectOutputStream partInfoWriter = new ObjectOutputStream(new DeflaterOutputStream(oos, def));
  partInfoWriter.writeObject(partitions);
  partInfoWriter.close();
}
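The symmetric read path inflates the same embedded stream. A minimal sketch of a matching readObject, assuming partitions is a List<PartInfo> field; the actual method is not part of this listing:

@SuppressWarnings("unchecked")
private void readObject(ObjectInputStream ois) throws IOException, ClassNotFoundException {
  ois.defaultReadObject();
  // Inflate the compressed partition list written by writeObject above.
  ObjectInputStream partInfoReader = new ObjectInputStream(new InflaterInputStream(ois));
  partitions = (List<PartInfo>) partInfoReader.readObject();
}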
Use of java.io.ObjectOutputStream in project hive by apache.
The class DataReaderMaster, method main.
public static void main(String[] args) throws FileNotFoundException, IOException {
  // This config contains all the configuration that master node wants to provide
  // to the HCatalog.
  Properties externalConfigs = new Properties();
  externalConfigs.load(new FileReader(args[0]));
  Map<String, String> config = new HashMap<String, String>();
  for (Entry<Object, Object> kv : externalConfigs.entrySet()) {
    config.put((String) kv.getKey(), (String) kv.getValue());
  }
  // This piece of code runs in master node and gets necessary context.
  ReaderContext context = runsInMaster(config);
  ObjectOutputStream oos = new ObjectOutputStream(new FileOutputStream(new File(args[1])));
  oos.writeObject(context);
  oos.flush();
  oos.close();
  // Master node will serialize readercontext and will make it available at slaves.
}
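A slave reverses this step by deserializing the same file. A minimal sketch; the helper name readContextFromFile is ours, not from the project:

// Hedged sketch of the slave side: deserialize the ReaderContext written by the master.
private static ReaderContext readContextFromFile(String path) throws IOException, ClassNotFoundException {
  ObjectInputStream ois = new ObjectInputStream(new FileInputStream(new File(path)));
  try {
    return (ReaderContext) ois.readObject();
  } finally {
    ois.close();
  }
}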
Use of java.io.ObjectOutputStream in project hive by apache.
The class DataWriterMaster, method main.
public static void main(String[] args) throws FileNotFoundException, IOException, ClassNotFoundException {
  // This config contains all the configuration that master node wants to provide
  // to the HCatalog.
  Properties externalConfigs = new Properties();
  externalConfigs.load(new FileReader(args[0]));
  Map<String, String> config = new HashMap<String, String>();
  for (Entry<Object, Object> kv : externalConfigs.entrySet()) {
    System.err.println("k: " + kv.getKey() + "\t v: " + kv.getValue());
    config.put((String) kv.getKey(), (String) kv.getValue());
  }
  if (args.length == 3 && "commit".equalsIgnoreCase(args[2])) {
    // Then, master commits if everything goes well.
    ObjectInputStream ois = new ObjectInputStream(new FileInputStream(new File(args[1])));
    WriterContext cntxt = (WriterContext) ois.readObject();
    commit(config, true, cntxt);
    System.exit(0);
  }
  // This piece of code runs in master node and gets necessary context.
  WriterContext cntxt = runsInMaster(config);
  // Master node will serialize writercontext and will make it available at slaves.
  File f = new File(args[1]);
  f.delete();
  ObjectOutputStream oos = new ObjectOutputStream(new FileOutputStream(f));
  oos.writeObject(cntxt);
  oos.flush();
  oos.close();
}
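Between the two master invocations, slaves deserialize the same context file before writing their records; a minimal sketch of that step (the surrounding slave logic is an assumption, not part of this listing):

// Hedged sketch: a slave reads back the WriterContext the master serialized to args[1].
ObjectInputStream ois = new ObjectInputStream(new FileInputStream(new File(args[1])));
try {
  WriterContext cntxt = (WriterContext) ois.readObject();
  // ... the slave writes its share of records using cntxt, then the master is re-run with "commit" ...
} finally {
  ois.close();
}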