Search in sources :

Example 16 with ObjectOutputStream

use of java.io.ObjectOutputStream in project hive by apache.

Example from the HashTableSinkOperator class, method flushToFile:

/**
 * Dumps every in-memory map-join side table to a file under the local-work
 * tmp path so downstream tasks can reload it.
 *
 * <p>One file is written per non-null tag; after a successful dump the
 * in-memory container is cleared to release memory.
 *
 * @throws IOException if the dump file cannot be created or written
 * @throws HiveException if serializing a table container fails
 */
protected void flushToFile() throws IOException, HiveException {
    // get tmp file URI
    Path tmpURI = getExecContext().getLocalWork().getTmpPath();
    if (isLogInfoEnabled) {
        LOG.info("Temp URI for side table: " + tmpURI);
    }
    // NOTE(review): a byte loop counter overflows if mapJoinTables.length > 127;
    // presumably join tags stay small — confirm against callers.
    for (byte tag = 0; tag < mapJoinTables.length; tag++) {
        // get the key and value
        MapJoinPersistableTableContainer tableContainer = mapJoinTables[tag];
        if (tableContainer == null) {
            // no side table was built for this tag
            continue;
        }
        // get current input file name
        String bigBucketFileName = getExecContext().getCurrentBigBucketFile();
        String fileName = getExecContext().getLocalWork().getBucketFileName(bigBucketFileName);
        // get the tmp URI path; it will be a hdfs path if not local mode
        String dumpFilePrefix = conf.getDumpFilePrefix();
        Path path = Utilities.generatePath(tmpURI, dumpFilePrefix, tag, fileName);
        console.printInfo(Utilities.now() + "\tDump the side-table for tag: " + tag + " with group count: " + tableContainer.size() + " into file: " + path);
        // get the hashtable file and path
        FileSystem fs = path.getFileSystem(hconf);
        // try-with-resources guarantees the stream is closed even when persist()
        // throws, and correctly suppresses a secondary close() failure instead of
        // masking the original exception (the old try/finally could do the latter)
        try (ObjectOutputStream out = new ObjectOutputStream(new BufferedOutputStream(fs.create(path), 4096))) {
            mapJoinTableSerdes[tag].persist(out, tableContainer);
        }
        // free the in-memory table only after a successful dump
        tableContainer.clear();
        FileStatus status = fs.getFileStatus(path);
        console.printInfo(Utilities.now() + "\tUploaded 1 File to: " + path + " (" + status.getLen() + " bytes)");
    }
}
Also used : Path(org.apache.hadoop.fs.Path) FileStatus(org.apache.hadoop.fs.FileStatus) MapJoinPersistableTableContainer(org.apache.hadoop.hive.ql.exec.persistence.MapJoinPersistableTableContainer) FileSystem(org.apache.hadoop.fs.FileSystem) ObjectOutputStream(java.io.ObjectOutputStream) BufferedOutputStream(java.io.BufferedOutputStream)

Example 17 with ObjectOutputStream

use of java.io.ObjectOutputStream in project hive by apache.

Example from the SparkHashTableSinkOperator class, method flushToFile:

/**
 * Serializes one map-join side table (identified by {@code tag}) into a
 * uniquely named file under the shared HDFS tmp path, so that other tasks
 * can later load it. On any failure the partially written file is removed
 * before the exception is rethrown; on success the in-memory container is
 * cleared to release memory.
 *
 * @param tableContainer the in-memory side table to dump
 * @param tag join position of this table in the map-join
 * @throws Exception if creating, writing, or serializing the dump file fails
 */
protected void flushToFile(MapJoinPersistableTableContainer tableContainer, byte tag) throws Exception {
    MapredLocalWork localWork = getExecContext().getLocalWork();
    BucketMapJoinContext mapJoinCtx = localWork.getBucketMapjoinContext();
    Path inputPath = getExecContext().getCurrentInputPath();
    String bigInputPath = null;
    if (inputPath != null && mapJoinCtx != null) {
        // Map the current small-table input file back to the corresponding
        // big-table bucket file; the lookup uses the first alias registered
        // for this tag position.
        Set<String> aliases = ((SparkBucketMapJoinContext) mapJoinCtx).getPosToAliasMap().get((int) tag);
        bigInputPath = mapJoinCtx.getMappingBigFile(aliases.iterator().next(), inputPath.toString());
    }
    // get tmp file URI
    Path tmpURI = localWork.getTmpHDFSPath();
    LOG.info("Temp URI for side table: " + tmpURI);
    // get current bucket file name
    String fileName = localWork.getBucketFileName(bigInputPath);
    // get the tmp URI path; it will be a hdfs path if not local mode
    String dumpFilePrefix = conf.getDumpFilePrefix();
    Path path = Utilities.generatePath(tmpURI, dumpFilePrefix, tag, fileName);
    FileSystem fs = path.getFileSystem(htsOperator.getConfiguration());
    // Filesystem default replication, reused below as a stand-in for the
    // number of big-table partitions (see TODO).
    short replication = fs.getDefaultReplication(path);
    // Create the folder and its parents if not there
    fs.mkdirs(path);
    // Append a random suffix until createNewFile succeeds; createNewFile is
    // atomic on HDFS, so this guarantees each task writes a distinct file.
    while (true) {
        path = new Path(path, getOperatorId() + "-" + Math.abs(Utilities.randGen.nextInt()));
        try {
            // This will guarantee file name uniqueness.
            if (fs.createNewFile(path)) {
                break;
            }
        } catch (FileExistsException e) {
        // No problem, use a new name
        }
    }
    // TODO find out numOfPartitions for the big table
    int numOfPartitions = replication;
    // Bump replication to at least minReplication so that many readers do not
    // hammer a single datanode when they all load the side table.
    replication = (short) Math.max(minReplication, numOfPartitions);
    htsOperator.console.printInfo(Utilities.now() + "\tDump the side-table for tag: " + tag + " with group count: " + tableContainer.size() + " into file: " + path);
    try {
        // get the hashtable file and path
        OutputStream os = null;
        ObjectOutputStream out = null;
        MapJoinTableContainerSerDe mapJoinTableSerde = htsOperator.mapJoinTableSerdes[tag];
        try {
            os = fs.create(path, replication);
            out = new ObjectOutputStream(new BufferedOutputStream(os, 4096));
            mapJoinTableSerde.persist(out, tableContainer);
        } finally {
            // Close the outermost stream that was successfully constructed;
            // closing `out` also closes `os`, so the else-branch only runs when
            // the ObjectOutputStream constructor itself failed.
            if (out != null) {
                out.close();
            } else if (os != null) {
                os.close();
            }
        }
        FileStatus status = fs.getFileStatus(path);
        htsOperator.console.printInfo(Utilities.now() + "\tUploaded 1 File to: " + path + " (" + status.getLen() + " bytes)");
    } catch (Exception e) {
        // Failed to dump the side-table, remove the partial file
        try {
            fs.delete(path, false);
        } catch (Exception ex) {
            LOG.warn("Got exception in deleting partial side-table dump for tag: " + tag + ", file " + path, ex);
        }
        throw e;
    }
    // Free the in-memory table only after the dump fully succeeded.
    tableContainer.clear();
}
Also used : Path(org.apache.hadoop.fs.Path) FileStatus(org.apache.hadoop.fs.FileStatus) OutputStream(java.io.OutputStream) BufferedOutputStream(java.io.BufferedOutputStream) ObjectOutputStream(java.io.ObjectOutputStream) ObjectOutputStream(java.io.ObjectOutputStream) FileExistsException(org.apache.commons.io.FileExistsException) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) MapJoinTableContainerSerDe(org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainerSerDe) SparkBucketMapJoinContext(org.apache.hadoop.hive.ql.plan.SparkBucketMapJoinContext) BucketMapJoinContext(org.apache.hadoop.hive.ql.plan.BucketMapJoinContext) FileSystem(org.apache.hadoop.fs.FileSystem) MapredLocalWork(org.apache.hadoop.hive.ql.plan.MapredLocalWork) BufferedOutputStream(java.io.BufferedOutputStream) FileExistsException(org.apache.commons.io.FileExistsException)

Example 18 with ObjectOutputStream

use of java.io.ObjectOutputStream in project hive by apache.

Example from the InputJobInfo class, method writeObject:

/**
   * Serialize this object, compressing the partitions which can exceed the
   * allowed jobConf size.
   * @see <a href="https://issues.apache.org/jira/browse/HCATALOG-453">HCATALOG-453</a>
   */
/**
 * Serialize this object, compressing the partitions which can exceed the
 * allowed jobConf size.
 *
 * <p>The partition list is written through a nested ObjectOutputStream layered
 * on a DeflaterOutputStream. We finish the deflater explicitly instead of
 * closing the nested streams, because closing them would cascade and close the
 * caller-owned {@code oos} mid-serialization; the compressed bytes produced
 * are identical either way. The native {@link Deflater} is released in a
 * finally block so its off-heap memory does not wait for finalization.
 *
 * @see <a href="https://issues.apache.org/jira/browse/HCATALOG-453">HCATALOG-453</a>
 */
private void writeObject(ObjectOutputStream oos) throws IOException {
    oos.defaultWriteObject();
    Deflater def = new Deflater(Deflater.BEST_COMPRESSION);
    try {
        DeflaterOutputStream compressed = new DeflaterOutputStream(oos, def);
        ObjectOutputStream partInfoWriter = new ObjectOutputStream(compressed);
        partInfoWriter.writeObject(partitions);
        // Drain the object stream's buffer, then emit the deflate trailer
        // without closing the underlying oos (close() would do finish() +
        // oos.close()).
        partInfoWriter.flush();
        compressed.finish();
        compressed.flush();
    } finally {
        // Release the deflater's native buffers deterministically.
        def.end();
    }
}
Also used : Deflater(java.util.zip.Deflater) DeflaterOutputStream(java.util.zip.DeflaterOutputStream) ObjectOutputStream(java.io.ObjectOutputStream)

Example 19 with ObjectOutputStream

use of java.io.ObjectOutputStream in project hive by apache.

Example from the DataReaderMaster class, method main:

/**
 * Master-node entry point: loads external configuration, obtains a
 * ReaderContext from HCatalog, and serializes it to a file so slave nodes
 * can pick it up.
 *
 * @param args args[0] = properties file with master-provided configuration;
 *             args[1] = output file for the serialized ReaderContext
 * @throws FileNotFoundException if either file path cannot be opened
 * @throws IOException if reading the config or writing the context fails
 */
public static void main(String[] args) throws FileNotFoundException, IOException {
    // This config contains all the configuration that master node wants to provide
    // to the HCatalog.
    Properties externalConfigs = new Properties();
    // try-with-resources: the reader was previously leaked on every run
    try (FileReader configReader = new FileReader(args[0])) {
        externalConfigs.load(configReader);
    }
    Map<String, String> config = new HashMap<String, String>();
    for (Entry<Object, Object> kv : externalConfigs.entrySet()) {
        config.put((String) kv.getKey(), (String) kv.getValue());
    }
    // This piece of code runs in master node and gets necessary context.
    ReaderContext context = runsInMaster(config);
    // Master node serializes the ReaderContext and makes it available to slaves.
    // close() flushes, and try-with-resources closes even if writeObject throws.
    try (ObjectOutputStream oos = new ObjectOutputStream(new FileOutputStream(new File(args[1])))) {
        oos.writeObject(context);
    }
}
Also used : HashMap(java.util.HashMap) ReaderContext(org.apache.hive.hcatalog.data.transfer.ReaderContext) FileOutputStream(java.io.FileOutputStream) FileReader(java.io.FileReader) Properties(java.util.Properties) ObjectOutputStream(java.io.ObjectOutputStream) File(java.io.File)

Example 20 with ObjectOutputStream

use of java.io.ObjectOutputStream in project hive by apache.

Example from the DataWriterMaster class, method main:

/**
 * Master-node entry point for writes: loads external configuration, then
 * either commits a previously serialized WriterContext (when invoked with
 * "commit") or creates a new WriterContext and serializes it for slaves.
 *
 * @param args args[0] = properties file with master-provided configuration;
 *             args[1] = file holding the serialized WriterContext;
 *             args[2] (optional) = "commit" to finalize a prior write
 * @throws FileNotFoundException if either file path cannot be opened
 * @throws IOException if reading the config or (de)serializing the context fails
 * @throws ClassNotFoundException if the serialized WriterContext class is missing
 */
public static void main(String[] args) throws FileNotFoundException, IOException, ClassNotFoundException {
    // This config contains all the configuration that master node wants to provide
    // to the HCatalog.
    Properties externalConfigs = new Properties();
    // try-with-resources: the reader was previously leaked on every run
    try (FileReader configReader = new FileReader(args[0])) {
        externalConfigs.load(configReader);
    }
    Map<String, String> config = new HashMap<String, String>();
    for (Entry<Object, Object> kv : externalConfigs.entrySet()) {
        System.err.println("k: " + kv.getKey() + "\t v: " + kv.getValue());
        config.put((String) kv.getKey(), (String) kv.getValue());
    }
    if (args.length == 3 && "commit".equalsIgnoreCase(args[2])) {
        // Then, master commits if everything goes well.
        WriterContext committed;
        // try-with-resources: the input stream was previously never closed
        try (ObjectInputStream ois = new ObjectInputStream(new FileInputStream(new File(args[1])))) {
            committed = (WriterContext) ois.readObject();
        }
        commit(config, true, committed);
        System.exit(0);
    }
    // This piece of code runs in master node and gets necessary context.
    WriterContext cntxt = runsInMaster(config);
    // Master node will serialize writercontext and will make it available at slaves.
    File f = new File(args[1]);
    // Best-effort removal of a stale context file; create() below overwrites anyway.
    f.delete();
    // close() flushes, and try-with-resources closes even if writeObject throws.
    try (ObjectOutputStream oos = new ObjectOutputStream(new FileOutputStream(f))) {
        oos.writeObject(cntxt);
    }
}
Also used : HashMap(java.util.HashMap) Properties(java.util.Properties) ObjectOutputStream(java.io.ObjectOutputStream) FileInputStream(java.io.FileInputStream) WriterContext(org.apache.hive.hcatalog.data.transfer.WriterContext) FileOutputStream(java.io.FileOutputStream) FileReader(java.io.FileReader) File(java.io.File) ObjectInputStream(java.io.ObjectInputStream)

Aggregations

ObjectOutputStream (java.io.ObjectOutputStream)958 ByteArrayOutputStream (java.io.ByteArrayOutputStream)657 ObjectInputStream (java.io.ObjectInputStream)386 ByteArrayInputStream (java.io.ByteArrayInputStream)332 IOException (java.io.IOException)312 FileOutputStream (java.io.FileOutputStream)132 Test (org.junit.Test)130 File (java.io.File)75 BufferedOutputStream (java.io.BufferedOutputStream)46 ObjectOutput (java.io.ObjectOutput)35 OutputStream (java.io.OutputStream)35 HashMap (java.util.HashMap)35 FileInputStream (java.io.FileInputStream)24 ArrayList (java.util.ArrayList)24 InputStream (java.io.InputStream)22 FileNotFoundException (java.io.FileNotFoundException)20 Serializable (java.io.Serializable)15 Test (org.testng.annotations.Test)15 NotSerializableException (java.io.NotSerializableException)14 Map (java.util.Map)13