Search in sources :

Example 1 with BlobAddress

use of org.knime.core.data.container.BlobDataCell.BlobAddress in project knime-core by knime.

the class BufferFromFileIteratorVersion1x method readDataCell.

/**
 * Deserializes the next cell from {@code inStream}. Errors are not handled here; any
 * {@link IOException} is passed straight on to the caller.
 */
private DataCell readDataCell(final DCObjectInputStream inStream) throws IOException {
    // streams written in format version 1 are decoded by the dedicated reader method
    if (m_tableFormatReader.getReadVersion() == 1) {
        return readDataCellVersion1(inStream);
    }
    inStream.setCurrentClassLoader(null);
    final byte leadingByte = inStream.readByte();
    if (leadingByte == BYTE_TYPE_MISSING) {
        return DataType.getMissingCell();
    }
    // a serialization marker is followed by the byte that identifies the actual cell type
    final boolean javaSerialized = leadingByte == BYTE_TYPE_SERIALIZATION;
    final byte typeByte = javaSerialized ? inStream.readByte() : leadingByte;
    final CellClassInfo cellInfo = m_tableFormatReader.getTypeForChar(typeByte);
    final Class<? extends DataCell> cellClass = cellInfo.getCellClass();
    if (BlobDataCell.class.isAssignableFrom(cellClass)) {
        // blobs are not read here; only their address is, wrapped together with the owning buffer
        final BlobAddress blobAddress = inStream.readBlobAddress();
        Buffer owner = m_tableFormatReader.getBuffer();
        if (blobAddress.getBufferID() != owner.getBufferID()) {
            // the blob belongs to a different buffer; look its table up in the global repository
            final ContainerTable owningTable = owner.getGlobalRepository().get(blobAddress.getBufferID());
            if (owningTable == null) {
                throw new IOException("Unable to retrieve table that owns the blob cell");
            }
            owner = owningTable.getBuffer();
        }
        return new BlobWrapperDataCell(owner, blobAddress, cellInfo);
    }
    if (!javaSerialized) {
        final DataCellSerializer<? extends DataCell> cellSerializer = cellInfo.getSerializer();
        assert cellSerializer != null;
        return inStream.readDataCell(cellSerializer);
    }
    try {
        // resolve classes during java deserialization with the loader of the cell class
        inStream.setCurrentClassLoader(cellClass.getClassLoader());
        return (DataCell) inStream.readObject();
    } catch (ClassNotFoundException cnfe) {
        throw new IOException(cnfe.getMessage(), cnfe);
    }
}
Also used : IOException(java.io.IOException) BlobAddress(org.knime.core.data.container.BlobDataCell.BlobAddress) DataCell(org.knime.core.data.DataCell)

Example 2 with BlobAddress

use of org.knime.core.data.container.BlobDataCell.BlobAddress in project knime-core by knime.

the class Buffer method writeBlobDataCell.

/**
 * Stores the given blob cell in a separate blob file of this buffer and records the
 * index of that file in the passed address.
 *
 * <p>If the cell already carries a different address and the buffer owning that address
 * can be found in the global or local repository, the existing blob file is copied
 * instead of serializing the cell again. Otherwise the cell is serialized into the new
 * file, optionally gzip-compressed.
 *
 * @param cell the blob cell to persist
 * @param a the address under which the blob is stored in this buffer; its
 *            index-in-column is assigned by this method
 * @throws IOException if copying or writing the blob file fails
 */
private void writeBlobDataCell(final BlobDataCell cell, final BlobAddress a) throws IOException {
    final CellClassInfo cellInfo = CellClassInfo.get(cell);
    DataCellSerializer<DataCell> ser = ((DefaultTableStoreWriter) m_outputWriter).getSerializerForDataCell(cellInfo);
    // the per-column blob counters are allocated lazily on the first blob
    if (m_indicesOfBlobInColumns == null) {
        m_indicesOfBlobInColumns = new int[m_spec.getNumColumns()];
    }
    int column = a.getColumn();
    int indexInColumn = m_indicesOfBlobInColumns[column]++;
    a.setIndexOfBlobInColumn(indexInColumn);
    boolean isToCompress = Buffer.isUseCompressionForBlobs(cellInfo);
    File outFile = getBlobFile(indexInColumn, column, true, isToCompress);
    BlobAddress originalBA = cell.getBlobAddress();
    // A cell without an address cannot have been written elsewhere, so it is serialized
    // below. (Previously a null address was dereferenced here.)
    if (originalBA != null && !originalBA.equals(a)) {
        // the cell carries a different address: try to locate the buffer that owns it
        // and copy the existing blob file
        int originalBufferIndex = originalBA.getBufferID();
        Buffer originalBuffer = null;
        ContainerTable t = getGlobalRepository().get(originalBufferIndex);
        if (t != null) {
            originalBuffer = t.getBuffer();
        } else if (getLocalRepository() != null) {
            t = getLocalRepository().get(originalBufferIndex);
            if (t != null) {
                originalBuffer = t.getBuffer();
            }
        }
        if (originalBuffer != null) {
            int index = originalBA.getIndexOfBlobInColumn();
            int col = originalBA.getColumn();
            boolean compress = originalBA.isUseCompression();
            File source = originalBuffer.getBlobFile(index, col, false, compress);
            FileUtil.copy(source, outFile);
            return;
        }
    }
    // The file stream is owned by the outer try so that it is released even if one of the
    // wrapping steps below throws before the object stream takes over. Closing it a second
    // time after the object stream has been closed is harmless.
    try (OutputStream fileOut = new FileOutputStream(outFile)) {
        Buffer.onFileCreated(outFile);
        OutputStream out = new BufferedOutputStream(fileOut);
        if (isToCompress) {
            out = new GZIPOutputStream(out);
            // buffering the gzip stream brings another performance boost
            // (in one case from 5mins down to 2 mins)
            out = new BufferedOutputStream(out);
        }
        try (DCObjectOutputVersion2 outStream = new DCObjectOutputVersion2(out)) {
            if (ser != null) {
                // DataCell is datacell-serializable
                outStream.writeDataCellPerKNIMESerializer(ser, cell);
            } else {
                outStream.writeDataCellPerJavaSerialization(cell);
            }
        }
    }
}
Also used : GZIPOutputStream(java.util.zip.GZIPOutputStream) ZipOutputStream(java.util.zip.ZipOutputStream) ByteArrayOutputStream(java.io.ByteArrayOutputStream) NonClosableOutputStream(org.knime.core.data.util.NonClosableOutputStream) BufferedOutputStream(java.io.BufferedOutputStream) OutputStream(java.io.OutputStream) FileOutputStream(java.io.FileOutputStream) GZIPOutputStream(java.util.zip.GZIPOutputStream) BlobAddress(org.knime.core.data.container.BlobDataCell.BlobAddress) FileOutputStream(java.io.FileOutputStream) DataCell(org.knime.core.data.DataCell) File(java.io.File) BufferedOutputStream(java.io.BufferedOutputStream)

Example 3 with BlobAddress

use of org.knime.core.data.container.BlobDataCell.BlobAddress in project knime-core by knime.

the class Buffer method handleIncomingBlob.

/**
 * Prepares a cell that is about to be added to this buffer with respect to blob handling.
 *
 * <p>Cells are returned unchanged when the output format is not a
 * {@code DefaultTableStoreFormat} or when the cell is neither a blob, a blob wrapper nor a
 * collection. Collections are returned as the same instance after each contained element
 * has been processed recursively. Blob cells and blob wrapper cells are either referenced
 * in the buffer that already owns them or copied/written into this buffer; in both cases a
 * {@link BlobWrapperDataCell} is returned.
 *
 * @param cell the incoming cell
 * @param col the column index the cell belongs to
 * @param totalColCount total number of columns; used to size the per-column blob counters
 *            when they are allocated here
 * @param copyForVersionHop if set, blobs owned by this very buffer may be cloned (see the
 *            comments in the method body for the exact condition)
 * @param forceCopyOfBlobsArg if set, blobs are copied into this buffer even if another
 *            buffer already holds them
 * @return the cell to store: either the argument itself or a blob wrapper cell
 * @throws IOException if copying or writing a blob file fails
 */
private DataCell handleIncomingBlob(final DataCell cell, final int col, final int totalColCount, final boolean copyForVersionHop, final boolean forceCopyOfBlobsArg) throws IOException {
    if (!(m_outputFormat instanceof DefaultTableStoreFormat)) {
        return cell;
    }
    // classify the incoming cell: blob wrapper, plain blob, collection or ordinary cell;
    // only the first two continue past this if/else cascade
    boolean isWrapperCell = cell instanceof BlobWrapperDataCell;
    BlobAddress ad;
    final CellClassInfo cl;
    BlobWrapperDataCell wc;
    if (isWrapperCell) {
        wc = (BlobWrapperDataCell) cell;
        ad = wc.getAddress();
        cl = wc.getBlobClassInfo();
    } else if (cell instanceof BlobDataCell) {
        wc = null;
        cl = CellClassInfo.get(cell);
        ad = ((BlobDataCell) cell).getBlobAddress();
    } else if (cell instanceof CellCollection) {
        CellCollection cdv = (CellCollection) cell;
        if (cdv.containsBlobWrapperCells()) {
            Iterator<DataCell> it = cdv.iterator();
            if (!(it instanceof BlobSupportDataCellIterator)) {
                LOGGER.coding("(Collection) DataCell of class \"" + cell.getClass().getSimpleName() + "\" contains Blobs, but does not " + "return an iterator supporting those " + "(expected " + BlobSupportDataCellIterator.class.getName() + ", got " + it.getClass().getName() + ")");
            }
            // process every element recursively; an element that came back as a different
            // instance is replaced in the collection through the blob-aware iterator
            while (it.hasNext()) {
                DataCell n = it instanceof BlobSupportDataCellIterator ? ((BlobSupportDataCellIterator) it).nextWithBlobSupport() : it.next();
                DataCell correctedCell = handleIncomingBlob(n, col, totalColCount, copyForVersionHop, forceCopyOfBlobsArg);
                if (correctedCell != n) {
                    if (it instanceof BlobSupportDataCellIterator) {
                        BlobSupportDataCellIterator bsdi = (BlobSupportDataCellIterator) it;
                        bsdi.replaceLastReturnedWithWrapperCell(correctedCell);
                    } else {
                        // nothing to do: the coding problem was reported above.
                    }
                }
            }
        }
        return cell;
    } else {
        // ordinary cell (e.g. double cell)
        return cell;
    }
    boolean forceCopyOfBlobs = forceCopyOfBlobsArg;
    Buffer ownerBuffer;
    if (ad != null) {
        // a buffer ID of -1 on either side forces a copy: either copying from or to an
        // isolated buffer (or both)
        forceCopyOfBlobs |= ad.getBufferID() == -1 || getBufferID() == -1;
        // the blob is owned by this buffer if the IDs match and this buffer has a real
        // (non -1) ID
        if (ad.getBufferID() == getBufferID() && getBufferID() != -1) {
            ownerBuffer = this;
        } else {
            // otherwise look the owner up in the global repository; it stays null if the
            // ID is not registered there
            ContainerTable t = m_globalRepository.get(ad.getBufferID());
            ownerBuffer = t != null ? t.getBuffer() : null;
        }
        /*
         * This can only be true if the argument row contains wrapper cells for blobs that
         * do not have a buffer set; that is, someone took a BlobDataCell from a
         * predecessor node (ad != null) and put it manually into a new wrapper cell
         * (wc != null) - by doing that you lose the buffer info (wc.getBuffer == null).
         */
        if (isWrapperCell && wc.getBuffer() == null) {
            wc.setAddressAndBuffer(ad, ownerBuffer);
        }
    } else {
        ownerBuffer = null;
    }
    // whether we have to make a clone of the blob cell for a version hop: true if
    // copyForVersionHop is set, the blob is addressed to this buffer and its index
    // corresponds to the next assignable m_indicesOfBlobInColumns[col]
    boolean isToCloneForVersionHop = false;
    if (copyForVersionHop) {
        isToCloneForVersionHop = ad != null && ad.getBufferID() == getBufferID();
        // NOTE(review): presumably the same blob may be referenced by this buffer
        // multiple times -- the index comparison below avoids copying the duplicates
        if (isToCloneForVersionHop && m_indicesOfBlobInColumns == null) {
            // first to assign
            isToCloneForVersionHop = ad.getIndexOfBlobInColumn() == 0;
            assert isToCloneForVersionHop : "Clone of buffer does not return blobs in order";
        } else if (isToCloneForVersionHop && m_indicesOfBlobInColumns != null) {
            isToCloneForVersionHop = ad.getIndexOfBlobInColumn() == m_indicesOfBlobInColumns[col];
        }
    }
    // if we have to clone the blob because the forceCopyOfBlobs flag is
    // on (e.g. because the owning node is a loop end node)
    boolean isToCloneDueToForceCopyOfBlobs = false;
    // don't overwrite the deep-clone
    if (forceCopyOfBlobs && !isToCloneForVersionHop) {
        if (m_copiedBlobsMap == null) {
            m_copiedBlobsMap = new HashMap<BlobAddress, BlobAddress>();
        }
        // copy only if not previously copied into this buffer; otherwise hand out a
        // wrapper pointing to the earlier copy
        if (ad != null) {
            BlobAddress previousCopyAddress = m_copiedBlobsMap.get(ad);
            if (previousCopyAddress == null) {
                isToCloneDueToForceCopyOfBlobs = true;
                if (isWrapperCell && ownerBuffer == null) {
                    ownerBuffer = ((BlobWrapperDataCell) cell).getBuffer();
                }
            } else {
                return new BlobWrapperDataCell(this, previousCopyAddress, cl);
            }
        }
    }
    // we have to make a clone
    if (ownerBuffer == null || isToCloneForVersionHop || isToCloneDueToForceCopyOfBlobs) {
        // need to set ownership if this blob was not assigned yet
        // or has been assigned to an unlinked (i.e. local) buffer
        boolean isCompress = ad != null ? ad.isUseCompression() : isUseCompressionForBlobs(cl);
        BlobAddress rewrite = new BlobAddress(m_bufferID, col, isCompress);
        if (ad == null) {
            // take ownership
            if (isWrapperCell) {
                ((BlobWrapperDataCell) cell).setAddressAndBuffer(rewrite, this);
            } else {
                ((BlobDataCell) cell).setBlobAddress(rewrite);
            }
            ad = rewrite;
        }
        if (m_indicesOfBlobInColumns == null) {
            m_indicesOfBlobInColumns = new int[totalColCount];
        }
        // the buffer to copy the blob file from (if at all)
        Buffer b = null;
        if (isToCloneDueToForceCopyOfBlobs) {
            b = ownerBuffer;
            // remember the copy so that later occurrences of the same blob reuse it
            m_copiedBlobsMap.put(ad, rewrite);
        } else {
            ContainerTable tbl = m_localRepository.get(ad.getBufferID());
            b = tbl == null ? null : tbl.getBuffer();
        }
        if (b != null && !isToCloneForVersionHop) {
            // a source buffer is known: copy its blob file into the next free slot of
            // this buffer's column
            int indexBlobInCol = m_indicesOfBlobInColumns[col]++;
            rewrite.setIndexOfBlobInColumn(indexBlobInCol);
            File source = b.getBlobFile(ad.getIndexOfBlobInColumn(), ad.getColumn(), false, ad.isUseCompression());
            File dest = getBlobFile(indexBlobInCol, col, true, ad.isUseCompression());
            FileUtil.copy(source, dest);
            wc = new BlobWrapperDataCell(this, rewrite, cl);
        } else {
            // no file to copy from: obtain the blob cell itself and serialize it
            BlobDataCell bc;
            if (isWrapperCell) {
                DataCell c = ((BlobWrapperDataCell) cell).getCell();
                bc = c.isMissing() ? null : (BlobDataCell) c;
            } else {
                bc = (BlobDataCell) cell;
            }
            // bc is null if the wrapper resolved to a missing cell
            // NOTE(review): presumably a missing cell is used to take an error along -- confirm
            if (bc != null) {
                if (m_outputWriter == null) {
                    ensureTempFileExists();
                    initOutputWriter(m_binFile);
                }
                writeBlobDataCell(bc, rewrite);
                wc = new BlobWrapperDataCell(this, rewrite, cl, bc);
            } else {
                wc = new BlobWrapperDataCell(this, rewrite, cl);
            }
        }
        m_containsBlobs = true;
    } else {
        // blob has been saved in one of the predecessor nodes
        if (isWrapperCell) {
            wc = (BlobWrapperDataCell) cell;
        } else {
            wc = new BlobWrapperDataCell(ownerBuffer, ad, cl);
        }
    }
    return wc;
}
Also used : BlobSupportDataCellIterator(org.knime.core.data.collection.BlobSupportDataCellIterator) CellCollection(org.knime.core.data.collection.CellCollection) BlobAddress(org.knime.core.data.container.BlobDataCell.BlobAddress) DataCell(org.knime.core.data.DataCell) File(java.io.File)

Aggregations

DataCell (org.knime.core.data.DataCell)3 BlobAddress (org.knime.core.data.container.BlobDataCell.BlobAddress)3 File (java.io.File)2 BufferedOutputStream (java.io.BufferedOutputStream)1 ByteArrayOutputStream (java.io.ByteArrayOutputStream)1 FileOutputStream (java.io.FileOutputStream)1 IOException (java.io.IOException)1 OutputStream (java.io.OutputStream)1 GZIPOutputStream (java.util.zip.GZIPOutputStream)1 ZipOutputStream (java.util.zip.ZipOutputStream)1 BlobSupportDataCellIterator (org.knime.core.data.collection.BlobSupportDataCellIterator)1 CellCollection (org.knime.core.data.collection.CellCollection)1 NonClosableOutputStream (org.knime.core.data.util.NonClosableOutputStream)1