Search in sources :

Example 21 with DataOutputStream

Use of java.io.DataOutputStream in the project pinot by LinkedIn.

In the class DataTableImplV2, the method serializeMetadata:

/**
 * Serializes the {@code _metadata} map into a length-prefixed byte layout:
 * the entry count, then for each entry the UTF-8 key (int length + bytes)
 * followed by the UTF-8 value (int length + bytes).
 *
 * @return the serialized metadata bytes
 * @throws IOException if writing to the in-memory stream fails (should not happen)
 */
private byte[] serializeMetadata() throws IOException {
    ByteArrayOutputStream buffer = new ByteArrayOutputStream();
    DataOutputStream out = new DataOutputStream(buffer);
    out.writeInt(_metadata.size());
    for (Entry<String, String> entry : _metadata.entrySet()) {
        byte[] key = entry.getKey().getBytes(UTF_8);
        byte[] value = entry.getValue().getBytes(UTF_8);
        out.writeInt(key.length);
        out.write(key);
        out.writeInt(value.length);
        out.write(value);
    }
    return buffer.toByteArray();
}
Also used : DataOutputStream(java.io.DataOutputStream) ByteArrayOutputStream(java.io.ByteArrayOutputStream)

Example 22 with DataOutputStream

Use of java.io.DataOutputStream in the project pinot by LinkedIn.

In the class DataTableImplV2, the method toBytes:

/**
 * Serializes this data table into a single byte array.
 *
 * Layout: a fixed-size header (version, row count, column count, then an
 * (offset, length) int pair for each of the five sections), followed by the
 * section payloads in the same order the offsets were written:
 * dictionary map, metadata, data schema, fixed-size data, variable-size data.
 * NOTE: the header write order and the payload write order below must stay in
 * lockstep, or the recorded offsets will not match the actual byte positions.
 */
@Nonnull
@Override
public byte[] toBytes() throws IOException {
    ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
    DataOutputStream dataOutputStream = new DataOutputStream(byteArrayOutputStream);
    dataOutputStream.writeInt(VERSION);
    dataOutputStream.writeInt(_numRows);
    dataOutputStream.writeInt(_numColumns);
    // Running byte offset of the next payload section; the first section
    // starts immediately after the fixed-size header.
    int dataOffset = HEADER_SIZE;
    // Write dictionary.
    dataOutputStream.writeInt(dataOffset);
    byte[] dictionaryMapBytes = null;
    if (_dictionaryMap != null) {
        dictionaryMapBytes = serializeDictionaryMap();
        dataOutputStream.writeInt(dictionaryMapBytes.length);
        dataOffset += dictionaryMapBytes.length;
    } else {
        // Absent section: length 0, offset unchanged.
        dataOutputStream.writeInt(0);
    }
    // Write metadata (always present; serializeMetadata never returns null).
    dataOutputStream.writeInt(dataOffset);
    byte[] metadataBytes = serializeMetadata();
    dataOutputStream.writeInt(metadataBytes.length);
    dataOffset += metadataBytes.length;
    // Write data schema.
    dataOutputStream.writeInt(dataOffset);
    byte[] dataSchemaBytes = null;
    if (_dataSchema != null) {
        dataSchemaBytes = _dataSchema.toBytes();
        dataOutputStream.writeInt(dataSchemaBytes.length);
        dataOffset += dataSchemaBytes.length;
    } else {
        dataOutputStream.writeInt(0);
    }
    // Write fixed size data.
    dataOutputStream.writeInt(dataOffset);
    if (_fixedSizeDataBytes != null) {
        dataOutputStream.writeInt(_fixedSizeDataBytes.length);
        dataOffset += _fixedSizeDataBytes.length;
    } else {
        dataOutputStream.writeInt(0);
    }
    // Write variable size data (last section, so dataOffset needs no further update).
    dataOutputStream.writeInt(dataOffset);
    if (_variableSizeDataBytes != null) {
        dataOutputStream.writeInt(_variableSizeDataBytes.length);
    } else {
        dataOutputStream.writeInt(0);
    }
    // Write actual data, in the same order as the header offsets above.
    if (dictionaryMapBytes != null) {
        dataOutputStream.write(dictionaryMapBytes);
    }
    dataOutputStream.write(metadataBytes);
    if (dataSchemaBytes != null) {
        dataOutputStream.write(dataSchemaBytes);
    }
    if (_fixedSizeDataBytes != null) {
        dataOutputStream.write(_fixedSizeDataBytes);
    }
    if (_variableSizeDataBytes != null) {
        dataOutputStream.write(_variableSizeDataBytes);
    }
    return byteArrayOutputStream.toByteArray();
}
Also used : DataOutputStream(java.io.DataOutputStream) ByteArrayOutputStream(java.io.ByteArrayOutputStream) Nonnull(javax.annotation.Nonnull)

Example 23 with DataOutputStream

Use of java.io.DataOutputStream in the project pinot by LinkedIn.

In the class SegmentIndexCreationDriverImpl, the method persistCreationMeta:

/**
   * Writes segment creation metadata to disk.
   */
/**
 * Writes segment creation metadata (CRC followed by creation time) to disk.
 *
 * @param outputDir directory in which the creation-meta file is created
 * @param crc checksum value to persist
 * @throws IOException if the metadata file cannot be written
 */
void persistCreationMeta(File outputDir, long crc) throws IOException {
    final File crcFile = new File(outputDir, V1Constants.SEGMENT_CREATION_META);
    // try-with-resources guarantees the stream is closed even when a write
    // throws (the original leaked the FileOutputStream on exception).
    try (DataOutputStream out = new DataOutputStream(new FileOutputStream(crcFile))) {
        out.writeLong(crc);
        long creationTime = System.currentTimeMillis();
        // Use the creation time from the configuration if it exists and is positive.
        try {
            long configCreationTime = Long.parseLong(config.getCreationTime());
            if (0L < configCreationTime) {
                creationTime = configCreationTime;
            }
        } catch (Exception ignored) {
            // Ignore NPE (missing config value) and NFE (malformed value); use the current time.
        }
        out.writeLong(creationTime);
    }
}
Also used : DataOutputStream(java.io.DataOutputStream) FileOutputStream(java.io.FileOutputStream) File(java.io.File) IOException(java.io.IOException)

Example 24 with DataOutputStream

Use of java.io.DataOutputStream in the project pinot by LinkedIn.

In the class OffHeapBitmapInvertedIndexCreator, the method seal:

/**
 * Builds the posting lists, serializes the bitmaps and their offsets to two
 * temporary files, concatenates them into the final inverted-index file, and
 * releases all off-heap buffers.
 *
 * <p>Fix over the previous version: the {@code finally} block closed
 * {@code fos} three times and {@code fisOffsets} twice but never closed
 * {@code fisBitmaps}, leaking a file handle on every call.
 */
@Override
public void seal() throws IOException {
    FileOutputStream fos = null;
    FileInputStream fisOffsets = null;
    FileInputStream fisBitmaps = null;
    final DataOutputStream bitmapsOut;
    final DataOutputStream offsetsOut;
    String tempOffsetsFile = invertedIndexFile + ".offsets";
    String tempBitmapsFile = invertedIndexFile + ".binary";
    try {
        // build the posting list
        constructPostingLists();
        // We need two separate streams, one to write the offsets and another to
        // write the serialized bitmap data. We need two because we don't know the
        // serialized length of each bitmap without constructing it.
        offsetsOut = new DataOutputStream(new BufferedOutputStream(new FileOutputStream(tempOffsetsFile)));
        bitmapsOut = new DataOutputStream(new BufferedOutputStream(new FileOutputStream(tempBitmapsFile)));
        // Write out offsets of bitmaps. The information can be used to access a
        // certain bitmap directly.
        // Totally (cardinality + 1) offsets will be written out; the last offset
        // is used to calculate the length of the last bitmap, which might be
        // needed when accessing bitmaps randomly.
        // If a bitmap's offset is k, then k bytes need to be skipped to reach the bitmap.
        // The offset table itself occupies 4 * (cardinality + 1) bytes.
        int startOffset = 4 * (cardinality + 1);
        // The first bitmap's offset
        offsetsOut.writeInt(startOffset);
        MutableRoaringBitmap bitmap = new MutableRoaringBitmap();
        for (int i = 0; i < cardinality; i++) {
            bitmap.clear();
            int length = postingListLengths.get(i);
            for (int j = 0; j < length; j++) {
                int bufferOffset = postingListStartOffsets.get(i) + j;
                int value = postingListBuffer.get(bufferOffset);
                bitmap.add(value);
            }
            // serialize bitmap to bitmapsOut stream
            bitmap.serialize(bitmapsOut);
            startOffset += bitmap.serializedSizeInBytes();
            // write offset of the NEXT bitmap (doubles as the end of this one)
            offsetsOut.writeInt(startOffset);
        }
        offsetsOut.close();
        bitmapsOut.close();
        // merge the two files by simply writing offsets data first and then bitmap serialized data
        fos = new FileOutputStream(invertedIndexFile);
        fisOffsets = new FileInputStream(tempOffsetsFile);
        fisBitmaps = new FileInputStream(tempBitmapsFile);
        FileChannel channelOffsets = fisOffsets.getChannel();
        channelOffsets.transferTo(0, channelOffsets.size(), fos.getChannel());
        FileChannel channelBitmaps = fisBitmaps.getChannel();
        channelBitmaps.transferTo(0, channelBitmaps.size(), fos.getChannel());
        LOGGER.debug("persisted bitmap inverted index for column : " + spec.getName() + " in " + invertedIndexFile.getAbsolutePath());
    } catch (Exception e) {
        LOGGER.error("Exception while creating bitmap index for column:" + spec.getName(), e);
    } finally {
        // Close each stream exactly once (previously fos/fisOffsets were closed
        // repeatedly while fisBitmaps was never closed).
        IOUtils.closeQuietly(fos);
        IOUtils.closeQuietly(fisOffsets);
        IOUtils.closeQuietly(fisBitmaps);
        // MmapUtils handles the null checks for buffer
        MmapUtils.unloadByteBuffer(origValueBuffer);
        origValueBuffer = null;
        valueBuffer = null;
        if (origLengths != null) {
            MmapUtils.unloadByteBuffer(origLengths);
            origLengths = null;
            lengths = null;
        }
        MmapUtils.unloadByteBuffer(origPostingListBuffer);
        origPostingListBuffer = null;
        postingListBuffer = null;
        MmapUtils.unloadByteBuffer(origPostingListCurrentOffsets);
        origPostingListCurrentOffsets = null;
        postingListCurrentOffsets = null;
        MmapUtils.unloadByteBuffer(origPostingListLengths);
        origPostingListLengths = null;
        postingListLengths = null;
        MmapUtils.unloadByteBuffer(origPostingListStartOffsets);
        origPostingListStartOffsets = null;
        postingListStartOffsets = null;
        FileUtils.deleteQuietly(new File(tempOffsetsFile));
        FileUtils.deleteQuietly(new File(tempBitmapsFile));
    }
}
Also used : MutableRoaringBitmap(org.roaringbitmap.buffer.MutableRoaringBitmap) DataOutputStream(java.io.DataOutputStream) FileChannel(java.nio.channels.FileChannel) FileOutputStream(java.io.FileOutputStream) BufferedOutputStream(java.io.BufferedOutputStream) File(java.io.File) FileInputStream(java.io.FileInputStream) IOException(java.io.IOException)

Example 25 with DataOutputStream

Use of java.io.DataOutputStream in the project pinot by LinkedIn.

In the class OffHeapStarTreeBuilder, the method init:

/**
 * Initializes the builder from the given configuration: resolves the output
 * directory, registers dimension and metric columns (the time column is
 * treated as an extra dimension), computes per-row byte sizes, opens the
 * star-tree data file, and creates the root index node.
 *
 * <p>Fix over the previous version: the directory was created via
 * {@code builderConfig.getOutDir().mkdirs()}, which throws NPE when the
 * config has no output directory and the tmpdir fallback was taken; it now
 * creates the resolved {@code outDir} instead.
 *
 * @param builderConfig source of schema, split order, thresholds and output directory
 * @throws Exception if initialization fails (e.g. the data file cannot be opened)
 */
public void init(StarTreeBuilderConfig builderConfig) throws Exception {
    schema = builderConfig.schema;
    timeColumnName = schema.getTimeColumnName();
    this.dimensionsSplitOrder = builderConfig.dimensionsSplitOrder;
    skipStarNodeCreationForDimensions = builderConfig.getSkipStarNodeCreationForDimensions();
    skipMaterializationForDimensions = builderConfig.getSkipMaterializationForDimensions();
    skipMaterializationCardinalityThreshold = builderConfig.getSkipMaterializationCardinalityThreshold();
    enableOffHeapFormat = builderConfig.isEnableOffHealpFormat();
    this.maxLeafRecords = builderConfig.maxLeafRecords;
    this.outDir = builderConfig.getOutDir();
    if (outDir == null) {
        // Fall back to a unique directory under the system temp dir.
        outDir = new File(System.getProperty("java.io.tmpdir"), V1Constants.STAR_TREE_INDEX_DIR + "_" + DateTime.now());
    }
    LOG.info("Index output directory:{}", outDir);
    dimensionTypes = new ArrayList<>();
    dimensionNames = new ArrayList<>();
    dimensionNameToIndexMap = HashBiMap.create();
    dimensionNameToStarValueMap = new HashMap<>();
    dictionaryMap = new HashMap<>();
    // READ DIMENSIONS COLUMNS
    List<DimensionFieldSpec> dimensionFieldSpecs = schema.getDimensionFieldSpecs();
    for (int index = 0; index < dimensionFieldSpecs.size(); index++) {
        DimensionFieldSpec spec = dimensionFieldSpecs.get(index);
        String dimensionName = spec.getName();
        dimensionNames.add(dimensionName);
        dimensionNameToIndexMap.put(dimensionName, index);
        Object starValue;
        starValue = getAllStarValue(spec);
        dimensionNameToStarValueMap.put(dimensionName, starValue);
        dimensionTypes.add(spec.getDataType());
        HashBiMap<Object, Integer> dictionary = HashBiMap.create();
        dictionaryMap.put(dimensionName, dictionary);
    }
    // The time column is modeled as an additional dimension; star nodes are
    // skipped for this dimension unless explicitly specified in split order.
    if (timeColumnName != null) {
        dimensionNames.add(timeColumnName);
        TimeFieldSpec timeFieldSpec = schema.getTimeFieldSpec();
        dimensionTypes.add(timeFieldSpec.getDataType());
        int index = dimensionNameToIndexMap.size();
        dimensionNameToIndexMap.put(timeColumnName, index);
        Object starValue;
        starValue = getAllStarValue(timeFieldSpec);
        dimensionNameToStarValueMap.put(timeColumnName, starValue);
        HashBiMap<Object, Integer> dictionary = HashBiMap.create();
        dictionaryMap.put(schema.getTimeColumnName(), dictionary);
    }
    // Each dimension value is stored as a 4-byte dictionary id.
    dimensionSizeBytes = dimensionNames.size() * Integer.SIZE / 8;
    this.numDimensions = dimensionNames.size();
    // READ METRIC COLUMNS
    this.metricNames = new ArrayList<>();
    this.metricNameToIndexMap = new HashMap<>();
    this.metricSizeBytes = 0;
    List<MetricFieldSpec> metricFieldSpecs = schema.getMetricFieldSpecs();
    for (int index = 0; index < metricFieldSpecs.size(); index++) {
        MetricFieldSpec spec = metricFieldSpecs.get(index);
        String metricName = spec.getName();
        metricNames.add(metricName);
        metricNameToIndexMap.put(metricName, index);
        metricSizeBytes += spec.getFieldSize();
    }
    numMetrics = metricNames.size();
    // Create the RESOLVED output directory (builderConfig.getOutDir() may be
    // null when the tmpdir fallback above was taken, which previously NPE'd here).
    outDir.mkdirs();
    dataFile = new File(outDir, "star-tree.buf");
    LOG.info("StarTree output data file: {}", dataFile.getAbsolutePath());
    dataBuffer = new DataOutputStream(new BufferedOutputStream(new FileOutputStream(dataFile)));
    // INITIALIZE THE ROOT NODE
    this.starTreeRootIndexNode = new StarTreeIndexNode();
    this.starTreeRootIndexNode.setDimensionName(StarTreeIndexNodeInterf.ALL);
    this.starTreeRootIndexNode.setDimensionValue(StarTreeIndexNodeInterf.ALL);
    this.starTreeRootIndexNode.setLevel(0);
    LOG.info("dimensionNames:{}", dimensionNames);
    LOG.info("metricNames:{}", metricNames);
}
Also used : DataOutputStream(java.io.DataOutputStream) TimeFieldSpec(com.linkedin.pinot.common.data.TimeFieldSpec) MetricFieldSpec(com.linkedin.pinot.common.data.MetricFieldSpec) FileOutputStream(java.io.FileOutputStream) JSONObject(org.json.JSONObject) File(java.io.File) BufferedOutputStream(java.io.BufferedOutputStream) DimensionFieldSpec(com.linkedin.pinot.common.data.DimensionFieldSpec)

Aggregations

DataOutputStream (java.io.DataOutputStream)2957 ByteArrayOutputStream (java.io.ByteArrayOutputStream)1309 IOException (java.io.IOException)1019 Test (org.junit.Test)633 DataInputStream (java.io.DataInputStream)611 FileOutputStream (java.io.FileOutputStream)426 ByteArrayInputStream (java.io.ByteArrayInputStream)409 File (java.io.File)279 BufferedOutputStream (java.io.BufferedOutputStream)227 UnitTest (org.apache.geode.test.junit.categories.UnitTest)172 URL (java.net.URL)149 InputStreamReader (java.io.InputStreamReader)144 BufferedReader (java.io.BufferedReader)140 Path (org.apache.hadoop.fs.Path)137 DataInput (java.io.DataInput)124 ArrayList (java.util.ArrayList)122 HttpURLConnection (java.net.HttpURLConnection)121 FSDataOutputStream (org.apache.hadoop.fs.FSDataOutputStream)117 FileInputStream (java.io.FileInputStream)107 InputStream (java.io.InputStream)107