Search in sources:

Example 1 with DataStoreBlobStore

Use of org.apache.jackrabbit.oak.plugins.blob.datastore.DataStoreBlobStore in the project jackrabbit-oak by Apache.

Class AbstractBlobTrackerRegistrationTest, method registerTrackingBlobStore.

/**
 * Obtains a {@link DataStoreBlobStore} for {@code repoHome} via {@code DataStoreUtils}
 * and registers it with the bundle context under the {@link BlobStore} class name.
 * The value returned by the registration call is kept in the {@code blobStore} field.
 *
 * @throws Exception if obtaining the blob store or registering it fails
 */
protected void registerTrackingBlobStore() throws Exception {
    DataStoreBlobStore trackingStore = DataStoreUtils.getBlobStore(repoHome);
    String serviceName = BlobStore.class.getName();
    // No service properties are supplied, hence the null third argument.
    this.blobStore = context.bundleContext().registerService(serviceName, trackingStore, null);
}
Also used : DataStoreBlobStore(org.apache.jackrabbit.oak.plugins.blob.datastore.DataStoreBlobStore)

Example 2 with DataStoreBlobStore

Use of org.apache.jackrabbit.oak.plugins.blob.datastore.DataStoreBlobStore in the project jackrabbit-oak by Apache.

Class BlobStoreFixture, method getFileDataStore.

/**
 * Builds a fixture named {@code "FDS"} whose blob store is a {@link DataStoreBlobStore}
 * wrapping a {@link FileDataStore}.
 *
 * @param basedir parent directory; the data store lives in the child directory
 *        named by the fixture's {@code unique} value
 * @param fdsCacheInMB value handed to the {@link DataStoreBlobStore} constructor
 * @param statisticsProvider provider used to create the {@link BlobStoreStats}
 *        collector installed on the blob store
 * @return a new fixture; nothing is created on disk until {@code setUp()} is called
 */
public static BlobStoreFixture getFileDataStore(final File basedir, final int fdsCacheInMB, final StatisticsProvider statisticsProvider) {
    return new BlobStoreFixture("FDS") {

        /** Directory holding the data store files; assigned in {@link #setUp()}. */
        private File rootDir;

        /** Underlying file data store; assigned in {@link #setUp()}. */
        private FileDataStore dataStore;

        @Override
        public BlobStore setUp() {
            dataStore = new FileDataStore();
            dataStore.setMinRecordLength(4092);
            rootDir = new File(basedir, unique);
            dataStore.init(rootDir.getAbsolutePath());
            configure(dataStore);

            DataStoreBlobStore wrapped = new DataStoreBlobStore(dataStore, true, fdsCacheInMB);
            wrapped.setBlobStatsCollector(new BlobStoreStats(statisticsProvider));
            configure(wrapped);
            return wrapped;
        }

        @Override
        public void tearDown() {
            // Close the store first, then remove its directory (failures to delete are ignored).
            dataStore.close();
            FileUtils.deleteQuietly(rootDir);
        }

        @Override
        public long size() {
            return FileUtils.sizeOfDirectory(rootDir);
        }
    };
}
Also used : BlobStoreStats(org.apache.jackrabbit.oak.plugins.blob.BlobStoreStats) File(java.io.File) FileDataStore(org.apache.jackrabbit.core.data.FileDataStore) DataStoreBlobStore(org.apache.jackrabbit.oak.plugins.blob.datastore.DataStoreBlobStore)

Example 3 with DataStoreBlobStore

Use of org.apache.jackrabbit.oak.plugins.blob.datastore.DataStoreBlobStore in the project jackrabbit-oak by Apache.

Class TextExtractorMain, method main.

/**
 * Command line entry point of the {@code tika} tool.
 *
 * <p>The non-option arguments select one or more modes:
 * <ul>
 *   <li>{@code report} - logs a summary of the binaries listed in the data file</li>
 *   <li>{@code extract} - logs the same summary, then extracts text from those binaries
 *       into the directory given via {@code --store-path}</li>
 *   <li>{@code generate} - writes the csv data file from the configured NodeStore/BlobStore</li>
 * </ul>
 *
 * <p>A data file ({@code --data-file}) is required in every mode. {@code generate} and
 * {@code extract} additionally require a blob store, configured through {@code --fds-path}
 * or {@code --s3-config-path}. The repository path given via {@code --path} (default
 * {@code "/"}) is applied to both {@code generate} and {@code extract}.
 *
 * <p>Every resource registered with the {@link Closer} is closed before the method returns
 * or propagates a failure. Note that the help and missing-mode paths call
 * {@link System#exit(int)} directly.
 *
 * @param args command line arguments
 * @throws Exception any failure raised while parsing options or running a mode,
 *         rethrown through {@link Closer#rethrow(Throwable)}
 */
public static void main(String[] args) throws Exception {
    Closer closer = Closer.create();
    String h = "tika [extract|report|generate]\n" + "\n" + "report   : Generates a summary report related to binary data\n" + "extract  : Performs the text extraction\n" + "generate : Generates the csv data file based on configured NodeStore/BlobStore";
    try {
        OptionParser parser = new OptionParser();
        OptionSpec<?> help = parser.acceptsAll(asList("h", "?", "help"), "show help").forHelp();
        OptionSpec<String> nodeStoreSpec = parser.accepts("nodestore", "NodeStore detail /path/to/oak/repository | mongodb://host:port/database").withRequiredArg().ofType(String.class);
        OptionSpec<String> pathSpec = parser.accepts("path", "Path in repository under which the binaries would be searched").withRequiredArg().ofType(String.class);
        OptionSpec<File> dataFileSpec = parser.accepts("data-file", "Data file in csv format containing the binary metadata").withRequiredArg().ofType(File.class);
        OptionSpec<File> tikaConfigSpec = parser.accepts("tika-config", "Tika config file path").withRequiredArg().ofType(File.class);
        OptionSpec<File> fdsDirSpec = parser.accepts("fds-path", "Path of directory used by FileDataStore").withRequiredArg().ofType(File.class);
        OptionSpec<File> s3ConfigSpec = parser.accepts("s3-config-path", "Path of properties file containing config for S3DataStore").withRequiredArg().ofType(File.class);
        OptionSpec<File> storeDirSpec = parser.accepts("store-path", "Path of directory used to store extracted text content").withRequiredArg().ofType(File.class);
        OptionSpec<Integer> poolSize = parser.accepts("pool-size", "Size of the thread pool used to perform text extraction. Defaults " + "to number of cores on the system").withRequiredArg().ofType(Integer.class);
        OptionSpec<String> nonOption = parser.nonOptions(h);
        OptionSet options = parser.parse(args);
        List<String> nonOptions = nonOption.values(options);
        if (options.has(help)) {
            parser.printHelpOn(System.out);
            System.exit(0);
        }
        // At least one mode (report/extract/generate) must be named.
        if (nonOptions.isEmpty()) {
            parser.printHelpOn(System.err);
            System.exit(1);
        }
        boolean report = nonOptions.contains("report");
        boolean extract = nonOptions.contains("extract");
        boolean generate = nonOptions.contains("generate");
        File dataFile = null;
        File storeDir = null;
        File tikaConfigFile = null;
        BlobStore blobStore = null;
        BinaryResourceProvider binaryResourceProvider = null;
        BinaryStats stats = null;
        String path = "/";
        if (options.has(tikaConfigSpec)) {
            tikaConfigFile = tikaConfigSpec.value(options);
            checkArgument(tikaConfigFile.exists(), "Tika config file %s does not exist", tikaConfigFile.getAbsolutePath());
        }
        if (options.has(storeDirSpec)) {
            storeDir = storeDirSpec.value(options);
            // A not-yet-existing store path is accepted; an existing one must be a directory.
            if (storeDir.exists()) {
                checkArgument(storeDir.isDirectory(), "Path [%s] specified for storing extracted " + "text content '%s' is not a directory", storeDir.getAbsolutePath(), storeDirSpec.options());
            }
        }
        if (options.has(fdsDirSpec)) {
            File fdsDir = fdsDirSpec.value(options);
            checkArgument(fdsDir.exists(), "FileDataStore %s does not exist", fdsDir.getAbsolutePath());
            FileDataStore fds = new FileDataStore();
            fds.setPath(fdsDir.getAbsolutePath());
            fds.init(null);
            blobStore = new DataStoreBlobStore(fds);
            // Register for cleanup exactly like the S3 data store below, so the
            // FileDataStore is closed when main finishes or fails.
            closer.register(asCloseable(fds));
        }
        // When both --fds-path and --s3-config-path are given, the S3 store configured here wins.
        if (options.has(s3ConfigSpec)) {
            File s3Config = s3ConfigSpec.value(options);
            checkArgument(s3Config.exists() && s3Config.canRead(), "S3DataStore config cannot be read from [%s]", s3Config.getAbsolutePath());
            Properties props = loadProperties(s3Config);
            log.info("Loaded properties for S3DataStore from {}", s3Config.getAbsolutePath());
            String pathProp = "path";
            String repoPath = props.getProperty(pathProp);
            checkNotNull(repoPath, "Missing required property [%s] from S3DataStore config loaded from [%s]", pathProp, s3Config);
            //Check if 'secret' key is defined. It should be non null for references
            //to be generated. As the ref are transient we can just use any random value
            //if not specified
            String secretConfig = "secret";
            if (props.getProperty(secretConfig) == null) {
                props.setProperty(secretConfig, UUID.randomUUID().toString());
            }
            log.info("Using {} for S3DataStore ", repoPath);
            DataStore ds = createS3DataStore(props);
            PropertiesUtil.populate(ds, toMap(props), false);
            // NOTE(review): init receives the literal property name "path", not repoPath.
            // Presumably harmless because the path is populated from props above - confirm.
            ds.init(pathProp);
            blobStore = new DataStoreBlobStore(ds);
            closer.register(asCloseable(ds));
        }
        if (options.has(dataFileSpec)) {
            dataFile = dataFileSpec.value(options);
        }
        checkNotNull(dataFile, "Data file not configured with %s", dataFileSpec);
        // Resolve --path before any mode runs so that 'generate' honours it as well as 'extract'.
        if (options.has(pathSpec)) {
            path = pathSpec.value(options);
        }
        if (report || extract) {
            checkArgument(dataFile.exists(), "Data file %s does not exist", dataFile.getAbsolutePath());
            binaryResourceProvider = new CSVFileBinaryResourceProvider(dataFile, blobStore);
            if (binaryResourceProvider instanceof Closeable) {
                closer.register((Closeable) binaryResourceProvider);
            }
            stats = new BinaryStats(tikaConfigFile, binaryResourceProvider);
            String summary = stats.getSummary();
            log.info(summary);
        }
        if (generate) {
            String src = nodeStoreSpec.value(options);
            checkNotNull(blobStore, "BlobStore found to be null. FileDataStore directory " + "must be specified via %s", fdsDirSpec.options());
            // dataFile was already verified to be non-null above.
            NodeStore nodeStore = bootStrapNodeStore(src, blobStore, closer);
            BinaryResourceProvider brp = new NodeStoreBinaryResourceProvider(nodeStore, blobStore);
            CSVFileGenerator generator = new CSVFileGenerator(dataFile);
            generator.generate(brp.getBinaries(path));
        }
        if (extract) {
            checkNotNull(storeDir, "Directory to store extracted text content " + "must be specified via %s", storeDirSpec.options());
            checkNotNull(blobStore, "BlobStore found to be null. FileDataStore directory " + "must be specified via %s", fdsDirSpec.options());
            DataStoreTextWriter writer = new DataStoreTextWriter(storeDir, false);
            TextExtractor extractor = new TextExtractor(writer);
            if (options.has(poolSize)) {
                extractor.setThreadPoolSize(poolSize.value(options));
            }
            if (tikaConfigFile != null) {
                extractor.setTikaConfig(tikaConfigFile);
            }
            closer.register(writer);
            closer.register(extractor);
            extractor.setStats(stats);
            log.info("Using path {}", path);
            extractor.extract(binaryResourceProvider.getBinaries(path));
            // NOTE(review): both are also registered with the closer, so close() is invoked
            // a second time in the finally block - presumably idempotent; confirm.
            extractor.close();
            writer.close();
        }
    } catch (Throwable e) {
        throw closer.rethrow(e);
    } finally {
        closer.close();
    }
}
Also used : Closeable(java.io.Closeable) Properties(java.util.Properties) OptionParser(joptsimple.OptionParser) DocumentNodeStore(org.apache.jackrabbit.oak.plugins.document.DocumentNodeStore) NodeStore(org.apache.jackrabbit.oak.spi.state.NodeStore) S3DataStore(org.apache.jackrabbit.aws.ext.ds.S3DataStore) DataStore(org.apache.jackrabbit.core.data.DataStore) FileDataStore(org.apache.jackrabbit.core.data.FileDataStore) FileDataStore(org.apache.jackrabbit.core.data.FileDataStore) DataStoreBlobStore(org.apache.jackrabbit.oak.plugins.blob.datastore.DataStoreBlobStore) BlobStore(org.apache.jackrabbit.oak.spi.blob.BlobStore) Closer(com.google.common.io.Closer) DataStoreTextWriter(org.apache.jackrabbit.oak.plugins.index.datastore.DataStoreTextWriter) OptionSet(joptsimple.OptionSet) File(java.io.File) DataStoreBlobStore(org.apache.jackrabbit.oak.plugins.blob.datastore.DataStoreBlobStore)

Example 4 with DataStoreBlobStore

Use of org.apache.jackrabbit.oak.plugins.blob.datastore.DataStoreBlobStore in the project jackrabbit-oak by Apache.

Class LuceneIndexEditorTest, method setup.

/**
 * Installs the commit hook used by the tests. The hook always wraps a fresh
 * {@link LuceneIndexEditorProvider}; when {@code useBlobStore} is set, the provider
 * is additionally given a {@link DataStoreBlobStore} backed by a
 * {@link CachingFileDataStore} created in two new temporary folders.
 *
 * @throws Exception if the temporary folders or the data store cannot be created
 */
@Before
public void setup() throws Exception {
    LuceneIndexEditorProvider editorProvider = new LuceneIndexEditorProvider();
    if (useBlobStore) {
        // Two distinct temporary folders, passed in creation order.
        String firstDir = temporaryFolder.newFolder().getAbsolutePath();
        String secondDir = temporaryFolder.newFolder().getAbsolutePath();
        CachingFileDataStore cachingStore = DataStoreUtils.createCachingFDS(firstDir, secondDir);
        editorProvider.setBlobStore(new DataStoreBlobStore(cachingStore));
    }
    HOOK = new EditorHook(new IndexUpdateProvider(editorProvider));
}
Also used : IndexUpdateProvider(org.apache.jackrabbit.oak.plugins.index.IndexUpdateProvider) EditorHook(org.apache.jackrabbit.oak.spi.commit.EditorHook) CachingFileDataStore(org.apache.jackrabbit.oak.plugins.blob.datastore.CachingFileDataStore) DataStoreBlobStore(org.apache.jackrabbit.oak.plugins.blob.datastore.DataStoreBlobStore) Before(org.junit.Before)

Example 5 with DataStoreBlobStore

Use of org.apache.jackrabbit.oak.plugins.blob.datastore.DataStoreBlobStore in the project jackrabbit-oak by Apache.

Class ExternalBlobIT, method testOfflineCompaction.

/**
 * Stores a 2 MiB random blob through a node store backed by a {@link DataStoreBlobStore},
 * checks that the segment store's approximate size stays below 10 KiB, then reopens the
 * store with offline GC options and runs compaction and cleanup on it.
 *
 * @throws Exception if any store operation fails
 */
@Test
public void testOfflineCompaction() throws Exception {
    final int sizeLimit = 10 * 1024;

    nodeStore = getNodeStore(new DataStoreBlobStore(createFileDataStore()));

    byte[] payload = new byte[2 * 1024 * 1024];
    new Random().nextBytes(payload);
    Blob randomBlob = nodeStore.createBlob(new ByteArrayInputStream(payload));

    NodeBuilder rootBuilder = nodeStore.getRoot().builder();
    rootBuilder.child("hello").setProperty("world", randomBlob);
    nodeStore.merge(rootBuilder, EmptyHook.INSTANCE, CommitInfo.EMPTY);
    store.flush();

    // blob went to the external store
    assertTrue(store.getStats().getApproximateSize() < sizeLimit);
    close();

    // Reopen the store configured for offline garbage collection.
    SegmentGCOptions offlineOptions = defaultGCOptions().setOffline();
    store = fileStoreBuilder(getWorkDir())
            .withMaxFileSize(1)
            .withGCOptions(offlineOptions)
            .build();
    assertTrue(store.getStats().getApproximateSize() < sizeLimit);

    store.compact();
    store.cleanup();
}
Also used : FileBlob(org.apache.jackrabbit.oak.segment.file.FileBlob) Blob(org.apache.jackrabbit.oak.api.Blob) AbstractBlob(org.apache.jackrabbit.oak.plugins.memory.AbstractBlob) SegmentGCOptions(org.apache.jackrabbit.oak.segment.compaction.SegmentGCOptions) Random(java.util.Random) ByteArrayInputStream(java.io.ByteArrayInputStream) NodeBuilder(org.apache.jackrabbit.oak.spi.state.NodeBuilder) FileDataStore(org.apache.jackrabbit.core.data.FileDataStore) DataStoreBlobStore(org.apache.jackrabbit.oak.plugins.blob.datastore.DataStoreBlobStore) Test(org.junit.Test)

Aggregations

DataStoreBlobStore (org.apache.jackrabbit.oak.plugins.blob.datastore.DataStoreBlobStore): 33; File (java.io.File): 12; Properties (java.util.Properties): 12; FileDataStore (org.apache.jackrabbit.core.data.FileDataStore): 11; Test (org.junit.Test): 10; DataStore (org.apache.jackrabbit.core.data.DataStore): 5; OakFileDataStore (org.apache.jackrabbit.oak.plugins.blob.datastore.OakFileDataStore): 5; Before (org.junit.Before): 5; ByteArrayInputStream (java.io.ByteArrayInputStream): 4; Random (java.util.Random): 4; CachingFileDataStore (org.apache.jackrabbit.oak.plugins.blob.datastore.CachingFileDataStore): 4; BlobStore (org.apache.jackrabbit.oak.spi.blob.BlobStore): 4; Blob (org.apache.jackrabbit.oak.api.Blob): 3; AbstractBlob (org.apache.jackrabbit.oak.plugins.memory.AbstractBlob): 3; NodeBuilder (org.apache.jackrabbit.oak.spi.state.NodeBuilder): 3; NodeStore (org.apache.jackrabbit.oak.spi.state.NodeStore): 3; Closer (com.google.common.io.Closer): 2; IOException (java.io.IOException): 2; InputStream (java.io.InputStream): 2; Date (java.util.Date): 2