Example usage of org.apache.jackrabbit.oak.plugins.blob.datastore.DataStoreBlobStore in the Apache jackrabbit-oak project: class AbstractBlobTrackerRegistrationTest, method registerTrackingBlobStore.
protected void registerTrackingBlobStore() throws Exception {
    // Build a DataStoreBlobStore rooted at the repository home and publish it
    // as a BlobStore OSGi service; keep the ServiceRegistration handle so the
    // test can unregister it later.
    DataStoreBlobStore store = DataStoreUtils.getBlobStore(repoHome);
    blobStore = context.bundleContext().registerService(BlobStore.class.getName(), store, null);
}
Example usage of org.apache.jackrabbit.oak.plugins.blob.datastore.DataStoreBlobStore in the Apache jackrabbit-oak project: class BlobStoreFixture, method getFileDataStore.
/**
 * Creates a benchmark fixture backed by a {@link FileDataStore} wrapped in a
 * {@link DataStoreBlobStore}, with blob statistics collection enabled.
 *
 * @param basedir            parent directory under which the store directory is created
 * @param fdsCacheInMB       cache size (in MB) passed to the DataStoreBlobStore
 * @param statisticsProvider provider used to record blob store statistics
 * @return a fixture named "FDS" that manages the store's lifecycle
 */
public static BlobStoreFixture getFileDataStore(final File basedir, final int fdsCacheInMB, final StatisticsProvider statisticsProvider) {
    return new BlobStoreFixture("FDS") {

        // Directory holding the data store contents; removed in tearDown().
        private File storeDir;
        private FileDataStore fds;

        @Override
        public BlobStore setUp() {
            storeDir = new File(basedir, unique);
            fds = new FileDataStore();
            fds.setMinRecordLength(4092);
            fds.init(storeDir.getAbsolutePath());
            configure(fds);
            // Wrap the data store and attach a stats collector before handing it out.
            DataStoreBlobStore blobStore = new DataStoreBlobStore(fds, true, fdsCacheInMB);
            blobStore.setBlobStatsCollector(new BlobStoreStats(statisticsProvider));
            configure(blobStore);
            return blobStore;
        }

        @Override
        public void tearDown() {
            fds.close();
            FileUtils.deleteQuietly(storeDir);
        }

        @Override
        public long size() {
            return FileUtils.sizeOfDirectory(storeDir);
        }
    };
}
Example usage of org.apache.jackrabbit.oak.plugins.blob.datastore.DataStoreBlobStore in the Apache jackrabbit-oak project: class TextExtractorMain, method main.
/**
 * Command line entry point for the Tika text-extraction tooling.
 * <p>
 * Supported modes (passed as a non-option argument):
 * report (summary report on binary data), extract (text extraction),
 * generate (writes the csv data file from the configured NodeStore/BlobStore).
 *
 * @param args command line arguments; see the option specs declared below
 * @throws Exception if option parsing, store access or extraction fails
 */
public static void main(String[] args) throws Exception {
    Closer closer = Closer.create();
    String h = "tika [extract|report|generate]\n" + "\n" + "report : Generates a summary report related to binary data\n" + "extract : Performs the text extraction\n" + "generate : Generates the csv data file based on configured NodeStore/BlobStore";
    try {
        OptionParser parser = new OptionParser();
        OptionSpec<?> help = parser.acceptsAll(asList("h", "?", "help"), "show help").forHelp();
        OptionSpec<String> nodeStoreSpec = parser.accepts("nodestore", "NodeStore detail /path/to/oak/repository | mongodb://host:port/database").withRequiredArg().ofType(String.class);
        OptionSpec<String> pathSpec = parser.accepts("path", "Path in repository under which the binaries would be searched").withRequiredArg().ofType(String.class);
        OptionSpec<File> dataFileSpec = parser.accepts("data-file", "Data file in csv format containing the binary metadata").withRequiredArg().ofType(File.class);
        OptionSpec<File> tikaConfigSpec = parser.accepts("tika-config", "Tika config file path").withRequiredArg().ofType(File.class);
        OptionSpec<File> fdsDirSpec = parser.accepts("fds-path", "Path of directory used by FileDataStore").withRequiredArg().ofType(File.class);
        OptionSpec<File> s3ConfigSpec = parser.accepts("s3-config-path", "Path of properties file containing config for S3DataStore").withRequiredArg().ofType(File.class);
        OptionSpec<File> storeDirSpec = parser.accepts("store-path", "Path of directory used to store extracted text content").withRequiredArg().ofType(File.class);
        OptionSpec<Integer> poolSize = parser.accepts("pool-size", "Size of the thread pool used to perform text extraction. Defaults " + "to number of cores on the system").withRequiredArg().ofType(Integer.class);
        OptionSpec<String> nonOption = parser.nonOptions(h);
        OptionSet options = parser.parse(args);
        List<String> nonOptions = nonOption.values(options);
        if (options.has(help)) {
            parser.printHelpOn(System.out);
            System.exit(0);
        }
        // A mode argument (report/extract/generate) is mandatory.
        if (nonOptions.isEmpty()) {
            parser.printHelpOn(System.err);
            System.exit(1);
        }
        boolean report = nonOptions.contains("report");
        boolean extract = nonOptions.contains("extract");
        boolean generate = nonOptions.contains("generate");
        File dataFile = null;
        File storeDir = null;
        File tikaConfigFile = null;
        BlobStore blobStore = null;
        BinaryResourceProvider binaryResourceProvider = null;
        BinaryStats stats = null;
        String path = "/";
        if (options.has(tikaConfigSpec)) {
            tikaConfigFile = tikaConfigSpec.value(options);
            checkArgument(tikaConfigFile.exists(), "Tika config file %s does not exist", tikaConfigFile.getAbsolutePath());
        }
        if (options.has(storeDirSpec)) {
            storeDir = storeDirSpec.value(options);
            if (storeDir.exists()) {
                checkArgument(storeDir.isDirectory(), "Path [%s] specified for storing extracted " + "text content '%s' is not a directory", storeDir.getAbsolutePath(), storeDirSpec.options());
            }
        }
        // Blob store option 1: a local FileDataStore directory.
        if (options.has(fdsDirSpec)) {
            File fdsDir = fdsDirSpec.value(options);
            checkArgument(fdsDir.exists(), "FileDataStore %s does not exist", fdsDir.getAbsolutePath());
            FileDataStore fds = new FileDataStore();
            fds.setPath(fdsDir.getAbsolutePath());
            fds.init(null);
            blobStore = new DataStoreBlobStore(fds);
        }
        // Blob store option 2: an S3DataStore configured from a properties file.
        if (options.has(s3ConfigSpec)) {
            File s3Config = s3ConfigSpec.value(options);
            checkArgument(s3Config.exists() && s3Config.canRead(), "S3DataStore config cannot be read from [%s]", s3Config.getAbsolutePath());
            Properties props = loadProperties(s3Config);
            log.info("Loaded properties for S3DataStore from {}", s3Config.getAbsolutePath());
            String pathProp = "path";
            String repoPath = props.getProperty(pathProp);
            checkNotNull(repoPath, "Missing required property [%s] from S3DataStore config loaded from [%s]", pathProp, s3Config);
            //Check if 'secret' key is defined. It should be non null for references
            //to be generated. As the ref are transient we can just use any random value
            //if not specified
            String secretConfig = "secret";
            if (props.getProperty(secretConfig) == null) {
                props.setProperty(secretConfig, UUID.randomUUID().toString());
            }
            log.info("Using {} for S3DataStore ", repoPath);
            DataStore ds = createS3DataStore(props);
            PropertiesUtil.populate(ds, toMap(props), false);
            // BUGFIX: init must receive the configured repository path value
            // (repoPath), not the literal property name "path" (pathProp),
            // which would initialize the store in a directory named "path".
            ds.init(repoPath);
            blobStore = new DataStoreBlobStore(ds);
            closer.register(asCloseable(ds));
        }
        if (options.has(dataFileSpec)) {
            dataFile = dataFileSpec.value(options);
        }
        checkNotNull(dataFile, "Data file not configured with %s", dataFileSpec);
        // report/extract read binary metadata from the csv data file.
        if (report || extract) {
            checkArgument(dataFile.exists(), "Data file %s does not exist", dataFile.getAbsolutePath());
            binaryResourceProvider = new CSVFileBinaryResourceProvider(dataFile, blobStore);
            if (binaryResourceProvider instanceof Closeable) {
                closer.register((Closeable) binaryResourceProvider);
            }
            stats = new BinaryStats(tikaConfigFile, binaryResourceProvider);
            String summary = stats.getSummary();
            log.info(summary);
        }
        // generate writes the csv data file by walking the NodeStore.
        if (generate) {
            String src = nodeStoreSpec.value(options);
            checkNotNull(blobStore, "BlobStore found to be null. FileDataStore directory " + "must be specified via %s", fdsDirSpec.options());
            checkNotNull(dataFile, "Data file path not provided");
            NodeStore nodeStore = bootStrapNodeStore(src, blobStore, closer);
            BinaryResourceProvider brp = new NodeStoreBinaryResourceProvider(nodeStore, blobStore);
            CSVFileGenerator generator = new CSVFileGenerator(dataFile);
            generator.generate(brp.getBinaries(path));
        }
        // extract runs Tika over the binaries and persists the extracted text.
        if (extract) {
            checkNotNull(storeDir, "Directory to store extracted text content " + "must be specified via %s", storeDirSpec.options());
            checkNotNull(blobStore, "BlobStore found to be null. FileDataStore directory " + "must be specified via %s", fdsDirSpec.options());
            DataStoreTextWriter writer = new DataStoreTextWriter(storeDir, false);
            TextExtractor extractor = new TextExtractor(writer);
            if (options.has(poolSize)) {
                extractor.setThreadPoolSize(poolSize.value(options));
            }
            if (tikaConfigFile != null) {
                extractor.setTikaConfig(tikaConfigFile);
            }
            if (options.has(pathSpec)) {
                path = pathSpec.value(options);
            }
            closer.register(writer);
            closer.register(extractor);
            extractor.setStats(stats);
            log.info("Using path {}", path);
            extractor.extract(binaryResourceProvider.getBinaries(path));
            extractor.close();
            writer.close();
        }
    } catch (Throwable e) {
        throw closer.rethrow(e);
    } finally {
        closer.close();
    }
}
Example usage of org.apache.jackrabbit.oak.plugins.blob.datastore.DataStoreBlobStore in the Apache jackrabbit-oak project: class LuceneIndexEditorTest, method setup.
@Before
public void setup() throws Exception {
    // Always build an editor provider; optionally back it with a
    // DataStoreBlobStore over a caching FileDataStore when the test is
    // parameterized to use a blob store.
    LuceneIndexEditorProvider provider = new LuceneIndexEditorProvider();
    if (useBlobStore) {
        CachingFileDataStore ds = DataStoreUtils.createCachingFDS(
                temporaryFolder.newFolder().getAbsolutePath(),
                temporaryFolder.newFolder().getAbsolutePath());
        provider.setBlobStore(new DataStoreBlobStore(ds));
    }
    HOOK = new EditorHook(new IndexUpdateProvider(provider));
}
Example usage of org.apache.jackrabbit.oak.plugins.blob.datastore.DataStoreBlobStore in the Apache jackrabbit-oak project: class ExternalBlobIT, method testOfflineCompaction.
@Test
public void testOfflineCompaction() throws Exception {
    // Store a 2MB random blob through a DataStoreBlobStore so the binary is
    // written to the external FileDataStore rather than inlined in segments.
    FileDataStore fds = createFileDataStore();
    DataStoreBlobStore dbs = new DataStoreBlobStore(fds);
    nodeStore = getNodeStore(dbs);
    int size = 2 * 1024 * 1024;
    byte[] data2 = new byte[size];
    new Random().nextBytes(data2);
    Blob b = nodeStore.createBlob(new ByteArrayInputStream(data2));
    NodeBuilder builder = nodeStore.getRoot().builder();
    builder.child("hello").setProperty("world", b);
    nodeStore.merge(builder, EmptyHook.INSTANCE, CommitInfo.EMPTY);
    store.flush();
    // blob went to the external store
    assertTrue(store.getStats().getApproximateSize() < 10 * 1024);
    close();
    // Reopen the file store with offline GC options and verify that offline
    // compaction + cleanup run cleanly against externally stored binaries.
    SegmentGCOptions gcOptions = defaultGCOptions().setOffline();
    store = fileStoreBuilder(getWorkDir()).withMaxFileSize(1).withGCOptions(gcOptions).build();
    // Size is still small after reopen: the binary remained external.
    assertTrue(store.getStats().getApproximateSize() < 10 * 1024);
    store.compact();
    store.cleanup();
}
Aggregations