use of org.wso2.carbon.registry.indexing.solr.IndexDocument in project carbon-apimgt by wso2.
the class PDFIndexerTest method testShouldReturnIndexedDocumentWhenParameterCorrect.
/**
 * Checks the media type field of the document produced by the PDF indexer:
 * first with no media type set on {@code file2Index}, then with an explicit
 * media type while closing the parsed document throws.
 *
 * <p>The parser stub hands out a real {@code COSDocument} on the first call
 * and a mock whose {@code close()} throws {@link IOException} on the second,
 * so the second indexing run exercises the failing-close path.
 *
 * @throws IOException declared because the stubbed PDF APIs declare it
 */
@Test
public void testShouldReturnIndexedDocumentWhenParameterCorrect() throws IOException {
    String mediaType = "application/pdf+test";
    String mediaTypeField = "mediaType";

    PDFParser parser = Mockito.mock(PDFParser.class);
    COSDocument cosDoc = Mockito.mock(COSDocument.class);
    PDFTextStripper pdfTextStripper = Mockito.mock(PDFTextStripper.class);
    Mockito.doThrow(IOException.class).when(cosDoc).close();
    Mockito.when(parser.getDocument()).thenReturn(new COSDocument()).thenReturn(cosDoc);
    Mockito.when(pdfTextStripper.getText(new PDDocument())).thenReturn("");
    PDFIndexer pdfIndexer = new PDFIndexerWrapper(parser, pdfTextStripper);

    // should return the default media type when media type is not defined in file2Index
    IndexDocument pdf = pdfIndexer.getIndexedDocument(file2Index);
    Assert.assertEquals("application/pdf", pdf.getFields().get(mediaTypeField).get(0));

    // should return the media type we have set in the file2Index even if error occurs in finally block
    file2Index.mediaType = mediaType;
    pdf = pdfIndexer.getIndexedDocument(file2Index);
    Assert.assertEquals(mediaType, pdf.getFields().get(mediaTypeField).get(0));
}
use of org.wso2.carbon.registry.indexing.solr.IndexDocument in project carbon-apimgt by wso2.
the class MSWordIndexerTest method testShouldReturnIndexedDocumentWhenParameterCorrect.
/**
 * Checks the media type field of the document produced by the MS Word
 * indexer across three consecutive indexing runs.
 *
 * <p>Construction of {@code POIFSFileSystem} is stubbed to throw
 * {@code OfficeXmlFileException} on the first run, return a mock on the
 * second, and throw {@code APIManagementException} on the third; the
 * remaining extractor constructors are stubbed to return mocks that yield
 * empty text.
 *
 * @throws Exception declared because the PowerMockito stubbing API declares it
 */
@Test
public void testShouldReturnIndexedDocumentWhenParameterCorrect() throws Exception {
    POIFSFileSystem poiFS = Mockito.mock(POIFSFileSystem.class);
    WordExtractor wordExtractor = Mockito.mock(WordExtractor.class);
    XWPFWordExtractor xwpfExtractor = Mockito.mock(XWPFWordExtractor.class);
    XWPFDocument xwpfDocument = Mockito.mock(XWPFDocument.class);

    // One outcome per indexing run below: OOXML rejection, success, generic failure.
    PowerMockito.whenNew(POIFSFileSystem.class)
            .withParameterTypes(InputStream.class)
            .withArguments(Mockito.any(InputStream.class))
            .thenThrow(OfficeXmlFileException.class)
            .thenReturn(poiFS)
            .thenThrow(APIManagementException.class);
    PowerMockito.whenNew(WordExtractor.class).withArguments(poiFS).thenReturn(wordExtractor);
    PowerMockito.whenNew(XWPFDocument.class)
            .withParameterTypes(InputStream.class)
            .withArguments(Mockito.any())
            .thenReturn(xwpfDocument);
    PowerMockito.whenNew(XWPFWordExtractor.class).withArguments(xwpfDocument).thenReturn(xwpfExtractor);
    Mockito.when(wordExtractor.getText()).thenReturn("");
    Mockito.when(xwpfExtractor.getText()).thenReturn("");

    MSWordIndexer indexer = new MSWordIndexer();

    // should return the default media type when media type is not defined in file2Index
    // NOTE(review): the expected default is "application/pdf" for a Word indexer - confirm this is intentional
    IndexDocument wordDoc = indexer.getIndexedDocument(file2Index);
    Assert.assertEquals("application/pdf",
            wordDoc.getFields().get(IndexingConstants.FIELD_MEDIA_TYPE).get(0));

    // should return the media type we have set in the file2Index
    file2Index.mediaType = "text/html";
    wordDoc = indexer.getIndexedDocument(file2Index);
    Assert.assertEquals("text/html",
            wordDoc.getFields().get(IndexingConstants.FIELD_MEDIA_TYPE).get(0));

    // should return the media type we have set in the file2Index even if exception occurred while reading the file
    file2Index.mediaType = "text/html";
    wordDoc = indexer.getIndexedDocument(file2Index);
    Assert.assertEquals("text/html",
            wordDoc.getFields().get(IndexingConstants.FIELD_MEDIA_TYPE).get(0));
}
use of org.wso2.carbon.registry.indexing.solr.IndexDocument in project carbon-apimgt by wso2.
the class XMLIndexerTest method testShouldReturnIndexedDocumentWhenParameterCorrect.
/**
 * Checks the media type field of the document produced by the XML indexer:
 * absent while {@code file2Index} carries no media type, and equal to the
 * configured value once one is set.
 *
 * @throws RegistryException if the indexer fails while building the document
 */
@Test
public void testShouldReturnIndexedDocumentWhenParameterCorrect() throws RegistryException {
    String mediaType = "text/xml";
    String mediaTypeField = "mediaType";
    // Empty payload, no media type.
    AsyncIndexer.File2Index file2Index = new AsyncIndexer.File2Index(new byte[0], null, "", -1234, "");
    XMLIndexer indexer = new XMLIndexer();

    // should not add a media type field when media type is not defined in file2Index
    IndexDocument xml = indexer.getIndexedDocument(file2Index);
    Assert.assertNull(xml.getFields().get(mediaTypeField));

    // should return the media type we have set in the file2Index
    file2Index.mediaType = mediaType;
    xml = indexer.getIndexedDocument(file2Index);
    Assert.assertEquals(mediaType, xml.getFields().get(mediaTypeField).get(0));
}
use of org.wso2.carbon.registry.indexing.solr.IndexDocument in project carbon-apimgt by wso2.
the class DocumentIndexer method getIndexedDocument.
/**
 * Builds the index document for an API documentation resource.
 *
 * <p>Starts from the document produced by the superclass. Paths containing
 * {@code /apimgt/applicationdata/apis/} are not indexed here and yield
 * {@code null}. When the resource exists in the governance registry, a new
 * document is created whose content is the fetched document content followed
 * by the document name and summary fields (when present), and whose fields
 * are the superclass fields plus the document-indexer indicator.
 *
 * <p>If enriching fails with an {@code APIManagementException} or
 * {@code IOException}, the error is logged and the document built so far is
 * returned instead of failing the indexing run.
 *
 * @param fileData the file to index; its {@code path} and {@code tenantId} are read
 * @return the enriched document, the superclass document when the resource is
 *         missing or enrichment failed, or {@code null} for excluded paths
 * @throws SolrException     as declared by the superclass implementation
 * @throws RegistryException if the registry cannot be obtained or queried
 */
public IndexDocument getIndexedDocument(AsyncIndexer.File2Index fileData) throws SolrException, RegistryException {
    final IndexDocument baseDocument = super.getIndexedDocument(fileData);
    final Registry governanceRegistry = GovernanceUtils.getGovernanceSystemRegistry(
            IndexingManager.getInstance().getRegistry(fileData.tenantId));
    final String resourcePath =
            fileData.path.substring(RegistryConstants.GOVERNANCE_REGISTRY_BASE_PATH.length());

    if (resourcePath.contains("/apimgt/applicationdata/apis/")) {
        return null;
    }
    if (log.isDebugEnabled()) {
        log.debug("Executing document indexer for resource at " + resourcePath);
    }

    final Map<String, List<String>> fields = baseDocument.getFields();
    final Resource resource =
            governanceRegistry.resourceExists(resourcePath) ? governanceRegistry.get(resourcePath) : null;
    if (resource == null) {
        return baseDocument;
    }

    IndexDocument result = baseDocument;
    try {
        fetchRequiredDetailsFromAssociatedAPI(governanceRegistry, resource, fields);

        StringBuilder content = new StringBuilder();
        content.append(fetchDocumentContent(governanceRegistry, resource));
        // Append "<field>=<comma-joined values>" for each descriptive field that is present.
        for (String fieldName : new String[] {APIConstants.DOC_NAME, APIConstants.DOC_SUMMARY}) {
            List<String> values = fields.get(fieldName);
            if (values != null) {
                content.append(fieldName).append("=").append(StringUtils.join(values, ","));
            }
        }

        result = new IndexDocument(fileData.path, "", content.toString(), baseDocument.getTenantId());
        fields.put(APIConstants.DOCUMENT_INDEXER_INDICATOR, Arrays.asList("true"));
        result.setFields(fields);
    } catch (APIManagementException e) {
        // fetching details from the API failed; keep indexing with what we have
        log.error("Error while updating indexed document.", e);
    } catch (IOException e) {
        // fetching the document content failed; keep indexing with what we have
        log.error("Error while getting document content.", e);
    }
    return result;
}
use of org.wso2.carbon.registry.indexing.solr.IndexDocument in project carbon-apimgt by wso2.
the class MSPowerpointIndexer method getIndexedDocument.
/**
 * Builds the index document for a PowerPoint file.
 *
 * <p>Text extraction is attempted with the binary (.ppt) extractor first;
 * if that raises {@code OfficeXmlFileException} the OOXML (.pptx) extractor
 * is used instead. Any other extraction failure is logged and the document
 * is indexed with {@code null} text. The resulting document carries a
 * {@code path} field and a media type field, the latter defaulting to
 * {@code application/vnd.ms-powerpoint} when {@code fileData.mediaType} is
 * {@code null}.
 *
 * @param fileData the file to index; its {@code data}, {@code path} and
 *                 {@code mediaType} are read
 * @return the index document for the file
 * @throws SolrException with {@code SERVER_ERROR} when an {@link IOException}
 *                       escapes the extraction or document construction
 */
public IndexDocument getIndexedDocument(File2Index fileData) throws SolrException {
    try {
        String slideText = null;
        // NOTE(review): neither extractor is closed here - confirm whether the POI version in use needs close()
        try {
            // Powerpoint 2003 (.ppt) binary format
            PowerPointExtractor binaryExtractor =
                    new PowerPointExtractor(new POIFSFileSystem(new ByteArrayInputStream(fileData.data)));
            slideText = binaryExtractor.getText();
        } catch (OfficeXmlFileException e) {
            // not a .ppt file; retry as Powerpoint 2007 (.pptx)
            XSLFPowerPointExtractor ooxmlExtractor =
                    new XSLFPowerPointExtractor(new XMLSlideShow(new ByteArrayInputStream(fileData.data)));
            slideText = ooxmlExtractor.getText();
        } catch (Exception e) {
            log.error("Failed to extract the document", e);
        }

        String mediaType = fileData.mediaType != null
                ? fileData.mediaType
                : "application/vnd" + ".ms-powerpoint";

        Map<String, List<String>> fields = new HashMap<String, List<String>>();
        fields.put("path", Collections.singletonList(fileData.path));
        fields.put(IndexingConstants.FIELD_MEDIA_TYPE, Collections.singletonList(mediaType));

        IndexDocument indexDoc = new IndexDocument(fileData.path, slideText, null);
        indexDoc.setFields(fields);
        return indexDoc;
    } catch (IOException e) {
        log.error("Failed to write to the index", e);
        throw new SolrException(ErrorCode.SERVER_ERROR, "Failed to write to the index", e);
    }
}
Aggregations