Search in sources :

Example 26 with Element

use of org.jsoup.nodes.Element in project Java-readability by basis-technology-corp.

the class XmlDataMap method recurse.

/**
 * Walks {@code element} and its descendants depth-first, emitting text and
 * separators according to how {@link #classifyElement} classifies each element.
 *
 * <ul>
 *   <li>{@code Whitespace}: a space is appended before and after the children.</li>
 *   <li>{@code Sentence}: a space is appended before and a period after the children.</li>
 *   <li>{@code Banned}: direct text children are skipped; child elements are still visited.</li>
 *   <li>{@code Mark}: a {@code Mark} carrying the current offset and tag name is built
 *       after the children have been processed.</li>
 * </ul>
 *
 * @param element the element to process
 */
private void recurse(Element element) {
    final ElementAction action = classifyElement(element);
    final boolean whitespace = action == ElementAction.Whitespace;
    final boolean sentence = action == ElementAction.Sentence;
    final boolean textAllowed = action != ElementAction.Banned;

    if (whitespace || sentence) {
        appendSpace();
    }

    for (Node child : element.childNodes()) {
        // NOTE(review): the comment inherited at this spot is truncated in this view:
        // "though we could use canonical XML to get rid of them." - antecedent unknown.
        if (child instanceof Element) {
            recurse((Element) child);
            continue;
        }
        if (textAllowed && child instanceof TextNode) {
            TextNode textNode = (TextNode) child;
            String content = textNode.text();
            append(textNode, content);
        }
    }

    if (whitespace) {
        appendSpace();
        return;
    }
    if (sentence) {
        appendPeriod();
        return;
    }
    if (action == ElementAction.Mark) {
        // NOTE(review): the mark is populated but not stored anywhere in the code
        // visible here - confirm whether it should be added to a collection.
        Mark mark = new Mark();
        mark.setOffset(pcDataOffset);
        mark.setTag(element.tagName());
    }
}
Also used : Node(org.jsoup.nodes.Node) TextNode(org.jsoup.nodes.TextNode) Element(org.jsoup.nodes.Element)

Example 27 with Element

use of org.jsoup.nodes.Element in project MusicDNA by harjot-oberai.

the class LyricWiki method fromURL.

/**
 * Downloads the lyrics page at {@code url} and extracts the lyrics text from
 * its {@code div.lyricBox} element.
 *
 * <p>When {@code artist} or {@code song} is {@code null}, the missing value is
 * derived from the page title: the artist is the text before the first
 * {@code ':'}, the song is the text between that colon and the last
 * occurrence of {@code "Lyrics"}.
 *
 * @param url    address of the lyrics page; a URL ending in {@code "action=edit"}
 *               yields a {@code NO_RESULT} without any network access
 * @param artist artist name, possibly URL-encoded, or {@code null} to read it from the page title
 * @param song   song title, possibly URL-encoded, or {@code null} to read it from the page title
 * @return a {@code Lyrics} flagged {@code POSITIVE_RESULT} with text and metadata,
 *         {@code NEGATIVE_RESULT} for unlicensed or instrumental entries,
 *         {@code NO_RESULT} when fewer than three characters of text were found,
 *         or {@code ERROR} when the page could not be fetched or lacks the expected elements
 */
public static Lyrics fromURL(String url, String artist, String song) {
    if (url.endsWith("action=edit")) {
        return new Lyrics(NO_RESULT);
    }
    String text;
    String originalArtist = artist;
    String originalTitle = song;
    try {
        Document lyricsPage = Jsoup.connect(url).get();
        Element lyricbox = lyricsPage.select("div.lyricBox").get(0);
        lyricbox.getElementsByClass("references").remove();
        String lyricsHtml = lyricbox.html();
        final Document.OutputSettings outputSettings = new Document.OutputSettings().prettyPrint(false);
        // Keep only <br> tags so that line breaks survive the clean-up.
        text = Jsoup.clean(lyricsHtml, "", new Whitelist().addTags("br"), outputSettings);
        if (text.contains("&#")) {
            text = Parser.unescapeEntities(text, true);
        }
        // Drop single-digit bracketed markers such as "[1]".
        text = text.replaceAll("\\[\\d\\]", "").trim();
        String title = lyricsPage.getElementsByTag("title").get(0).text();
        int colon = title.indexOf(':');
        if (artist == null) {
            artist = title.substring(0, colon).trim();
        }
        if (song == null) {
            int end = title.lastIndexOf("Lyrics");
            song = title.substring(colon + 1, end).trim();
        }
    } catch (IndexOutOfBoundsException | IOException e) {
        // Covers network failures as well as pages without a lyric box, a <title>,
        // or the expected "artist:song Lyrics" title layout.
        return new Lyrics(ERROR);
    }
    // Decode independently so that one malformed value does not affect the other.
    artist = decodeLeniently(artist);
    song = decodeLeniently(song);
    if (text.contains("Unfortunately, we are not licensed to display the full lyrics for this song at the moment.") || text.equals("Instrumental <br />")) {
        Lyrics result = new Lyrics(NEGATIVE_RESULT);
        result.setArtist(artist);
        result.setTitle(song);
        return result;
    } else if (text.length() < 3) {
        return new Lyrics(NO_RESULT);
    } else {
        Lyrics lyrics = new Lyrics(POSITIVE_RESULT);
        lyrics.setArtist(artist);
        lyrics.setTitle(song);
        lyrics.setOriginalArtist(originalArtist);
        lyrics.setOriginalTitle(originalTitle);
        lyrics.setText(text);
        lyrics.setSource("LyricsWiki");
        lyrics.setURL(url);
        return lyrics;
    }
}

/**
 * URL-decodes {@code value} as UTF-8, returning it unchanged when it cannot be decoded.
 *
 * <p>{@link URLDecoder#decode(String, String)} rejects malformed percent escapes
 * (for example a literal {@code '%'} in a name) with an unchecked
 * {@link IllegalArgumentException}; such values are kept as they are.
 */
private static String decodeLeniently(String value) {
    try {
        return URLDecoder.decode(value, "UTF-8");
    } catch (UnsupportedEncodingException | IllegalArgumentException ignored) {
        // Not a valid URL-encoded string (or, in theory, no UTF-8 support): use the raw value.
        return value;
    }
}
Also used : Element(org.jsoup.nodes.Element) Whitelist(org.jsoup.safety.Whitelist) UnsupportedEncodingException(java.io.UnsupportedEncodingException) Net.getUrlAsString(com.sdsmdg.harjot.MusicDNA.utilities.Net.getUrlAsString) IOException(java.io.IOException) Document(org.jsoup.nodes.Document)

Example 28 with Element

use of org.jsoup.nodes.Element in project gitblit by gitblit.

the class ImageDiffHandler method renderBinaryDiff.

/**
 * Builds the HTML used to display a binary (image) diff entry.
 *
 * <p>For modified, renamed and copied entries with both image URLs available, the
 * result is an overlay container holding the old and new image plus a row of
 * controls (opacity slider, blink comparator, pixel subtraction). For added
 * entries with an image URL it is a single {@code img} element.
 *
 * @param diffEntry the diff entry to render
 * @return the generated HTML, or {@code null} when the change type is not handled
 *         or a required image URL is unavailable
 */
@Override
public String renderBinaryDiff(DiffEntry diffEntry) {
    switch (diffEntry.getChangeType()) {
        case MODIFY:
        case RENAME:
        case COPY: {
            // TODO: the slider is of little use for very small images such as icons. Options:
            // upscale them for display (possibly blurry), or show them side by side (possibly
            // still too small to make out differences).
            final String previousSrc = getImageUrl(diffEntry, Side.OLD);
            final String currentSrc = getImageUrl(diffEntry, Side.NEW);
            if (previousSrc == null || currentSrc == null) {
                break;
            }
            imgDiffCount++;
            final String id = "imgdiff" + imgDiffCount;

            HtmlBuilder html = new HtmlBuilder("div");
            Element wrapper = html.root()
                    .attr("class", "imgdiff-container")
                    .attr("id", "imgdiff-" + id);
            Element sliderOverlay = wrapper.appendElement("div").attr("class", "imgdiff-ovr-slider");
            Element container = sliderOverlay.appendElement("div").attr("class", "imgdiff");
            Element leftPane = container.appendElement("div").attr("class", "imgdiff-left");

            // The inline style='max-width:640px;' keeps large images at a reasonable width and
            // overrides the "img { max-width: 100%; }" rule from bootstrap.css, which would
            // otherwise scale the left image to the width of its resizeable container. It has to
            // be an inline style on the element itself; otherwise browsers ignore it for larger
            // images and the display ends up too wide.
            // XXX: maybe add a max-height as well, to cap portrait-oriented images
            // (think of a 300x10000px image).
            leftPane.appendElement("img")
                    .attr("class", "imgdiff-old")
                    .attr("id", id)
                    .attr("style", "max-width:640px;")
                    .attr("src", previousSrc);
            container.appendElement("img")
                    .attr("class", "imgdiff")
                    .attr("style", "max-width:640px;")
                    .attr("src", currentSrc);
            wrapper.appendElement("br");

            Element controls = wrapper.appendElement("div");

            // Opacity slider.
            Element opacityBox = controls.appendElement("div").attr("class", "imgdiff-opa-container");
            opacityBox.appendElement("a")
                    .attr("class", "imgdiff-opa-slider")
                    .attr("href", "#")
                    .attr("title", page.getString("gb.opacityAdjust"));

            // Blink comparator: find Pluto!
            Element blinkLink = controls.appendElement("a")
                    .attr("class", "imgdiff-link imgdiff-blink")
                    .attr("href", "#")
                    .attr("title", page.getString("gb.blinkComparator"));
            blinkLink.appendElement("img")
                    .attr("src", getStaticResourceUrl("blink32.png"))
                    .attr("width", "20");

            // Pixel subtraction: hidden initially; imgdiff.js reveals it after a feature test
            // (it relies on CSS mix-blend-mode, which not every browser supports yet).
            Element subtractLink = controls.appendElement("a")
                    .attr("class", "imgdiff-link imgdiff-subtract")
                    .attr("href", "#")
                    .attr("title", page.getString("gb.imgdiffSubtract"))
                    .attr("style", "display:none;");
            subtractLink.appendElement("img")
                    .attr("src", getStaticResourceUrl("sub32.png"))
                    .attr("width", "20");

            return html.toString();
        }
        case ADD: {
            final String addedSrc = getImageUrl(diffEntry, Side.NEW);
            if (addedSrc != null) {
                Element image = new HtmlBuilder("img").root()
                        .attr("class", "diff-img")
                        .attr("src", addedSrc);
                return image.toString();
            }
            break;
        }
        default:
            break;
    }
    return null;
}
Also used : Element(org.jsoup.nodes.Element) HtmlBuilder(com.gitblit.utils.HtmlBuilder)

Example 29 with Element

use of org.jsoup.nodes.Element in project gocd by gocd.

the class NotificationTemplateTest method assertNotificationFilter.

/**
 * Asserts that the notification filter row at {@code notificationIndex} shows the
 * expected values in its first four cells, in the order pipeline, stage, event,
 * check-in matcher.
 *
 * @param document          the rendered page
 * @param notificationIndex zero-based index of the filter row to inspect
 * @param pipelineName      expected HTML of the first cell
 * @param stageName         expected HTML of the second cell
 * @param event             expected HTML of the third cell
 * @param checkInMatcher    expected HTML of the fourth cell
 */
private void assertNotificationFilter(Document document, int notificationIndex, String pipelineName, String stageName, String event, String checkInMatcher) {
    String rowSelector = "#tab-content-of-notifications #filters-settings table.filters tbody tr";
    Element row = document.select(rowSelector).get(notificationIndex);
    String[] expectedCells = {pipelineName, stageName, event, checkInMatcher};
    for (int column = 0; column < expectedCells.length; column++) {
        String actualCell = row.select("td").get(column).html();
        assertThat(actualCell, is(expectedCells[column]));
    }
}
Also used : Element(org.jsoup.nodes.Element)

Example 30 with Element

use of org.jsoup.nodes.Element in project jstructure by JonStargaryen.

the class OrientationsOfProteinsInMembranesAnnotator method processInternally.

/**
 * Looks up the OPM entry for {@code protein}, downloads the referenced structure
 * file, superimposes it onto {@code protein} and attaches a {@code Membrane}
 * feature populated from the downloaded structure and the entry page.
 *
 * @param protein the protein to annotate
 * @throws ComputationException when the search page reports "No matches" or an
 *         {@link IOException} occurs while fetching data
 */
@Override
protected void processInternally(Protein protein) {
    try {
        Document document = getDocument(SEARCH_URL + protein.getPdbId().getPdbId());
        if (document.text().contains("No matches")) {
            throw new ComputationException("did not find OPM entry for " + protein.getIdentifier() + " - possibly it is no membrane protein");
        }

        // the third anchor within the "caption" element is used as the download link
        String downloadLink = document.getElementById("caption")
                .getElementsByTag("a")
                .get(2)
                .attr("href");

        try (InputStreamReader streamReader = new InputStreamReader(new URL(BASE_URL + downloadLink).openStream());
             BufferedReader lineReader = new BufferedReader(streamReader)) {
            String opmText = lineReader.lines().collect(Collectors.joining(System.lineSeparator()));
            byte[] opmBytes = opmText.getBytes();

            // parse the downloaded structure; its name is taken from the download link
            Protein opmProtein = ProteinParser.source(new ByteArrayInputStream(opmBytes))
                    .forceProteinName(ProteinIdentifier.createFromPdbId(downloadLink.split("=")[0].split("/")[1].substring(0, 4)))
                    .parse();
            Membrane membrane = new Membrane(this);

            // superimpose the OPM structure onto the original protein
            // TODO this alignment is by no means perfect, but works for a first glance
            SVDSuperimposer.ALPHA_CARBON_SVD_INSTANCE
                    .align(protein.select().aminoAcids().asGroupContainer(),
                            opmProtein.select().aminoAcids().asGroupContainer())
                    .transform(opmProtein);

            // coordinates of all atoms whose parent group has the three-letter code "DUM"
            List<double[]> dummyCoordinates = opmProtein.atoms()
                    .map(Atom::getParentGroup)
                    .filter(parent -> parent.getThreeLetterCode().equals("DUM"))
                    .flatMap(Group::atoms)
                    .map(Atom::getCoordinates)
                    .collect(Collectors.toList());
            membrane.setMembraneAtoms(dummyCoordinates);

            // general information: first "data" table, value in the second cell of each row
            Element generalDataTable = document.getElementsByClass("data").get(0);
            List<Element> generalRows = generalDataTable.getElementsByTag("tr");
            membrane.setHydrophobicThickness(generalRows.get(1).getElementsByTag("td").get(1).text());
            membrane.setTiltAngle(generalRows.get(2).getElementsByTag("td").get(1).text());
            membrane.setDeltaGTransfer(generalRows.get(3).getElementsByTag("td").get(1).text());
            membrane.setTopology(generalRows.get(5).getElementsByTag("td").get(1).text());

            // trans-membrane helices: second "data" table, first cell of every row after the header
            Element helixTable = document.getElementsByClass("data").get(1);
            List<TransMembraneHelix> helices = helixTable.getElementsByTag("tr")
                    .stream()
                    .skip(1)
                    .map(row -> row.getElementsByTag("td").get(0).text())
                    .map(TransMembraneHelix::new)
                    .collect(Collectors.toList());
            membrane.setTransMembraneHelices(helices);

            protein.getFeatureContainer().addFeature(membrane);
        }
    } catch (IOException e) {
        throw new ComputationException("failed to fetch OPM file", e);
    }
}
Also used : AbstractFeatureProvider(de.bioforscher.jstructure.model.feature.AbstractFeatureProvider) ProteinIdentifier(de.bioforscher.jstructure.model.structure.identifier.ProteinIdentifier) ProteinParser(de.bioforscher.jstructure.parser.ProteinParser) Logger(org.slf4j.Logger) ComputationException(de.bioforscher.jstructure.feature.ComputationException) URL(java.net.URL) LoggerFactory(org.slf4j.LoggerFactory) IOException(java.io.IOException) InputStreamReader(java.io.InputStreamReader) Collectors(java.util.stream.Collectors) SVDSuperimposer(de.bioforscher.jstructure.alignment.SVDSuperimposer) List(java.util.List) ByteArrayInputStream(java.io.ByteArrayInputStream) FeatureProvider(de.bioforscher.jstructure.model.feature.FeatureProvider) Group(de.bioforscher.jstructure.model.structure.Group) Atom(de.bioforscher.jstructure.model.structure.Atom) Document(org.jsoup.nodes.Document) Element(org.jsoup.nodes.Element) Jsoup(org.jsoup.Jsoup) BufferedReader(java.io.BufferedReader) Protein(de.bioforscher.jstructure.model.structure.Protein)

Aggregations

Element (org.jsoup.nodes.Element): 343; Document (org.jsoup.nodes.Document): 152; Elements (org.jsoup.select.Elements): 95; ElementHandlerImpl (org.asqatasun.ruleimplementation.ElementHandlerImpl): 87; IOException (java.io.IOException): 63; File (java.io.File): 62; ArrayList (java.util.ArrayList): 45; Test (org.junit.Test): 34; TestSolutionHandler (org.asqatasun.ruleimplementation.TestSolutionHandler): 21; URL (java.net.URL): 15; TestSolutionHandlerImpl (org.asqatasun.ruleimplementation.TestSolutionHandlerImpl): 15; SimpleElementSelector (org.asqatasun.rules.elementselector.SimpleElementSelector): 13; TestSolution (org.asqatasun.entity.audit.TestSolution): 11; HashMap (java.util.HashMap): 9; ElementSelector (org.asqatasun.rules.elementselector.ElementSelector): 9; Node (org.jsoup.nodes.Node): 9; InputStream (java.io.InputStream): 8; EvidenceElement (org.asqatasun.entity.audit.EvidenceElement): 8; SSPHandler (org.asqatasun.processor.SSPHandler): 7; ProcessRemarkService (org.asqatasun.service.ProcessRemarkService): 7