Monday, 12 March 2018

Capture Links on Webpage Using Selenium Webdriver | get All Links from HtmlPage

Capture Links on Webpage Using Selenium Webdriver | get All Links from HtmlPage

Here I demonstrate, with a Java example, how to get all the links from an HTML page and display them using Selenium WebDriver.

The example also navigates to each of the collected links.

Example :-
package demo.linkoperation;

import java.util.List;
import org.openqa.selenium.By;
import org.openqa.selenium.JavascriptExecutor;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.WebElement;
import org.openqa.selenium.phantomjs.PhantomJSDriver;
import org.openqa.selenium.remote.DesiredCapabilities;
import org.openqa.selenium.support.ui.ExpectedCondition;
import org.openqa.selenium.support.ui.WebDriverWait;

/**
 *
 * @author vishal.khokhar
 */
public class ExtractingAllLink {

    private static final String PHANTOMJS_PATH = "C:\\Document and setting\\vishal.khokhar\\phantomjs.exe";
    PhantomJSDriver driver = null;

    public static void main(String[] args) {
        new ExtractingAllLink().getAllLinks();
    }

    public void getAllLinks() {
        try {
            String screenPage = null;
            driver = initDriver();

            do {
                try {
                    driver.get("https://scrapemania.blogspot.com/");
                    waitingForLoad(driver);
                    Thread.sleep(7000);
                    screenPage = driver.getPageSource();

                    if (screenPage.equals("<html><head></head><body></body></html>")) {
                        screenPage = null;
                        Thread.sleep(20000);
                    }
                } catch (Exception ex) {
                    ex.printStackTrace();
                }
            } while (screenPage == null);

            List<WebElement> linkssize = driver.findElements(By.tagName("a"));
            int linksCount = linkssize.size();

            System.out.println("Total links Available on Page" + linksCount);

            String[] links = new String[1000];

            // List of all the links from Page
            for (int i = 0; i < linksCount; i++) {
                links[i] = linkssize.get(i).getAttribute("href");
                System.out.println(linkssize.get(i).getAttribute("href"));
            }

            //request fire to each link on the webpage
            for (int i = 0; i < linksCount; i++) {
                driver.navigate().to(links[i]);
                System.out.println(driver.getTitle());
            }
        } catch (Exception ex) {
            ex.printStackTrace();
        }
    }

    /**
     * Initialized PhantomJSDriver.
     */
    private PhantomJSDriver initDriver() {
        // set Capabilities for PhantomJSDriver
        DesiredCapabilities capabilities = DesiredCapabilities.phantomjs();
        capabilities.setJavascriptEnabled(true);
        System.setProperty("phantomjs.binary.path", PHANTOMJS_PATH);

        PhantomJSDriver driver = new PhantomJSDriver(capabilities);
        return driver;
    }

    /**
     * Waiting for page load.
     */
    void waitingForLoad(PhantomJSDriver driver) {
        // set waitForLoad for PhantomJSDriver
        ExpectedCondition<Boolean> pageLoadCondition = new ExpectedCondition<Boolean>() {
            public Boolean apply(WebDriver driver) {
                return ((JavascriptExecutor) driver).executeScript("return document.readyState").equals("complete");
            }
        };
        WebDriverWait wait = new WebDriverWait(driver, 60);
        wait.until(pageLoadCondition);
    }

}

No comments:

Post a Comment