Friday, 1 July 2016

ViewState Request Using HtmlUnit

ViewState Request Using HtmlUnit


Required library: HtmlUnit

ViewState is used to authenticate the user's request: will the server give a response to the request that was fired?

A user does not arrive directly at some pages of a website (they are reached only by passing parameters), so ViewState restricts the response for those pages.

This example demonstrates how to get a response by passing through each stage of ViewState authentication for your request.

This example takes the ViewState value from the immediate parent page and then goes on to the next page, the way a browser would, to get the response.




package Demo;

import com.gargoylesoftware.htmlunit.HttpMethod;
import com.gargoylesoftware.htmlunit.TextPage;
import com.gargoylesoftware.htmlunit.WebClient;
import com.gargoylesoftware.htmlunit.WebRequest;
import com.gargoylesoftware.htmlunit.html.HtmlPage;
import com.gargoylesoftware.htmlunit.util.NameValuePair;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.HashMap;
import org.apache.commons.lang3.StringUtils;

/**
 *
 * @author vishal.khokhar
 */
public class ViewStateRequest extends Thread {

    WebClient webClient = new WebClient();
    String param1;
    String param2;
    String param3;

    ViewStateRequest(String param1, String param2, String param3) {
        this.param1 = param1;
        this.param2 = param2;
        this.param3 = param3;
    }

    public void run() {
        setUpWebClient();
        HashMap paramMap = new HashMap();

        // This will be initialized request. for getting first page request.
        paramMap = firstPage();

        // For getting second page request.
        paramMap = secondPage(paramMap);

        //for getting Third page request.
        //I demostrate three pages only
        //now this case it is final response
        //you can add custom travesing page for your self to your requirement.
        HtmlPage page = thirdPage(paramMap);
    }

    private HashMap firstPage() {
        HashMap paramMap = new HashMap();
        try {
            HtmlPage page1 = webClient.getPage("https://scrapemania.blogspot.in/");
            paramMap = getScrpingParamPage(page1);
        } catch (IOException ie) {
            ie.printStackTrace();
        }
        return paramMap;
    }

    private HashMap secondPage(HashMap paramMapFirstPage) {
        String __VIEWSTATE = "";
        HashMap paramMap = new HashMap();

        try {
            __VIEWSTATE = (String) paramMapFirstPage.get("__VIEWSTATE");
        } catch (Exception ex) {
            ex.printStackTrace();
        }

        try {
            ArrayList<NameValuePair> val = new ArrayList();
            val.add(new NameValuePair("__ASYNCPOST", "true"));
            val.add(new NameValuePair("__VIEWSTATE", __VIEWSTATE));
            val.add(new NameValuePair("$ContentPlaceHolder1$param1", param1));
            val.add(new NameValuePair("$ContentPlaceHolder1$param1", param2));
            val.add(new NameValuePair("$ContentPlaceHolder1$param1", param3));

            WebRequest webRequestDiameter = setWebRequest();
            webRequestDiameter.setRequestParameters(val);

            TextPage page = webClient.getPage(webRequestDiameter);

            paramMap = getScrpingParamPage(page);

        } catch (IOException ie) {
            ie.printStackTrace();
        }
        return paramMap;
    }

    private HtmlPage thirdPage(HashMap paramMapSecondPage) {
        String __VIEWSTATE = "";
        HtmlPage page = null;

        try {
            __VIEWSTATE = (String) paramMapSecondPage.get("__VIEWSTATE");
        } catch (Exception ex) {
            ex.printStackTrace();
        }

        try {
            ArrayList<NameValuePair> val = new ArrayList();
            val.add(new NameValuePair("__ASYNCPOST", "true"));
            val.add(new NameValuePair("__VIEWSTATE", __VIEWSTATE));
            val.add(new NameValuePair("$ContentPlaceHolder1$param1", param1));
            val.add(new NameValuePair("$ContentPlaceHolder1$param1", param2));
            val.add(new NameValuePair("$ContentPlaceHolder1$param1", param3));
            WebRequest webRequest = setWebRequest();
            webRequest.setRequestParameters(val);

            page = webClient.getPage(webRequest);

        } catch (IOException ie) {
            ie.printStackTrace();
        }
        return page;
    }

    // getScrpingParamPage() method will Depends on Response page.
    // it will be  TextPage  OR HtmlPage
  
    private HashMap getScrpingParamPage(HtmlPage page1) {
        String __VIEWSTATE = "";
        HashMap paramMap = new HashMap();

        try {
            __VIEWSTATE = page1.getElementById("__VIEWSTATE").getAttribute("value");
            paramMap.put("__VIEWSTATE", __VIEWSTATE);
        } catch (Exception ex) {
            ex.printStackTrace();
        }
        return paramMap;
    }

    private HashMap getScrpingParamPage(TextPage page1) {
        String __VIEWSTATE = "";
        HashMap paramMap = new HashMap();

        try {
            String webresponse = page1.getWebResponse().getContentAsString();
            if (webresponse.contains("__VIEWSTATE|")) {
                __VIEWSTATE = StringUtils.substringBetween(webresponse, "__VIEWSTATE|", "|");
            }
            paramMap.put("__VIEWSTATE", __VIEWSTATE);
        } catch (Exception ex) {
            ex.printStackTrace();
        }
        return paramMap;
    }

    private WebRequest setWebRequest() {
        WebRequest webrequest = null;
        try {
            webrequest = new WebRequest(new URL("https://scrapemania.blogspot.in/"), HttpMethod.POST);
            webrequest.setAdditionalHeader("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8");
            webrequest.setAdditionalHeader("Accept-Encoding", "gzip, deflate");
            webrequest.setAdditionalHeader("Accept-Language", "en-US,en;q=0.5");
            webrequest.setAdditionalHeader("Cache-Control", "no-cache");
            webrequest.setAdditionalHeader("Connection", "keep-alive");
            webrequest.setAdditionalHeader("Content-Type", "application/x-www-form-urlencoded; charset=utf-8");
            webrequest.setAdditionalHeader("Host", "scrapemania.blogspot.in/");
            webrequest.setAdditionalHeader("Pragma", "no-cache");
            webrequest.setAdditionalHeader("Referer", "https://scrapemania.blogspot.in/");
            webrequest.setAdditionalHeader("User-Agent", "Mozilla/5.0 (X11; Ubuntu; Linux i686; rv:28.0) Gecko/20100101 Firefox/28.0");
            webrequest.setAdditionalHeader("X-MicrosoftAjax", "Delta=true");
            webrequest.setAdditionalHeader("X-Requested-With", "XMLHttpRequest");
        } catch (MalformedURLException mfue) {
            mfue.printStackTrace();
        }
        return webrequest;
    }

    private void setUpWebClient() {
        webClient.getOptions().setTimeout(60000 * 2);
        webClient.getOptions().setCssEnabled(false);
        webClient.getOptions().setJavaScriptEnabled(false);
        webClient.getOptions().setThrowExceptionOnScriptError(false);
        webClient.getOptions().setThrowExceptionOnFailingStatusCode(false);
        webClient.getOptions().setUseInsecureSSL(true);
    }
}

No comments:

Post a Comment