us.codecraft.webmagic.selector.Html.create()方法的使用及代码示例

x33g5p2x  于2022-01-20 转载在 其他  
字(5.9k)|赞(0)|评价(0)|浏览(103)

本文整理了Java中us.codecraft.webmagic.selector.Html.create()方法的一些代码示例,展示了Html.create()的具体用法。这些代码示例主要来源于Github/Stackoverflow/Maven等平台,是从一些精选项目中提取出来的代码,具有较强的参考意义,能在一定程度上帮助到你。Html.create()方法的具体详情如下:
包路径:us.codecraft.webmagic.selector.Html
类名称:Html
方法名:create

Html.create介绍

暂无

代码示例

代码示例来源:origin: code4craft/webmagic

/**
 * Prints, for every data row of the question grid on the page, the raw row markup followed by
 * its title text, its answer count and the text of its third cell.
 *
 * @param page the downloaded page whose HTML is queried
 */
public void process(Page page) {
  List<String> rows = page.getHtml()
      .xpath("//table[@class='tgCustomerCommunityCenterColumn']//div[@class='content']//table[@class='dataGrid']//tr")
      .all();

  // Nothing to print unless at least one row follows the header row.
  if (rows == null || rows.size() <= 1) {
    return;
  }

  // Row 0 holds the column names, so only the rows after it are processed.
  for (String rowMarkup : rows.subList(1, rows.size())) {
    System.out.println(rowMarkup);

    // The row is re-wrapped in a <table> before parsing
    // (presumably so the parser keeps the <tr>/<td> structure -- confirm).
    Html rowHtml = Html.create("<table>" + rowMarkup + "</table>");

    String title = rowHtml.xpath("//td[@class='title']//a/text()").toString();
    System.out.println(title);

    String answerCount = rowHtml.xpath("//td[@class='num']/text()").toString();
    System.out.println(answerCount);

    String created = rowHtml.xpath("//td[3]/text()").toString();
    System.out.println(created);
  }
}

代码示例来源:origin: biezhi/java-library-examples

/**
 * Walks the question grid found on the page and prints each data row together with three
 * values extracted from it: the title link text, the "num" cell text and the third cell text.
 *
 * @param page the downloaded page whose HTML is queried
 */
public void process(Page page) {
  final String rowXpath =
      "//table[@class='tgCustomerCommunityCenterColumn']//div[@class='content']//table[@class='dataGrid']//tr";
  List<String> tableRows = page.getHtml().xpath(rowXpath).all();

  boolean hasDataRows = tableRows != null && tableRows.size() > 1;
  if (!hasDataRows) {
    return;
  }

  // Index 0 is the header row with the column names, so start at index 1.
  int rowIndex = 1;
  while (rowIndex < tableRows.size()) {
    String currentRow = tableRows.get(rowIndex);
    System.out.println(currentRow);

    // Parse the single row as its own document, wrapped in a <table> element.
    Html parsedRow = Html.create("<table>" + currentRow + "</table>");

    String comment = parsedRow.xpath("//td[@class='title']//a/text()").toString();
    System.out.println(comment);

    String answerNum = parsedRow.xpath("//td[@class='num']/text()").toString();
    System.out.println(answerNum);

    String createTime = parsedRow.xpath("//td[3]/text()").toString();
    System.out.println(createTime);

    rowIndex++;
  }
}

代码示例来源:origin: zifangsky/WeatherSpider

// Excerpt from a larger method: the enclosing declaration and the closing braces of the
// if/for below are not part of this snippet.
// Parse the `day` fragment as its own Html document so it can be queried with XPath.
Html temp = Html.create(day);
StringBuffer stringBuffer = new StringBuffer();
// Start the buffer with the text content of the fragment's <h1> element.
stringBuffer.append(temp.xpath("//h1/text()").toString());
// Only walk the wind entries when the list is present and non-empty.
if(windList !=null && windList.size() > 0){
  for(String win : windList){
    // Each entry is parsed on its own; the result of selecting its <span> title attribute
    // is appended to windStr, followed by a "/" separator.
    Html winHtml = Html.create(win);
    windStr = windStr + winHtml.xpath("//span/@title") + "/";

代码示例来源:origin: CrowHawk/MagicToe

/**
 * Reads the rows of the proxy table on the page, turns each row into a {@code ProxyIp}
 * (IP and port), stores the list under the page field {@code "result"} and queues two
 * follow-up pages.
 *
 * <p>The text of a row is split on whitespace; token 0 is used as the IP and token 1 as the
 * port. Rows that yield no text, fewer than two tokens, or a non-numeric port are skipped
 * instead of aborting the whole page with an exception.
 *
 * @param page the downloaded page to extract from and to attach results/requests to
 */
@Override
public void process(Page page) {
  List<String> ipList = page.getHtml().xpath("//table[@class='table table-bordered table-striped']/tbody/tr").all();
  List<ProxyIp> result = new ArrayList<>();
  if (ipList != null) {
    for (String tmp : ipList) {
      // The selector yields null when nothing matches, so guard before trimming.
      String rowText = Html.create(tmp).xpath("//body/text()").toString();
      if (rowText == null) {
        continue;
      }
      String[] data = rowText.trim().split("\\s+");
      if (data.length < 2) {
        continue;
      }
      Integer port;
      try {
        port = Integer.valueOf(data[1]);
      } catch (NumberFormatException ignored) {
        // Not a data row (e.g. a header or placeholder row): nothing usable to record.
        continue;
      }
      ProxyIp proxyIp = new ProxyIp();
      proxyIp.setIp(data[0]);
      proxyIp.setPort(port);
      result.add(proxyIp);
    }
  }
  page.putField("result", result);
  page.addTargetRequest("http://www.kuaidaili.com/free/inha/2/");
  page.addTargetRequest("http://www.kuaidaili.com/free/intr/1/");
}

代码示例来源:origin: CrowHawk/MagicToe

/**
 * Reads the rows of the {@code ip_list} table on the page, turns each data row into a
 * {@code ProxyIp} (IP and port), stores the list under the page field {@code "result"} and
 * queues two follow-up pages.
 *
 * <p>The first row is treated as the table header and is not parsed. The text of each
 * remaining row is split on whitespace; token 0 is used as the IP and token 1 as the port.
 * Rows that yield no text, fewer than two tokens, or a non-numeric port are skipped instead
 * of aborting the whole page with an exception.
 *
 * @param page the downloaded page to extract from and to attach results/requests to
 */
@Override
public void process(Page page) {
  List<String> ipList = page.getHtml().xpath("//table[@id='ip_list']/tbody/tr").all();
  List<ProxyIp> result = new ArrayList<>();
  if (ipList != null && ipList.size() > 1) {
    // Skip the header row via a view rather than removing it from the selector's list.
    for (String tmp : ipList.subList(1, ipList.size())) {
      // The selector yields null when nothing matches, so guard before trimming.
      String rowText = Html.create(tmp).xpath("//body/text()").toString();
      if (rowText == null) {
        continue;
      }
      String[] data = rowText.trim().split("\\s+");
      if (data.length < 2) {
        continue;
      }
      Integer port;
      try {
        port = Integer.valueOf(data[1]);
      } catch (NumberFormatException ignored) {
        // Row does not carry a numeric port: nothing usable to record.
        continue;
      }
      ProxyIp proxyIp = new ProxyIp();
      proxyIp.setIp(data[0]);
      proxyIp.setPort(port);
      result.add(proxyIp);
    }
  }
  page.putField("result", result);
  page.addTargetRequest("http://www.xicidaili.com/nn/2");
  page.addTargetRequest("http://www.xicidaili.com/nt/");
}

代码示例来源:origin: zifangsky/WeatherSpider

/**
 * Reads the rows of the proxy table on the page, turns each row into a {@code ProxyIp}
 * (IP, port, and where available address and type), stores the list under the page field
 * {@code "result"} and queues two follow-up pages.
 *
 * <p>The text of a row is split on whitespace: token 0 is the IP, token 1 the port and
 * token 3 the type. The address is whatever the regular expression captures between the
 * protocol token and the trailing "N秒" token of the raw row text. Rows that yield no text,
 * fewer than two tokens, or a non-numeric port are skipped; the type is only set when a
 * fourth token exists. Previously any of these conditions threw and aborted the page.
 *
 * @param page the downloaded page to extract from and to attach results/requests to
 */
@Override
public void process(Page page) {
  List<String> ipList = page.getHtml().xpath("//table[@class='table table-bordered table-striped']/tbody/tr").all();
  List<ProxyIp> result = new ArrayList<>();

  // Compiled once per call instead of once per row.
  Pattern pattern = Pattern.compile("HTTPS?\\s(.*)?\\s\\d秒");

  if (ipList != null) {
    for (String tmp : ipList) {
      // Evaluate the XPath once; the selector yields null when nothing matches.
      String dataStr = Html.create(tmp).xpath("//body/text()").toString();
      if (dataStr == null) {
        continue;
      }
      String[] data = dataStr.trim().split("\\s+");
      if (data.length < 2) {
        continue;
      }
      Integer port;
      try {
        port = Integer.valueOf(data[1]);
      } catch (NumberFormatException ignored) {
        // Row does not carry a numeric port: nothing usable to record.
        continue;
      }

      ProxyIp proxyIp = new ProxyIp();
      proxyIp.setIp(data[0]);
      proxyIp.setPort(port);

      // The address is matched against the untrimmed row text, as before.
      Matcher matcher = pattern.matcher(dataStr);
      if (matcher.find()) {
        proxyIp.setAddr(matcher.group(1));
      }
      if (data.length > 3) {
        proxyIp.setType(data[3]);
      }

      result.add(proxyIp);
    }
  }
  page.putField("result", result);
  page.addTargetRequest("https://www.kuaidaili.com/free/inha/2/");
  page.addTargetRequest("https://www.kuaidaili.com/free/intr/1/");
}

代码示例来源:origin: zifangsky/WeatherSpider

/**
 * Reads the rows of the {@code ip_list} table on the page, turns each data row into a
 * {@code ProxyIp} (IP, port, address and, where available, type), stores the list under the
 * page field {@code "result"} and queues two follow-up pages.
 *
 * <p>The first row is treated as the table header and is not parsed. The text of each
 * remaining row is split on whitespace: token 0 is the IP, token 1 the port and token 3 the
 * type; the address is the text of the row's link element. Rows that yield no text, fewer
 * than two tokens, or a non-numeric port are skipped, and the type is only set when a fourth
 * token exists. Previously any of these conditions threw and aborted the page.
 *
 * @param page the downloaded page to extract from and to attach results/requests to
 */
@Override
public void process(Page page) {
  List<String> ipList = page.getHtml().xpath("//table[@id='ip_list']/tbody/tr").all();
  List<ProxyIp> result = new ArrayList<>();

  if (ipList != null && ipList.size() > 1) {
    // Skip the header row via a view rather than removing it from the selector's list.
    for (String tmp : ipList.subList(1, ipList.size())) {
      Html html = Html.create(tmp);
      // The selector yields null when nothing matches, so guard before trimming.
      String rowText = html.xpath("//body/text()").toString();
      if (rowText == null) {
        continue;
      }
      String[] data = rowText.trim().split("\\s+");
      if (data.length < 2) {
        continue;
      }
      Integer port;
      try {
        port = Integer.valueOf(data[1]);
      } catch (NumberFormatException ignored) {
        // Row does not carry a numeric port: nothing usable to record.
        continue;
      }

      ProxyIp proxyIp = new ProxyIp();
      proxyIp.setIp(data[0]);
      proxyIp.setPort(port);
      proxyIp.setAddr(html.xpath("//a/text()").toString());
      if (data.length > 3) {
        proxyIp.setType(data[3]);
      }

      result.add(proxyIp);
    }
  }
  page.putField("result", result);
  page.addTargetRequest("http://www.xicidaili.com/nn/2");
  page.addTargetRequest("http://www.xicidaili.com/nt/");
}

相关文章

微信公众号

最新文章

更多