org.jsoup.nodes.Element.data()方法的使用及代码示例

x33g5p2x  于2022-01-18 转载在 其他  
字(9.9k)|赞(0)|评价(0)|浏览(154)

本文整理了Java中org.jsoup.nodes.Element.data()方法的一些代码示例,展示了Element.data()的具体用法。这些代码示例主要来源于Github/Stackoverflow/Maven等平台,是从一些精选项目中提取出来的代码,具有较强的参考意义,能在一定程度上帮助到你。Element.data()方法的具体详情如下:
包路径:org.jsoup.nodes.Element
类名称:Element
方法名:data

Element.data介绍

[英]Get the combined data of this element. Data is e.g. the inside of a script tag. Note that data is NOT the text of the element. Use #text() to get the text that would be visible to a user, and #data() for the contents of scripts, comments, CSS styles, etc.
[中]获取此元素的组合数据。数据是指例如脚本标签内部的内容。请注意,数据不是元素的文本。使用#text()获取用户可见的文本,使用#data()获取脚本、注释、CSS样式等的内容。

代码示例

代码示例来源:origin: RipMeApp/ripme

/**
 * Searches the page's {@code text/javascript} script tags for the album collection data
 * and parses the JSON captured from it.
 *
 * @param page document to search
 * @return JSON built from capture group 1 of {@code collDataPattern} for the first script
 *         whose data contains the marker and matches the pattern in full, or {@code null}
 *         when no script qualifies
 */
private JSONObject getCollectionData(Document page){
  for (Element scriptTag : page.select("script[type=text/javascript]")) {
    String scriptBody = scriptTag.data();
    // Skip chunks of javascript that do not carry the album info
    if (!scriptBody.contains("libraryAlbumsPageCollectionData")) {
      continue;
    }
    Matcher collMatcher = collDataPattern.matcher(scriptBody);
    if (collMatcher.matches()) {
      // Capture group 1 holds the JSON text
      return new JSONObject(collMatcher.group(1));
    }
  }
  return null;
}

代码示例来源:origin: RipMeApp/ripme

/**
 * Collects the image URLs listed in the page's inline {@code var pages} script.
 *
 * @param doc page to scan
 * @return image URLs, each prefixed with {@code https://dynasty-scans.com}, in array order;
 *         empty when no script data contains {@code var pages}
 */
@Override
public List<String> getURLsFromPage(Document doc) {
  List<String> result = new ArrayList<>();
  String jsonText = null;
  for (Element script : doc.select("script")) {
    String data = script.data();
    if (data.contains("var pages")) {
      // Strip the assignment prefix and the CDATA comment markers. These are fixed
      // strings, so literal replace() is used rather than regex replaceAll().
      jsonText = data.replace("var pages = ", "")
          .replace("//<![CDATA[", "")
          .replace("//]]>", "");
    }
  }
  if (jsonText == null) {
    // No script declared the pages array; there is nothing to parse.
    return result;
  }
  JSONArray imageArray = new JSONArray(jsonText);
  for (int i = 0; i < imageArray.length(); i++) {
    result.add("https://dynasty-scans.com" + imageArray.getJSONObject(i).getString("image"));
  }
  return result;
}

代码示例来源:origin: org.jsoup/jsoup

/**
 * Tests whether the element's data, after passing through {@code lowerCase}, contains
 * {@code searchText}. The {@code root} argument is not consulted.
 *
 * @param root    root element of the evaluation (unused here)
 * @param element element whose data is inspected
 * @return {@code true} when the normalised data contains {@code searchText}
 */
@Override
public boolean matches(Element root, Element element) {
  String normalisedData = lowerCase(element.data());
  return normalisedData.contains(searchText);
}

代码示例来源:origin: RipMeApp/ripme

/**
 * Gathers image URLs from every script whose data contains {@code var images = }.
 * The other variable declarations are stripped so that only the JSON object remains.
 *
 * @param doc page to scan
 * @return values stored under the keys "1".."n" of each images object, in that order
 */
@Override
public List<String> getURLsFromPage(Document doc) {
  List<String> urls = new ArrayList<>();
  for (Element scriptTag : doc.select("script")) {
    String source = scriptTag.data();
    if (!source.contains("var images = ")) {
      continue;
    }
    // NOTE(review): "\\.*;" matches only a run of literal dots before ';' - confirm
    // whether ".*;" was intended for the nextCha declaration.
    String json = source
        .replaceAll("var seriesId = \\d+;", "")
        .replaceAll("var chapterId = \\d+;", "")
        .replaceAll("var pages = \\d+;", "")
        .replaceAll("var page = \\d+;", "")
        .replaceAll("var prevCha = null;", "")
        .replaceAll("var nextCha = \\.*;", "")
        .replaceAll("var images = ", "")
        .replaceAll(";", "");
    JSONObject images = new JSONObject(json);
    // Keys are the 1-based positions rendered as strings
    for (int position = 1; position <= images.length(); position++) {
      urls.add(images.getString(Integer.toString(position)));
    }
  }
  return urls;
}

代码示例来源:origin: RipMeApp/ripme

/**
 * Reads the JSON payload from a page that is either JSON on its own or HTML that embeds it
 * in a {@code window._sharedData = ...;} script assignment.
 *
 * @param firstPage page to read
 * @return the parsed JSON object
 * @throws IOException if the text taken from the script tags cannot be parsed as JSON; the
 *                     original {@code JSONException} is attached as the cause
 */
private JSONObject getJSONFromPage(Document firstPage) throws IOException {
  // Check if this page is HTML + JSON or just JSON
  if (!firstPage.html().contains("window._sharedData =")) {
    return new JSONObject(stripHTMLTags(firstPage.html()));
  }
  String jsonText = "";
  try {
    for (Element script : firstPage.select("script[type=text/javascript]")) {
      String data = script.data();
      if (data.contains("window._sharedData = ")) {
        // Literal replace: in a regex the '.' of the marker would match any character.
        jsonText = data.replace("window._sharedData = ", "").replace("};", "}");
      }
    }
    return new JSONObject(jsonText);
  } catch (JSONException e) {
    // Keep the parse failure as the cause so the root problem stays diagnosable
    throw new IOException("Could not get JSON from page", e);
  }
}

代码示例来源:origin: apache/nifi

/**
 * Builds the output string for an element: the value selected by {@code outputType},
 * wrapped in the optional prefix and suffix.
 *
 * @param prependValue text placed before the value; treated as "" when empty
 * @param outputType   selects html, text, data or attribute extraction; any other value
 *                     falls back to the element's html
 * @param appendValue  text placed after the value; treated as "" when empty
 * @param ele          element to read from
 * @param attrKey      attribute name, used only for attribute extraction
 * @return value from the parsed HTML element, with prefix and suffix applied
 */
private String extractElementValue(String prependValue, final String outputType, String appendValue, final Element ele, final String attrKey) {
  final String prefix = StringUtils.isEmpty(prependValue) ? "" : prependValue;
  final String suffix = StringUtils.isEmpty(appendValue) ? "" : appendValue;

  final String value;
  switch (outputType) {
    case ELEMENT_TEXT:
      value = ele.text();
      break;
    case ELEMENT_DATA:
      value = ele.data();
      break;
    case ELEMENT_ATTRIBUTE:
      value = ele.attr(attrKey);
      break;
    case ELEMENT_HTML:
    default:
      // HTML is both an explicit choice and the fallback
      value = ele.html();
      break;
  }
  return prefix + value + suffix;
}

代码示例来源:origin: org.jsoup/jsoup

} else if (childNode instanceof Element) {
  Element element = (Element) childNode;
  String elementData = element.data();
  sb.append(elementData);
} else if (childNode instanceof CDataNode) {

代码示例来源:origin: RipMeApp/ripme

/**
 * Fetches the page at {@code url}, looks in each script tag for a {@code "source":"..."}
 * entry and queues the first one found per script for download, then waits for the
 * download threads.
 *
 * @throws IOException if the page has no script tags (or the fetch itself fails)
 */
@Override
  public void rip() throws IOException {
    LOGGER.info("Retrieving " + this.url);
    Document page = Http.url(url).get();

    // The page title doubles as the user friendly filename
    String pageTitle = page.title();

    Elements scriptTags = page.select("script");
    if (scriptTags.isEmpty()) {
      throw new IOException("Could not find script code at " + url);
    }

    // Regex assumes highest quality source is listed first
    Pattern sourcePattern = Pattern.compile("\"source\":\"(.*?)\"");
    for (Element scriptTag : scriptTags) {
      Matcher sourceMatcher = sourcePattern.matcher(scriptTag.data());
      if (!sourceMatcher.find()) {
        continue;
      }
      addURLToDownload(new URL(sourceMatcher.group(1)), HOST + "_" + pageTitle);
    }
    waitForThreads();
  }
}

代码示例来源:origin: RipMeApp/ripme

/**
 * Fetches the page at {@code url}, follows its first iframe, and queues for download the
 * first {@code file: "..."} value found in each script tag of the framed page.
 *
 * @throws IOException if the page has no iframe with a {@code src}, if the framed page has
 *                     no script tags, or if a fetch fails
 */
@Override
  public void rip() throws IOException {
    LOGGER.info("Retrieving " + this.url);
    Document doc = Http.url(url).get();
    Element iframe = doc.select("iframe").first();
    // first() returns null when nothing matched, and attr() returns "" (never null) for a
    // missing attribute, so both cases are checked explicitly.
    if (iframe == null || iframe.attr("src").isEmpty()) {
      throw new IOException("Could not find iframe code at " + url);
    }
    String iframeSrc = iframe.attr("src");
    doc = Http.url("http://www.yuvutu.com" + iframeSrc).get();
    Elements script = doc.select("script");
    if (script.isEmpty()) {
      throw new IOException("Could not find script code at " + url);
    }
    Pattern p = Pattern.compile("file: \"(.*?)\"");

    for (Element element : script) {
      Matcher m = p.matcher(element.data());
      if (m.find()){
        String vidUrl = m.group(1);
        addURLToDownload(new URL(vidUrl), HOST + "_" + getGID(this.url));
      }
    }
    waitForThreads();
  }
}

代码示例来源:origin: neo4j-contrib/neo4j-apoc-procedures

/**
 * Appends one map per element to {@code resultList}. A key is only present when the
 * element has something to report for it: "attributes", "data", "text", "value", "tagName".
 *
 * @param elements   elements to describe
 * @param resultList list that receives the per-element maps
 */
private void getElements(Elements elements, List<Map<String, Object>> resultList) {
  for (Element current : elements) {
    Map<String, Object> entry = new HashMap<>();

    if (current.attributes().size() > 0) {
      entry.put("attributes", getAttributes(current));
    }

    String data = current.data();
    if (!data.isEmpty()) {
      entry.put("data", data);
    }

    if (current.hasText()) {
      entry.put("text", current.text());
    }

    String value = current.val();
    if (!value.isEmpty()) {
      entry.put("value", value);
    }

    String tag = current.tagName();
    if (!tag.isEmpty()) {
      entry.put("tagName", tag);
    }

    resultList.add(entry);
  }
}

代码示例来源:origin: astamuse/asta4d

/**
 * Delegates to the wrapped element's {@code data()}.
 *
 * @return whatever {@code originElement.data()} returns
 */
public String data() {
  final String delegated = originElement.data();
  return delegated;
}

代码示例来源:origin: stackoverflow.com

// Parse the markup and pick the <p> elements that are direct children of div.entry
Document doc = Jsoup.parse(line);
Elements pElems = doc.select("div.entry > p");

// NOTE(review): data() is not the visible text of an element; if the paragraph text is
// wanted here, text() may be the intended call - confirm.
for (int pIndex = 0; pIndex < pElems.size(); pIndex++) {
  myArrayList.add(pElems.get(pIndex).data());
}

代码示例来源:origin: stackoverflow.com

// Print the data of every <script> element in the parsed markup
Document doc = Jsoup.parse(html);
Elements scripts = doc.getElementsByTag("script");
scripts.forEach(script -> System.out.println(script.data()));

代码示例来源:origin: stackoverflow.com

// Fetch the page (3000 ms timeout) and print the data of each script that has any
URL url = new URL("http://stackoverflow.com/questions/2993515");
Document document = Jsoup.parse(url, 3000);

Elements scripts = document.select("script");
scripts.stream()
    .map(Element::data)
    .filter(data -> !data.isEmpty())
    .forEach(System.out::println);

代码示例来源:origin: stackoverflow.com

String url = "https://www.wunderground.com/webcams/cadot1/902/video.html";
int timeout = 100 * 1000;

// Fetch the page and take the last <script> inside the #inner-content element
Document doc = Jsoup.connect(url).timeout(timeout).get();
Element innerContent = doc.getElementById("inner-content");
Element script = innerContent.getElementsByTag("script").last();
String content = script.data();

// Cut the value out between the first "url" and the comma that follows it.
// NOTE(review): the +6 / -1 offsets presumably skip the key, separator and quotes -
// verify against the actual script text.
int indexOfUrl = content.indexOf("url");
int indexOfComma = content.indexOf(',', indexOfUrl);
int valueStart = indexOfUrl + 6;
int valueEnd = indexOfComma - 1;
String videoUrl = "https:" + content.substring(valueStart, valueEnd);
System.out.println(videoUrl);

代码示例来源:origin: stackoverflow.com

// Writes selected parts of the document (element "1", matching scripts, and the elements
// of class1 / class2) to the file at filePath, one outer-HTML chunk per line.
File input = new File(filePath);
// try-with-resources closes (and thereby flushes) the writer even when a write throws
try (PrintWriter writer = new PrintWriter(input, "UTF-8")) {
  writer.write(document.getElementById("1").outerHtml() + "\n");
  Elements scripts = document.getElementsByTag("script");
  for (Element script : scripts) {
    // Keep only the scripts whose data starts with this function header
    if (script.data().startsWith("(function(a, b)")) {
      writer.write(script.outerHtml() + "\n");
    }
  }
  writer.write(document.getElementsByClass("class1").outerHtml() + "\n");
  writer.write(document.getElementsByClass("class2").outerHtml() + "\n");
}

代码示例来源:origin: jenkinsci/email-ext-plugin

/**
 * Builds a stylesheet from the elements selected by {@code STYLE_TAG} that carry
 * {@code data-inline="true"}. Each element used is removed from the document.
 *
 * @param doc the html document
 * @return the concatenated data of the selected elements
 */
private String fetchStyles(Document doc) {
  StringBuilder stylesheet = new StringBuilder();
  for (Element styleElement : doc.select(STYLE_TAG)) {
    // Only elements explicitly marked for inlining contribute
    if (!styleElement.attr("data-inline").equals("true")) {
      continue;
    }
    stylesheet.append(styleElement.data());
    styleElement.remove();
  }
  return stylesheet.toString();
}

代码示例来源:origin: fr.sii.ogham/ogham-core

/**
 * Builds a stylesheet from the elements selected by {@code STYLE_TAG}, leaving out those
 * whose {@code SKIP_INLINE} attribute equals {@code TRUE_VALUE}. Each element used is
 * removed from the document.
 *
 * @param doc
 *            the html document
 * @return the concatenated data of the elements that were not skipped
 */
private String fetchStyles(Document doc) {
  StringBuilder stylesheet = new StringBuilder();
  for (Element styleElement : doc.select(STYLE_TAG)) {
    // Elements flagged to skip inlining stay in the document untouched
    if (TRUE_VALUE.equals(styleElement.attr(SKIP_INLINE))) {
      continue;
    }
    stylesheet.append(styleElement.data());
    styleElement.remove();
  }
  return stylesheet.toString();
}

代码示例来源:origin: chenerzhu/proxy-pool

/**
 * Recovers a port number by evaluating JavaScript. The data of the page's third script tag
 * is evaluated first, then the expression found in the html of the element's first child.
 *
 * @param element element whose first child holds the port expression
 * @return the parsed port, or -1 when the child html contains no {@code +...<} match
 * @throws ScriptException if no engine named "js" is available or evaluation fails
 */
private int getPort(Element element) throws ScriptException {
  ScriptEngine engine = new ScriptEngineManager().getEngineByName("js");
  if (engine == null) {
    // getEngineByName returns null when no matching engine is installed
    throw new ScriptException("No script engine named \"js\" is available");
  }

  Document document = webPage.getDocument();
  String script = document.getElementsByTag("script").get(2).data();
  engine.eval(script);

  int port = -1;
  Matcher matcher = Pattern.compile("\\+.*?<").matcher(element.child(0).html());
  if (matcher.find()) {
    String match = matcher.group(0);
    // Drop the leading '+' and the last two characters of the match
    String portScript = match.substring(1, match.length() - 2);
    Object obj = engine.eval(portScript.replaceAll("\\+", "+''+"));
    // String.valueOf copes with engines that return a non-String result object
    port = Integer.parseInt(String.valueOf(obj));
  }
  return port;
}

代码示例来源:origin: cn.wanghaomiao/JsoupXpath

/**
   * Function logic: produces one string per element of the scope's context, using the
   * element's data for {@code script} nodes and its text for everything else.
   *
   * @param scope evaluation context
   * @return the computed value wrapping the collected strings
   */
  @Override
  public XValue call(Scope scope) {
    List<String> collected = new LinkedList<>();
    for (Element current : scope.context()) {
      boolean isScript = "script".equals(current.nodeName());
      collected.add(isScript ? current.data() : current.text());
    }
    return XValue.create(collected);
  }
}

相关文章

微信公众号

最新文章

更多

Element类方法