本文整理了Java中org.jsoup.nodes.Element.data()
方法的一些代码示例,展示了Element.data()
的具体用法。这些代码示例主要来源于Github
/Stackoverflow
/Maven
等平台,是从一些精选项目中提取出来的代码,具有较强的参考意义,能在一定程度上帮助到你。Element.data()
方法的具体详情如下:
包路径:org.jsoup.nodes.Element
类名称:Element
方法名:data
[英]Get the combined data of this element. Data is e.g. the inside of a script tag. Note that data is NOT the text of the element. Use #text() to get the text that would be visible to a user, and #data() for the contents of scripts, comments, CSS styles, etc.
[中]获取此元素的组合数据。例如,数据是指脚本标签内部的内容。请注意,数据不是元素的文本。使用#text()获取用户可见的文本,使用#data()获取脚本、注释、CSS样式等的内容。
代码示例来源:origin: RipMeApp/ripme
/**
 * Searches the JavaScript blocks of a page for the album collection JSON.
 *
 * @param page document whose {@code script[type=text/javascript]} elements are scanned
 * @return the JSON built from group 1 of {@code collDataPattern} for the first script that
 *         mentions "libraryAlbumsPageCollectionData" and matches the pattern in full,
 *         or {@code null} when no script qualifies
 */
private JSONObject getCollectionData(Document page) {
    for (Element scriptTag : page.select("script[type=text/javascript]")) {
        String scriptBody = scriptTag.data();
        // Skip chunks of javascript that do not carry the album info
        if (!scriptBody.contains("libraryAlbumsPageCollectionData")) {
            continue;
        }
        Matcher collMatcher = collDataPattern.matcher(scriptBody);
        if (!collMatcher.matches()) {
            continue;
        }
        // Group 1 holds the JSON payload
        return new JSONObject(collMatcher.group(1));
    }
    return null;
}
代码示例来源:origin: RipMeApp/ripme
/**
 * Builds the list of image URLs from the "var pages" JSON array embedded in a script tag.
 *
 * <p>Every script element is inspected; when several contain "var pages", the last one wins.
 * The "var pages = " prefix and the CDATA comment markers are stripped before the remaining
 * text is parsed as a JSON array.
 *
 * @param doc the page to scan
 * @return one URL per array entry, each prefixed with "https://dynasty-scans.com";
 *         an empty list when no script declares "var pages"
 */
@Override
public List<String> getURLsFromPage(Document doc) {
    List<String> result = new ArrayList<>();
    String jsonText = null;
    for (Element script : doc.select("script")) {
        String data = script.data();
        if (data.contains("var pages")) {
            jsonText = data.replaceAll("var pages = ", "");
            jsonText = jsonText.replaceAll("//<!\\[CDATA\\[", "");
            jsonText = jsonText.replaceAll("//]]>", "");
        }
    }
    if (jsonText == null) {
        // No script carried the page list; parsing null would crash, so report "no URLs" instead.
        return result;
    }
    JSONArray imageArray = new JSONArray(jsonText);
    for (int i = 0; i < imageArray.length(); i++) {
        result.add("https://dynasty-scans.com" + imageArray.getJSONObject(i).getString("image"));
    }
    return result;
}
代码示例来源:origin: org.jsoup/jsoup
/**
 * Tests whether the element's combined data, after being passed through {@code lowerCase},
 * contains {@code searchText}.
 *
 * @param root not consulted by this check
 * @param element the element whose data is inspected
 * @return {@code true} when the normalized data contains the search text
 */
@Override
public boolean matches(Element root, Element element) {
    final String normalizedData = lowerCase(element.data());
    return normalizedData.contains(searchText);
}
代码示例来源:origin: RipMeApp/ripme
/**
 * Extracts image URLs from every script tag that declares "var images = ".
 *
 * <p>The other variable declarations and all semicolons are stripped from the script text,
 * the remainder is parsed as a JSON object, and its values are read under the keys
 * "1" .. "n", where n is the number of entries in the object.
 *
 * @param doc the page to scan
 * @return the collected image URLs in key order, possibly empty
 */
@Override
public List<String> getURLsFromPage(Document doc) {
    List<String> urls = new ArrayList<>();
    for (Element scriptTag : doc.select("script")) {
        if (!scriptTag.data().contains("var images = ")) {
            continue;
        }
        // NOTE(review): "\\.*" below matches only a run of literal dots - confirm whether ".*" was intended.
        String json = scriptTag.data()
                .replaceAll("var seriesId = \\d+;", "")
                .replaceAll("var chapterId = \\d+;", "")
                .replaceAll("var pages = \\d+;", "")
                .replaceAll("var page = \\d+;", "")
                .replaceAll("var prevCha = null;", "")
                .replaceAll("var nextCha = \\.*;", "")
                .replaceAll("var images = ", "")
                .replaceAll(";", "");
        JSONObject images = new JSONObject(json);
        // Keys are 1-based page numbers stored as strings
        for (int pageNo = 1; pageNo <= images.length(); pageNo++) {
            urls.add(images.getString(Integer.toString(pageNo)));
        }
    }
    return urls;
}
代码示例来源:origin: RipMeApp/ripme
/**
 * Obtains the JSON payload of a page that is either plain JSON or HTML with the JSON
 * assigned to {@code window._sharedData} inside a script tag.
 *
 * <p>Parse failures in the plain-JSON branch are not wrapped; only the script-tag branch
 * converts {@code JSONException} into {@code IOException}.
 *
 * @param firstPage the fetched page
 * @return the parsed JSON object
 * @throws IOException if the text taken from the script tag is not valid JSON; the
 *         underlying {@code JSONException} is attached as the cause
 */
private JSONObject getJSONFromPage(Document firstPage) throws IOException {
    // Check if this page is HTML + JSON or just JSON
    if (!firstPage.html().contains("window._sharedData =")) {
        return new JSONObject(stripHTMLTags(firstPage.html()));
    }
    String jsonText = "";
    try {
        for (Element script : firstPage.select("script[type=text/javascript]")) {
            if (script.data().contains("window._sharedData = ")) {
                jsonText = script.data().replaceAll("window._sharedData = ", "");
                // Drop the statement terminator that follows the object literal
                jsonText = jsonText.replaceAll("};", "}");
            }
        }
        return new JSONObject(jsonText);
    } catch (JSONException e) {
        // Keep the parser's diagnostic reachable for whoever handles the IOException
        throw new IOException("Could not get JSON from page", e);
    }
}
代码示例来源:origin: apache/nifi
/**
 * Picks one representation of the element according to {@code outputType} and wraps it
 * with the optional prefix and suffix.
 *
 * @param prependValue text placed before the value; treated as "" when empty per {@code StringUtils.isEmpty}
 * @param outputType selects html, text, data or attribute output; any other value falls back to html
 * @param appendValue text placed after the value; treated as "" when empty per {@code StringUtils.isEmpty}
 * @param ele the parsed HTML element to read from
 * @param attrKey attribute name, consulted only for attribute output
 * @return value from the parsed HTML element, surrounded by the prefix and suffix
 */
private String extractElementValue(String prependValue, final String outputType, String appendValue, final Element ele, final String attrKey) {
    final String prefix = StringUtils.isEmpty(prependValue) ? "" : prependValue;
    final String suffix = StringUtils.isEmpty(appendValue) ? "" : appendValue;

    final String value;
    switch (outputType) {
        case ELEMENT_TEXT:
            value = ele.text();
            break;
        case ELEMENT_DATA:
            value = ele.data();
            break;
        case ELEMENT_ATTRIBUTE:
            value = ele.attr(attrKey);
            break;
        case ELEMENT_HTML:
        default:
            // Unrecognized output types are rendered as html
            value = ele.html();
            break;
    }
    return prefix + value + suffix;
}
代码示例来源:origin: org.jsoup/jsoup
} else if (childNode instanceof Element) {
Element element = (Element) childNode;
String elementData = element.data();
sb.append(elementData);
} else if (childNode instanceof CDataNode) {
代码示例来源:origin: RipMeApp/ripme
/**
 * Fetches the page, looks in each script tag for the first {@code "source":"..."} entry and
 * queues the captured URL for download, then waits for the download threads.
 *
 * @throws IOException if the page has no script tags, or if fetching or URL construction fails
 */
@Override
public void rip() throws IOException {
    LOGGER.info("Retrieving " + this.url);
    Document page = Http.url(url).get();
    // The page title serves as the user friendly part of the filename
    String pageTitle = page.title();
    Elements scriptTags = page.select("script");
    if (scriptTags.isEmpty()) {
        throw new IOException("Could not find script code at " + url);
    }
    // Regex assumes highest quality source is listed first
    Pattern sourcePattern = Pattern.compile("\"source\":\"(.*?)\"");
    for (Element scriptTag : scriptTags) {
        Matcher sourceMatcher = sourcePattern.matcher(scriptTag.data());
        if (!sourceMatcher.find()) {
            continue;
        }
        URL videoUrl = new URL(sourceMatcher.group(1));
        addURLToDownload(videoUrl, HOST + "_" + pageTitle);
    }
    waitForThreads();
}
}
代码示例来源:origin: RipMeApp/ripme
/**
 * Fetches the page, follows its first iframe, looks in each script tag of the framed
 * document for a {@code file: "..."} entry and queues the captured URL for download,
 * then waits for the download threads.
 *
 * @throws IOException if the page has no iframe with a non-empty {@code src}, if the framed
 *         document has no script tags, or if fetching or URL construction fails
 */
@Override
public void rip() throws IOException {
    LOGGER.info("Retrieving " + this.url);
    Document doc = Http.url(url).get();
    Element iframe = doc.select("iframe").first();
    // first() yields null when nothing matched, and attr() yields "" (never null) for a
    // missing attribute, so a plain null check on the src value can never fail.
    if (iframe == null || iframe.attr("src").isEmpty()) {
        throw new IOException("Could not find iframe code at " + url);
    }
    String iframeSrc = iframe.attr("src");
    doc = Http.url("http://www.yuvutu.com" + iframeSrc).get();
    Elements script = doc.select("script");
    if (script.isEmpty()) {
        throw new IOException("Could not find script code at " + url);
    }
    Pattern p = Pattern.compile("file: \"(.*?)\"");
    for (Element element : script) {
        Matcher m = p.matcher(element.data());
        if (m.find()) {
            String vidUrl = m.group(1);
            addURLToDownload(new URL(vidUrl), HOST + "_" + getGID(this.url));
        }
    }
    waitForThreads();
}
}
代码示例来源:origin: neo4j-contrib/neo4j-apoc-procedures
/**
 * Converts each element into a map of its non-empty properties and appends that map
 * to {@code resultList}.
 *
 * <p>Possible keys: "attributes", "data", "text", "value" and "tagName"; a key is present
 * only when the corresponding property is non-empty.
 *
 * @param elements the elements to describe
 * @param resultList receives one map per element, in iteration order
 */
private void getElements(Elements elements, List<Map<String, Object>> resultList) {
    for (Element current : elements) {
        Map<String, Object> entry = new HashMap<>();

        if (current.attributes().size() > 0) {
            entry.put("attributes", getAttributes(current));
        }

        String data = current.data();
        if (!data.isEmpty()) {
            entry.put("data", data);
        }

        if (current.hasText()) {
            entry.put("text", current.text());
        }

        String value = current.val();
        if (!value.isEmpty()) {
            entry.put("value", value);
        }

        String tagName = current.tagName();
        if (!tagName.isEmpty()) {
            entry.put("tagName", tagName);
        }

        resultList.add(entry);
    }
}
代码示例来源:origin: astamuse/asta4d
/**
 * Delegates to the wrapped {@code originElement}.
 *
 * @return whatever {@code originElement.data()} returns
 */
public String data() {
    final String delegated = originElement.data();
    return delegated;
}
代码示例来源:origin: stackoverflow.com
// Collect the visible text of every paragraph directly under div.entry.
Document doc = Jsoup.parse(line);
Elements pElems = doc.select("div.entry > p");
for (Element pElem : pElems) {
    // data() only covers script/style/comment content and skips text nodes, so it yields ""
    // for an ordinary paragraph; text() is the accessor for user-visible text.
    myArrayList.add(pElem.text());
}
代码示例来源:origin: stackoverflow.com
// Parse the markup and print the data of every script element, in document order.
Document doc = Jsoup.parse(html);
Elements scripts = doc.getElementsByTag("script");
for (int idx = 0; idx < scripts.size(); idx++) {
    String scriptBody = scripts.get(idx).data();
    System.out.println(scriptBody);
}
代码示例来源:origin: stackoverflow.com
// Fetch the page (3000 is passed as the timeout argument) and print every non-empty script body.
URL url = new URL("http://stackoverflow.com/questions/2993515");
Document document = Jsoup.parse(url, 3000);
Elements scripts = document.select("script");
for (Element scriptTag : scripts) {
    String body = scriptTag.data();
    if (body.isEmpty()) {
        // Nothing to print for this script element
        continue;
    }
    System.out.println(body);
}
代码示例来源:origin: stackoverflow.com
String url = "https://www.wunderground.com/webcams/cadot1/902/video.html";
int timeout = 100 * 1000;
// Extract video URL from the last script tag inside the #inner-content element
Document doc = Jsoup.connect(url).timeout(timeout).get();
Element innerContent = doc.getElementById("inner-content");
Elements innerScripts = innerContent.getElementsByTag("script");
Element script = innerScripts.last();
String content = script.data();
// The value is cut between the first "url" occurrence and the next comma.
// NOTE(review): the +6 / -1 offsets presumably skip the key prefix and a trailing quote - confirm against the page.
int indexOfUrl = content.indexOf("url");
int indexOfComma = content.indexOf(',', indexOfUrl);
int valueStart = indexOfUrl + 6;
int valueEnd = indexOfComma - 1;
String videoUrl = "https:" + content.substring(valueStart, valueEnd);
System.out.println(videoUrl);
代码示例来源:origin: stackoverflow.com
// Write selected fragments of the document (element #1, the matching scripts, and the
// class1/class2 elements) to the file at filePath, one fragment per line, encoded as UTF-8.
File input = new File(filePath);
Elements scripts = document.getElementsByTag("script");
// try-with-resources closes (and thereby flushes) the writer even if a lookup or write throws,
// so the file handle is not leaked on failure.
try (PrintWriter writer = new PrintWriter(input, "UTF-8")) {
    writer.write(document.getElementById("1").outerHtml() + "\n");
    for (Element script : scripts) {
        // Only the script whose body begins with this function header is kept
        if (script.data().startsWith("(function(a, b)")) {
            writer.write(script.outerHtml() + "\n");
        }
    }
    writer.write(document.getElementsByClass("class1").outerHtml() + "\n");
    writer.write(document.getElementsByClass("class2").outerHtml() + "\n");
}
代码示例来源:origin: jenkinsci/email-ext-plugin
/**
 * Collects the contents of the style elements marked {@code data-inline="true"} and removes
 * those elements from the document.
 *
 * @param doc the html document; marked style elements are detached from it
 * @return the concatenated data of the marked style elements, in selection order
 */
private String fetchStyles(Document doc) {
    StringBuilder css = new StringBuilder();
    for (Element styleTag : doc.select(STYLE_TAG)) {
        boolean inlineRequested = styleTag.attr("data-inline").equals("true");
        if (!inlineRequested) {
            continue;
        }
        css.append(styleTag.data());
        styleTag.remove();
    }
    return css.toString();
}
代码示例来源:origin: fr.sii.ogham/ogham-core
/**
 * Collects the contents of every style element that is not flagged to be skipped and removes
 * those elements from the document.
 *
 * @param doc
 *            the html document; collected style elements are detached from it
 * @return the concatenated data of the collected style elements, in selection order
 */
private String fetchStyles(Document doc) {
    StringBuilder css = new StringBuilder();
    for (Element styleTag : doc.select(STYLE_TAG)) {
        // Elements whose SKIP_INLINE attribute equals TRUE_VALUE stay untouched
        if (TRUE_VALUE.equals(styleTag.attr(SKIP_INLINE))) {
            continue;
        }
        css.append(styleTag.data());
        styleTag.remove();
    }
    return css.toString();
}
代码示例来源:origin: chenerzhu/proxy-pool
/**
 * Computes a port number by evaluating JavaScript taken from the page.
 *
 * <p>The third script element of the page is evaluated first. Then the first match of
 * {@code \+.*?<} in the html of the element's first child is trimmed, its '+' operators
 * are rewritten to {@code +''+}, and the resulting expression is evaluated in the same
 * engine; the string result is parsed as an integer.
 *
 * @param element element whose first child contains the port expression
 * @return the parsed port, or -1 when the expression pattern is not found
 * @throws ScriptException if no engine is registered under the name "js", or if evaluation fails
 */
private int getPort(Element element) throws ScriptException {
    int port = -1;
    ScriptEngineManager manager = new ScriptEngineManager();
    ScriptEngine engine = manager.getEngineByName("js");
    if (engine == null) {
        // getEngineByName returns null when no matching engine is available on this runtime
        throw new ScriptException("No script engine registered under the name \"js\"");
    }
    Pattern pattern = Pattern.compile("\\+.*?<");
    Document document = webPage.getDocument();
    String script = document.getElementsByTag("script").get(2).data();
    // SECURITY NOTE(review): this evaluates script text scraped from a remote page;
    // confirm the source is trusted or run the engine in a restricted context.
    engine.eval(script);
    Matcher matcher = pattern.matcher(element.child(0).html());
    if (matcher.find()) {
        String matched = matcher.group(0);
        // Drop the leading '+' and the last two characters of the match
        String portScript = matched.substring(1, matched.length() - 2);
        // Interleaving '' between operands forces string concatenation instead of addition
        Object obj = engine.eval(portScript.replaceAll("\\+", "+''+"));
        port = Integer.parseInt((String) obj);
    }
    return port;
}
代码示例来源:origin: cn.wanghaomiao/JsoupXpath
/**
 * Core logic of this function: maps every element in the scope's context to a string,
 * using {@code data()} for script nodes and {@code text()} for all others.
 *
 * @param scope the evaluation context
 * @return the computed values wrapped in an {@code XValue}
 */
@Override
public XValue call(Scope scope) {
    List<String> collected = new LinkedList<>();
    for (Element node : scope.context()) {
        boolean isScript = "script".equals(node.nodeName());
        collected.add(isScript ? node.data() : node.text());
    }
    return XValue.create(collected);
}
}
内容来源于网络,如有侵权,请联系作者删除!