org.htmlparser.Parser.<init>()方法的使用及代码示例

x33g5p2x  于2022-01-26 转载在 其他  
字(7.0k)|赞(0)|评价(0)|浏览(112)

本文整理了Java中org.htmlparser.Parser.<init>()方法的一些代码示例,展示了Parser.<init>()的具体用法。这些代码示例主要来源于GitHub/Stack Overflow/Maven等平台,是从一些精选项目中提取出来的代码,具有较强的参考意义,能在一定程度上帮助到你。Parser.<init>()方法的具体详情如下:
包路径:org.htmlparser.Parser
类名称:Parser
方法名:<init>

Parser.<init>介绍

[英]Zero argument constructor. The parser is in a safe but useless state parsing an empty string. Set the lexer or connection using #setLexer or #setConnection.
[中]零参数构造函数。解析器在解析空字符串时处于安全但无用的状态。使用 #setLexer 或 #setConnection 设置 lexer 或连接。

代码示例

代码示例来源:origin: org.htmlparser/htmlparser

/**
 * Constructs a FilterBean.
 * <p>
 * Binds a {@link PropertyChangeSupport} to this bean, creates a
 * zero-argument {@link Parser}, leaves the filters and nodes unset
 * ({@code null}) and switches the recursive flag on.
 */
public FilterBean ()
{
  mRecursive = true;
  mNodes = null;
  mFilters = null;
  mPropertySupport = new PropertyChangeSupport (this);
  mParser = new Parser ();
}

代码示例来源:origin: com.bbossgroups/bboss-htmlparser

/**
 * Creates a new LinkBean.
 * <p>
 * The bean starts with no links ({@code null}), its own
 * {@link PropertyChangeSupport} and a zero-argument {@link Parser}.
 */
public LinkBean ()
{
  mLinks = null;
  mPropertySupport = new PropertyChangeSupport (this);
  mParser = new Parser ();
}

代码示例来源:origin: org.htmlparser/htmlparser

/**
 * Builds a LinkBean in its initial state: property-change support bound
 * to this instance, a default-constructed {@link Parser}, and the link
 * collection not yet populated ({@code null}).
 */
public LinkBean ()
{
  mLinks = null;
  mPropertySupport = new PropertyChangeSupport (this);
  mParser = new Parser ();
}

代码示例来源:origin: com.bbossgroups/bboss-htmlparser

/**
 * Constructs a FilterBean.
 * <p>
 * Binds a {@link PropertyChangeSupport} to this bean and creates a
 * zero-argument {@link Parser}; the filters and nodes start out
 * unset ({@code null}).
 */
public FilterBean ()
{
  mNodes = null;
  mFilters = null;
  mPropertySupport = new PropertyChangeSupport (this);
  mParser = new Parser ();
}

代码示例来源:origin: org.fitnesse/fitnesse

/**
 * Runs the candidate text through a {@link Parser} and returns the nodes
 * produced by {@code parse(null)}.
 *
 * @param possibleTable the text handed to the Parser constructor
 * @return the parsed nodes, or {@code null} when parsing raises a
 *         ParserException or a StringIndexOutOfBoundsException
 */
private NodeList parseHtml(String possibleTable) {
  NodeList parsed;
  try {
    parsed = new Parser(possibleTable).parse(null);
  } catch (ParserException | StringIndexOutOfBoundsException unparsable) {
    // Callers are told about unparsable input through a null result.
    parsed = null;
  }
  return parsed;
}

代码示例来源:origin: com.github.tcnh/fitnesse

/**
 * Runs the candidate text through a {@link Parser} and returns the nodes
 * produced by {@code parse(null)}.
 * <p>
 * This is a best-effort parse: any failure to parse is reported to the
 * caller as {@code null} rather than as an exception.
 *
 * @param possibleTable the text handed to the Parser constructor
 * @return the parsed nodes, or {@code null} when the text cannot be parsed
 */
private NodeList parseHtml(String possibleTable) {
  try {
    Parser parser = new Parser(possibleTable);
    return parser.parse(null);
  } catch (ParserException | StringIndexOutOfBoundsException ignored) {
    // Deliberately ignored: the input is only *possibly* HTML. Besides
    // ParserException, an index failure while scanning the text is also
    // treated as "not parsable" so it cannot escape this best-effort helper.
    return null;
  }
}

代码示例来源:origin: fhopf/akka-crawler-example

/**
 * Walks every node of the resource at {@code url} with a
 * PageContentVisitor and returns what the visitor reports.
 *
 * @param url location handed to the Parser constructor and to the visitor
 * @return the content obtained from the visitor
 * @throws IllegalStateException wrapping any ParserException raised while
 *         creating the parser or visiting the nodes
 */
@Override
public PageContent fetchPageContent(String url) {
  logger.debug("Fetching {}", url);
  try {
    Parser pageParser = new Parser(url);
    PageContentVisitor collector = new PageContentVisitor(baseUrl, url);
    pageParser.visitAllNodesWith(collector);

    return collector.getContent();
  } catch (ParserException cause) {
    // Surface the checked parser failure as an unchecked exception, keeping the cause.
    throw new IllegalStateException(cause);
  }
}

代码示例来源:origin: riotfamily/riot

/**
 * Gives {@code html} to a freshly created {@link Parser} as its input HTML
 * and stores the result of {@code parse(null)} in {@code nodes}.
 *
 * @throws ParserException if the parser reports a failure
 */
public void parse() throws ParserException {
  Parser htmlParser = new Parser();
  htmlParser.setInputHTML(html);
  nodes = htmlParser.parse(null);
}

代码示例来源:origin: org.wso2.carbon.automationutils/org.wso2.carbon.integration.common.tests

/**
 * Collects the link of every {@code LinkTag} node found in the resource
 * at {@code url}.
 *
 * @param url location handed to the Parser constructor
 * @return the links, in the order the matching nodes appear in the
 *         extracted node list
 * @throws ParserException if the parser reports a failure
 */
public static List<String> getLinks(String url) throws ParserException {
  Parser pageParser = new Parser(url);
  List<String> collected = new LinkedList<String>();
  NodeList linkTags = pageParser.extractAllNodesThatMatch(new NodeClassFilter(LinkTag.class));
  int index = 0;
  while (index < linkTags.size()) {
    LinkTag tag = (LinkTag) linkTags.elementAt(index);
    collected.add(tag.getLink());
    index++;
  }
  return collected;
}

代码示例来源:origin: com.bbossgroups/bboss-htmlparser

/**
 * Builds a parser that reads from an in-memory HTML string.
 * @param html The HTML text to parse; must not be <code>null</code>.
 * @param charset <em>Optional</em>. The character set encoding that will
 * be reported by {@link #getEncoding}. If charset is <code>null</code>
 * the default character set is used.
 * @return A parser whose lexer reads a page built from <code>html</code>.
 * @throws IllegalArgumentException If <code>html</code> is <code>null</code>.
 */
public static Parser createParser (String html, String charset)
{
  if (html == null)
  {
    throw new IllegalArgumentException ("html cannot be null");
  }
  Page page = new Page (html, charset);
  return new Parser (new Lexer (page));
}

代码示例来源:origin: de.jungblut.common/thomasjungblut-common

/**
 * Extracts the title from the given HTML.
 * <p>
 * Every node matching {@code TITLE_FILTER} is inspected in turn; the
 * trimmed title of the last one wins.
 *
 * @param html the text handed to the Parser constructor
 * @return the trimmed title, or an empty string when no node matches
 * @throws ParserException if the parser reports a failure
 */
public static String extractTitle(String html) throws ParserException {
  String result = "";
  NodeList titleNodes = new Parser(html).extractAllNodesThatMatch(TITLE_FILTER);
  for (SimpleNodeIterator cursor = titleNodes.elements(); cursor.hasMoreNodes();) {
    TitleTag tag = (TitleTag) cursor.nextNode();
    result = tag.getTitle().trim();
  }
  return result;
}

代码示例来源:origin: CloudSlang/cs-actions

/**
 * When {@code body} is non-null and contains the text "base64", parses it,
 * lets an HtmlImageNodeVisitor walk the parsed nodes, replaces
 * {@code body} with the node list rendered back to HTML, and passes the
 * visitor's base64 images on to {@code addAllBase64ImagesToMimeMultipart}.
 * Otherwise nothing happens.
 *
 * @param multipart the multipart forwarded to addAllBase64ImagesToMimeMultipart
 */
private void processHTMLBodyWithBASE64Images(MimeMultipart multipart) throws ParserException,
    MessagingException, NoSuchAlgorithmException, SMIMEException, java.security.NoSuchProviderException {
  if (body == null || !body.contains("base64")) {
    return;
  }
  NodeList parsedBody = new Parser(body).parse(null);
  HtmlImageNodeVisitor imageVisitor = new HtmlImageNodeVisitor();
  parsedBody.visitAllNodesWith(imageVisitor);
  // Re-serialise only after the visitor has walked the nodes.
  body = parsedBody.toHtml();
  addAllBase64ImagesToMimeMultipart(multipart, imageVisitor.getBase64Images());
}

代码示例来源:origin: org.htmlparser/htmlparser

/**
 * Creates a Parser that reads from the given string itself, rather than
 * from a URL or from a string naming a URL location.
 * <BR>A Page is built from the string, attached to a new Lexer, and that
 * lexer is installed on a new Parser.
 * @param input The string to use as input.
 * @return The Parser whose lexer reads the page built from the string.
 */
public static Parser createParserParsingAnInputString (String input)
  throws ParserException, UnsupportedEncodingException
{
  Parser result = new Parser();
  Lexer inputLexer = new Lexer();
  Page inputPage = new Page(input);

  // Give the lexer its page before handing the lexer to the parser.
  inputLexer.setPage(inputPage);
  result.setLexer(inputLexer);
  return result;
}

代码示例来源:origin: org.fitnesse/fitnesse

/**
 * Parses the given page and scans the resulting nodes for tables.
 * A {@code null} or empty page is replaced by a fixed placeholder
 * fragment before parsing.
 *
 * @param page the HTML text to scan; may be {@code null} or empty
 * @throws SlimError wrapping any ParserException raised while parsing
 */
public HtmlTableScanner(String page) {
  String markup = (page == null || page.isEmpty())
    ? "<i>This page intentionally left blank.</i>"
    : page;
  NodeList parsedTree;
  try {
    parsedTree = new Parser(new Lexer(new Page(markup))).parse(null);
  } catch (ParserException parseFailure) {
    throw new SlimError(parseFailure);
  }
  scanForTables(parsedTree);
}

代码示例来源:origin: com.github.tcnh/fitnesse

/**
 * Builds the scanner from a page of HTML: the page is parsed and the
 * parsed nodes are passed to {@code scanForTables}. When the page is
 * {@code null} or empty, a fixed placeholder fragment is parsed instead.
 *
 * @param page the HTML text to scan; may be {@code null} or empty
 * @throws SlimError wrapping any ParserException raised while parsing
 */
public HtmlTableScanner(String page) {
  String content = page;
  if (content == null || content.length() == 0) {
    content = "<i>This page intentionally left blank.</i>";
  }
  NodeList tree;
  try {
    Lexer lexer = new Lexer(new Page(content));
    tree = new Parser(lexer).parse(null);
  } catch (ParserException failure) {
    throw new SlimError(failure);
  }
  scanForTables(tree);
}

代码示例来源:origin: org.fitnesse/fitnesse

/**
 * Parses the HTML supplied by {@code examiner} and returns the nodes
 * accepted by the given filter.
 *
 * @param filter the filter passed to {@code extractAllNodesThatMatch},
 *        together with {@code true} as its second argument
 * @return the nodes extracted from the parsed node list
 * @throws Exception whatever the examiner or the parser raises
 */
private NodeList getMatchingTags(NodeFilter filter) throws Exception {
  Page htmlPage = new Page(examiner.html());
  NodeList parsedNodes = new Parser(new Lexer(htmlPage)).parse(null);
  return parsedNodes.extractAllNodesThatMatch(filter, true);
}

代码示例来源:origin: org.fitnesse/fitnesse

/**
 * Parses the HTML of the given test page into a node list.
 *
 * @param pageToTest the page whose {@code getHtml()} output is parsed
 * @return the nodes produced by {@code parse(null)}
 * @throws SlimError wrapping any ParserException raised while parsing
 */
private NodeList makeNodeList(TestPage pageToTest) {
  Lexer lexer = new Lexer(new Page(pageToTest.getHtml()));
  Parser htmlParser = new Parser(lexer);
  try {
    return htmlParser.parse(null);
  } catch (ParserException parseFailure) {
    throw new SlimError(parseFailure);
  }
}

代码示例来源:origin: com.github.tcnh/fitnesse

/**
 * Turns the HTML returned by {@code pageToTest.getHtml()} into a node list
 * by running it through a Page, a Lexer and a Parser.
 *
 * @param pageToTest the page supplying the HTML
 * @return the nodes produced by {@code parse(null)}
 * @throws SlimError wrapping any ParserException raised while parsing
 */
private NodeList makeNodeList(TestPage pageToTest) {
  Page source = new Page(pageToTest.getHtml());
  Parser pageParser = new Parser(new Lexer(source));
  try {
    return pageParser.parse(null);
  } catch (ParserException failure) {
    throw new SlimError(failure);
  }
}

代码示例来源:origin: com.github.tcnh/fitnesse

/**
 * Returns the nodes of the examiner's HTML that the given filter accepts.
 * The HTML is parsed with {@code parse(null)} and the filter is then
 * applied through {@code extractAllNodesThatMatch(filter, true)}.
 *
 * @param filter the filter used to select nodes
 * @return the nodes extracted from the parsed node list
 * @throws Exception whatever the examiner or the parser raises
 */
private NodeList getMatchingTags(NodeFilter filter) throws Exception {
  Lexer lexer = new Lexer(new Page(examiner.html()));
  NodeList allNodes = new Parser(lexer).parse(null);
  return allNodes.extractAllNodesThatMatch(filter, true);
}

代码示例来源:origin: fhopf/akka-crawler-example

/**
 * Visits all nodes of "http://synyx.de" with an ObjectFindingVisitor for
 * LinkTag, asserts that at least one tag was found, and prints each
 * link's text and target to standard output.
 */
@Test
  public void testLinkExtraction() throws ParserException {
    Parser siteParser = new Parser("http://synyx.de");
    ObjectFindingVisitor linkFinder = new ObjectFindingVisitor(LinkTag.class);
    siteParser.visitAllNodesWith(linkFinder);
    Node[] foundTags = linkFinder.getTags();
    // TODO this could use some more meaningful assertions
    assertTrue(foundTags.length > 0);
    for (Node tag : foundTags) {
      LinkTag link = (LinkTag) tag;
      System.out.print("\"" + link.getLinkText() + "\" => ");
      System.out.println(link.getLink());
    }
  }
}

相关文章