Jsoup解析(京东搜索)

x33g5p2x  于2022-03-31 转载在 其他  
字(2.5k)|赞(0)|评价(0)|浏览(231)

Bean封装

package top.linruchang;

import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;
import lombok.experimental.Accessors;

import java.util.List;

/**
 * Plain data holder describing a single product entry.
 *
 * <p>Getters, chainable setters, a builder, a no-args constructor and an
 * all-args constructor are generated by the Lombok annotations below. The
 * all-args constructor takes its parameters in field declaration order.
 *
 * @author LinRuChang
 * @version 1.0
 * @date 2022/03/29
 * @since 1.8
 **/
@Data
@Builder
@Accessors(chain = true)
@NoArgsConstructor
@AllArgsConstructor
public class Product {

    /**
     * Product name.
     */
    String name;
    

    /**
     * Regular price, kept as text.
     */
    String price;

    /**
     * Member (VIP) price, kept as text.
     * NOTE(review): the field name starts with an upper-case letter, so the
     * Lombok builder method is {@code VipPrice(...)}; renaming it would break
     * existing builder callers.
     */
    String VipPrice;

    /**
     * URL of the product image.
     */
    String imgUrl;

    /**
     * Name of the shop selling the product.
     */
    String shopName;

    /**
     * Tags (labels) attached to the product.
     */
    List<String> tags;
}

搜索工具类

package top.linruchang;

import cn.hutool.core.lang.Console;
import cn.hutool.core.util.StrUtil;
import lombok.SneakyThrows;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.net.URL;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.stream.Stream;

/**
 * Utility that scrapes product listings from the JD (jd.com) search result
 * page using Jsoup.
 *
 * @author LinRuChang
 * @version 1.0
 * @date 2022/03/29
 * @since 1.8
 **/
public class JdUtil {

    /**
     * Search URL template; the {@code {}} placeholder receives the URL-encoded keyword.
     * NOTE(review): {@code wq} and {@code pvid} are fixed values carried over in the
     * template ({@code wq} decodes to the Chinese word for "mobile phone"); confirm
     * whether they should follow the keyword instead.
     */
    private static final String SEARCH_URL_TEMPLATE =
            "https://search.jd.com/Search?keyword={}&enc=utf-8&wq=%E6%89%8B%E6%9C%BA&pvid=52652c1508ae479cb6765f1a871ed23c";

    /** Timeout passed to Jsoup when fetching the search page, in milliseconds. */
    private static final int TIMEOUT_MILLIS = 5000;

    /**
     * Searches JD for the given keyword and extracts the products listed on the
     * first result page.
     *
     * <p>Checked exceptions raised while fetching the page (for example
     * {@link java.io.IOException} on a timeout or HTTP error) are rethrown
     * unchanged via {@code @SneakyThrows}.
     *
     * @param productName search keyword; may be null or blank
     * @return a mutable list of parsed products; empty when the keyword is blank
     *         or the page contains no {@code #J_goodsList} element
     */
    @SneakyThrows
    public static List<Product> getProductInfo(String productName) {
        List<Product> products = new ArrayList<>();
        if (StrUtil.isBlank(productName)) {
            return products;
        }

        // Encode the keyword so characters such as '&', '#', '+' or spaces
        // cannot break or truncate the query string.
        String keyword = URLEncoder.encode(productName, "UTF-8");
        String url = StrUtil.format(SEARCH_URL_TEMPLATE, keyword);

        Document doc = Jsoup.parse(new URL(url), TIMEOUT_MILLIS);

        // getElementById returns null when the element is absent (e.g. the
        // response is not a regular result page), so guard before selecting.
        Element goodsList = doc.getElementById("J_goodsList");
        if (goodsList == null) {
            return products;
        }

        for (Element item : goodsList.select(".gl-item")) {
            products.add(parseProduct(item));
        }
        return products;
    }

    /**
     * Builds a {@link Product} from one {@code .gl-item} element of the result list.
     * Missing sub-elements yield empty strings / an empty tag list instead of failing.
     */
    private static Product parseProduct(Element item) {
        String imgUrl = item.getElementsByTag("img").eq(0).attr("data-lazy-img");
        imgUrl = StrUtil.addPrefixIfNot(imgUrl, "http:");

        // eq(0) yields an empty Elements (not an exception) when the item has
        // no price node, so the price lookups below degrade to "".
        Elements priceBox = item.getElementsByClass("p-price").eq(0);

        return Product.builder()
                .name(item.getElementsByClass("p-name").eq(0).text())
                .price(priceBox.select("strong").text())
                .VipPrice(priceBox.select("span[title='PLUS会员专享价']").text())
                .imgUrl(imgUrl)
                .tags(item.select(".p-icons i").eachText())
                .shopName(item.getElementsByClass("p-shop").text())
                .build();
    }

    /**
     * Demo entry point: searches for a sample keyword and prints every product found.
     */
    public static void main(String[] args) {
        List<Product> products = getProductInfo("书包");
        products.forEach(Console::log);
    }

}

相关文章

微信公众号

最新文章

更多