DROP TABLE IF EXISTS recommendations;
CREATE TABLE recommendations AS
SELECT
movie_id AS movie_id,
movie_id_2 AS movie_id_2,
(N * dotProductSum - ratingSum * rating2Sum) / (SQRT(N * ratingSqSum - ratingSum * ratingSum) * SQRT(N * rating2SqSum - rating2Sum * rating2Sum))
AS correlation
FROM
coratings
GROUP BY
coratings.movie_id, coratings.movie_id_2, coratings.dotProductSum
ORDER BY
movie_id;
我已经将dotproductsum添加到查询中,但仍然得到以下错误。
org.apache.hive.service.cli.hivesqlexception:编译语句时出错:失败:semanticexception[error 10025]:行5:3表达式不在group by键“dotproductsum”中
一张table下面是装饰用的table
CREATE TABLE coratings AS
SELECT
a.movie_id ,
b.movie_id AS movie_id_2 ,
COUNT(*) AS N ,
SUM(a.rating) AS ratingSum ,
SUM(b.rating) AS rating2Sum ,
SUM(a.rating * b.rating) AS dotProductSum ,
SUM(a.rating * a.rating) AS ratingSqSum ,
SUM(b.rating * b.rating) AS rating2SqSum
FROM
movies_ratings a
JOIN movies_ratings b ON a.user_id = b.user_id
WHERE
a.movie_id < b.movie_id
GROUP BY
a.movie_id, b.movie_id
HAVING
N >= 30;
电影分级表
USE rec_engine;
-- table definition will be created
DROP TABLE IF EXISTS movies_ratings ;
-- here
CREATE TABLE movies_ratings ( user_id INT, movie_id INT, rating INT, `time_stamp` INT )
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.OpenCSVSerde'
WITH SERDEPROPERTIES (
"field.delim"=",",
"skip.header.line.count"="1"
);
-- data in local file system /home/hive folder. otherwise can't load.
LOAD DATA LOCAL INPATH 'ratings_small.csv'
OVERWRITE INTO TABLE movies_ratings;
暂无答案!
目前还没有任何答案,快来回答吧!