下面给出了一个json文件,我想通过使用jsonserde创建一个外部表来使用hive访问它。
{“responsecode”:“1”,“response”:“data found”,“data”:[{“season”:“rabi”,“sector”:“园艺”,“category”:“水果”,“crop”:“mango”,“querytype”:“cultural practices”,“querytext”:“如何控制芒果落花?”,“kcans”:“建议喷洒到planofix 5-7 mili/pump”,“statename”:“andhra pradesh”,“districtname”:“nellore”,“blockname”:“balayapalle”,“submitdate”:“11/1/2016 9:05:27 am”},{“season”:“rabi”,“sector”:“agriculture”,“category”:“others”,“crop”:“others”,“querytype”:“杂草管理”,“querytext”:“eucalyptus杂草管理”,“kccans”:“建议每英亩喷洒2升磷酸乙二醇酯”,“statename”:“andhra pradesh”,“districtname”:“nellore”,“blockname”:“marripadu”,“submitdate”:“11/1/2016 9:07:04 am”},{“season”:“rabi”,“sector”:“园艺”,“category”:“蔬菜”,“crop”:“bhindi(okra/ladysfinger)”,“querytype”:“\t植物保护\t”,“querytext”:“okra螨害”,“kcans”:“建议喷洒三氯杀螨醇1升/200升水/英亩”,“statename”:“安得拉邦”,“districtname”:“nellore”,“blockname”:“kovur”,“submitdate”:“11/1/2016 3:11:59 pm”},{“season”:“rabi”,“sector”:“agriculture”,“category”:“谷类”,“crop”:“paddy(dhan)”,“querytype”:“杂草管理”,“querytext”:“15-20天作物的稻田杂草管理”,“kccans”:“建议每亩喷洒cyhalopop-p-丁基250毫升”,“statename”:“andhra pradesh”,“districtname”:“nellore”,“blockname”:“balayapalle”,“submitdate”:“11/3/2016 2:11:17 pm”},{“season”:“kharif”,“sector”:“agriculture”,“category”:“others”,“crop”:“others”,“querytype”:“weather”,“querytext”:“weather report”,“kccans”:“weather report contact 1800425314108912543031”,“statename”:“andhra pradesh”,“districtname”:“nellore”,“blockname”:“kaligiri”,“submitdate”:“11/3/2016 5:22:22 pm”},{“season”:“rabi”,“sector”:“agriculture”,“category”:“pulses”,“crop”:“木豆(red gram/arhar/tur)”,“querytype”:“养分管理”,“querytext”:“red gram养分管理”,“kccans”:“建议每英亩喷洒13-045公斤”,“statename”:“andhra pradesh”,“districtname”:“nellore”,“blockname”:“anumasamudrampeta”,“submitdate”:“11/3/2016 7:25:10 pm”},{“season”:“rabi”,“sector”:“agriculture”,“category”:“pulses”,“crop”:“PigePea(red gram/arhar/tur)”,“querytype”:“nutrient management”,“querytext”:“red gram nutrient management”,“kccans”:“建议喷洒13-0-45 kg/acre”,“statename”:“andhra pradesh”,“districtname”:“nellore”,“blockname”:“anumasamudrampeta”,“submitdate”:“11/3/2016 7:30:02 pm”},{“season”:“rabi”,“sector”:“园艺”,“category”:“水果”,“作物”:“柑橘”,“querytype”:“\t植物保护\t”,“querytext”:“柑橘吮吸害虫”,“kccans”:“推荐用于乐果400 ml/英亩200升水”,“statename”:“安得拉邦”,“districtname”:“nellore”,“blockname”:“kodavalur”,“submitdate”:“11/4/2016 8:48:03 am”},{“season”:“rabi”,“sector”:“园艺”,“category”:“fruits”,“crop”:“柑橘”,“querytype”:“\t植物保护\t”,“querytext”:“柑橘吮吸害虫”,“kccans”:“推荐用于乐果400毫升/英亩200升水”,“statename”:“andhra pradesh”,“districtname”:“nellore”,“blockname”:“kodavalur”,“submitdate”:“11/4/2016 8:48:04 am”},{“season”:“kharif”,“sector”:“agriculture”,“category”:“others”,“crop”:“others”,“querytype”:“weather”,“querytext”:“weather report”,“kccans”:“今天下午左右有阵雨或雷雨,云量较大”,“statename”:“andhra pradesh”,“districtname”:“nellore”,“blockname”:“vinjamur”,“submitdate”:“11/4/2016 3:14:03 pm”},{“season”:“kharif”,“sector”:“agriculture”,“category”:“others”,“crop”:“others”,“querytype”:“weather”,“querytext”:“weather rteport”,“kccans”:“今天下午左右有阵雨或雷雨,云量很大”,“statename”:“andhra pradesh”,“districtname”:“nellore”,“blockname”:“marripadu”,“submitdate”:“11/4/2016 5:18:50 pm”},{“season”:“kharif”,“sector”:“agriculture”,“category”:“others”,“crop”:“others”,“querytype”:“\tplant protection\t”,“querytext”:“bengal gram best Variation”,“kccans”:“bengal gram best Variation:j.g-11,lam senaga(lbeg-7),iccv-10”,“statename”:“andhra pradesh”,“districtname”:“nellore”,“blockname”:“pellakur”,“submitdate”:“11/4/2016 7:57:44 pm”}]}
我已经创建了下表来访问上面的json数据。
create EXTERNAL table kccdata1 (
Response string,
data array<struct<
Season:STRING,
Sector:STRING,
Category:STRING,
Crop:STRING,
QueryType:STRING,
QueryText:string,
KCCAns:string,
StateName:string,
DistrictName:string,
BlockName:string,
SubmitDate:string>>)
row format serde "org.apache.hive.hcatalog.data.JsonSerDe"
location "/user/hduser/jsondata";
此表已将每个结构作为每条记录的值。我可以使用下面的查询从中访问元素。
select data[0].querytype from kccdata1;
select data.season[1] from kccdata1;
但是如果我想找到像crop=others的所有扇区,我不能使用这个表。关于这个问题的任何建议都会大有帮助。谢谢。
1条答案
按热度按时间9udxz4iz1#
外侧内联侧视图
内联(内置表生成函数(udtf))
侧视图