本文整理了Java中org.apache.spark.sql.DataFrame.registerTempTable()
方法的一些代码示例,展示了DataFrame.registerTempTable()
的具体用法。这些代码示例主要来源于Github
/Stackoverflow
/Maven
等平台,是从一些精选项目中提取出来的代码,具有较强的参考意义,能在一定程度上帮助到你。DataFrame.registerTempTable()
方法的具体详情如下:
包路径:org.apache.spark.sql.DataFrame
类名称:DataFrame
方法名:registerTempTable
暂无
代码示例来源:origin: databricks/learning-spark
input.registerTempTable("tweets");
JavaRDD<HappyPerson> happyPeopleRDD = sc.parallelize(peopleList);
DataFrame happyPeopleSchemaRDD = sqlCtx.applySchema(happyPeopleRDD, HappyPerson.class);
happyPeopleSchemaRDD.registerTempTable("happy_people");
sqlCtx.udf().register("stringLengthJava", new UDF1<String, Integer>() {
@Override
代码示例来源:origin: Impetus/Kundera
/**
 * Loads a JSON data source as a DataFrame and registers it as a Spark SQL
 * temporary table, making it queryable under the given name.
 *
 * @param tableName
 *            the name under which the temporary table is registered
 * @param dataSourcePath
 *            the path of the JSON data source
 * @param sqlContext
 *            the Hive SQL context used to read the JSON file
 */
private void registerTableForJson(String tableName, String dataSourcePath, HiveContext sqlContext)
{
    DataFrame jsonFrame = sqlContext.jsonFile(dataSourcePath);
    jsonFrame.registerTempTable(tableName);
}
代码示例来源:origin: Impetus/Kundera
/**
 * Loads a CSV data source (first line treated as header) as a DataFrame and
 * registers it as a Spark SQL temporary table under the given name.
 *
 * @param tableName
 *            the name under which the temporary table is registered
 * @param dataSourcePath
 *            the path of the CSV data source
 * @param sqlContext
 *            the Hive SQL context used to load the data
 */
private void registerTableForCsv(String tableName, String dataSourcePath, HiveContext sqlContext)
{
    HashMap<String, String> csvOptions = new HashMap<String, String>();
    // The CSV source reads these two options: where the file lives, and
    // whether the first row carries column names.
    csvOptions.put("path", dataSourcePath);
    csvOptions.put("header", "true");
    sqlContext.load(SparkPropertiesConstants.SOURCE_CSV, csvOptions).registerTempTable(tableName);
}
代码示例来源:origin: Impetus/Kundera
/**
 * Registers a relational (JDBC) table as a Spark SQL temporary table. The
 * connection URL is derived from the entity metadata, and the temp table is
 * registered under the entity's own table name.
 *
 * @param m the entity metadata describing the table to expose
 * @param sparkClient the client holding the SQL context to register into
 */
@Override
public void registerTable(EntityMetadata m, SparkClient sparkClient)
{
    String connectionUrl = getConnectionString(m);
    Map<String, String> jdbcOptions = new HashMap<String, String>();
    // The "jdbc" data source requires the connection URL and the table to read.
    jdbcOptions.put("dbtable", m.getTableName());
    jdbcOptions.put("url", connectionUrl);
    sparkClient.sqlContext.load("jdbc", jdbcOptions).registerTempTable(m.getTableName());
}
代码示例来源:origin: Impetus/Kundera
// Bridges a Cassandra table into Spark SQL: reads the table as an RDD of
// entity objects via the Cassandra connector, builds a DataFrame from it,
// and registers that DataFrame as a temp table named after the entity's table.
@Override
public void registerTable(EntityMetadata m, SparkClient sparkClient)
{
// NOTE(review): raw Class/JavaRDD types below; parameterized generics would be
// safer, but the connector's mapRowTo signature makes that change non-trivial.
SparkContextJavaFunctions functions = CassandraJavaUtil.javaFunctions(sparkClient.sparkContext);
Class clazz = m.getEntityClazz();
JavaRDD cassandraRowsRDD = functions.cassandraTable(m.getSchema(), m.getTableName(),
CassandraJavaUtil.mapRowTo(clazz));
sparkClient.sqlContext.createDataFrame(cassandraRowsRDD, clazz).registerTempTable(m.getTableName());
}
代码示例来源:origin: Impetus/Kundera
// Bridges a MongoDB collection into Spark SQL: reads BSON documents through the
// Mongo Hadoop input format, converts each document to an entity object, then
// registers the resulting DataFrame as a temp table named after the entity's table.
public void registerTable(EntityMetadata m, SparkClient sparkClient)
{
final Class clazz = m.getEntityClazz();
SparkContext sc = sparkClient.sparkContext;
Configuration config = new Configuration();
// Build the mongo connection URI from Spark conf entries; assumes "hostname"
// and "portname" are set on the SparkConf — TODO confirm against the caller.
config.set(
"mongo.input.uri",
buildMongoURIPath(sc.getConf().get("hostname"), sc.getConf().get("portname"), m.getSchema(),
m.getTableName()));
// Each record from MongoInputFormat is a (key, BSONObject) pair.
JavaRDD<Tuple2<Object, BSONObject>> mongoJavaRDD = sc.newAPIHadoopRDD(config, MongoInputFormat.class,
Object.class, BSONObject.class).toJavaRDD();
// flatMap with a single-element list: effectively a map from BSON to entity,
// kept as flatMap to match the connector's expected function shape.
JavaRDD<Object> mongoRDD = mongoJavaRDD.flatMap(new FlatMapFunction<Tuple2<Object, BSONObject>, Object>()
{
@Override
public Iterable<Object> call(Tuple2<Object, BSONObject> arg)
{
BSONObject obj = arg._2();
Object javaObject = generateJavaObjectFromBSON(obj, clazz);
return Arrays.asList(javaObject);
}
});
sparkClient.sqlContext.createDataFrame(mongoRDD, m.getEntityClazz()).registerTempTable(m.getTableName());
}
代码示例来源:origin: stackoverflow.com
// Register the DataFrame as a temporary table named "people" so it can be queried via SQL.
peopleDataFrame.registerTempTable("people");
代码示例来源:origin: stackoverflow.com
// Read Parquet data and expose it to Spark SQL as temp table "pd".
DataFrame parquetData = sqlContext.read().parquet("/Users/leewallen/dev/spark_data/out/ParquetData");
parquetData.registerTempTable("pd");
// Each row holds nested arrays of id strings (id_groups.ids).
DataFrame idGroupsDataFrame = sqlContext.sql("select id_groups.ids from pd");
// Flatten each row: join ids within one group with commas, then join the
// groups of a row with '|' — producing one "a,b|c,d"-style string per row.
List<String> idList = idGroupsDataFrame.javaRDD()
.map((Function<Row, String>) row -> {
List<String> ids = new ArrayList<>();
// Column 0 is a list of Scala WrappedArray<String> values.
List<WrappedArray<String>> wrappedArrayList = row.getList(0);
java.util.Iterator<WrappedArray<String>> wrappedArrayIterator = wrappedArrayList.iterator();
while (wrappedArrayIterator.hasNext()) {
WrappedArray<String> idWrappedArray = wrappedArrayIterator.next();
Iterator<String> stringIter = idWrappedArray.iterator();
List<String> tempIds = new ArrayList<>();
while (stringIter.hasNext()) {
tempIds.add(stringIter.next());
}
// NOTE(review): reduce(...).get() throws if a group is empty — presumably
// groups are guaranteed non-empty; verify against the data.
ids.add(tempIds.stream()
.reduce((s1, s2) -> String.format("%s,%s", s1, s2))
.get());
}
return ids.stream()
.reduce((s1, s2) -> String.format("%s|%s", s1, s2))
.get();
}).collect();
idList.forEach(id -> System.out.println(id));
代码示例来源:origin: ddf-project/DDF
/**
 * Registers this DDF's DataFrame representation as a Spark temporary table
 * under the DDF's table name. Does nothing if the DDF is already a table;
 * logs a warning-style message if no DataFrame representation is available.
 *
 * @throws DDFException declared by the interface; not thrown directly here
 */
public void saveAsTable() throws DDFException {
  // Guard clause: already a table, nothing to register.
  if (this.isTable()) {
    return;
  }
  DataFrame dataFrame = (DataFrame) this.getRepresentationHandler().get(DataFrame.class);
  if (dataFrame == null) {
    mLog.info("Could not create SchemaRDD for ddf");
    mLog.info(String.format("Could not save ddf %s as table", this.getUUID().toString()));
  } else {
    mLog.info(String.format(">>>> register %s as table", this.getTableName()));
    dataFrame.registerTempTable(this.getTableName());
  }
}
代码示例来源:origin: stackoverflow.com
// Parse a CSV of dates from HDFS into TestClass objects (date is column index 2).
JavaRDD<TestClass> dates = sc.textFile("hdfs://0.0.0.0:19000/Dates.csv").map(
new Function<String, TestClass>(){
@Override
public TestClass call(String line){
String[] fields = line.split(",");
TestClass tc = new TestClass();
// NOTE(review): Date.parse is deprecated and locale/timezone dependent;
// java.time.LocalDate.parse with an explicit formatter would be safer.
tc.setDate(Date.parse(fields[2]));
return tc;
}
});
// Infer a schema from the bean class, register it, and query it back via SQL.
DataFrame schemaTransactions = sqlContext.createDataFrame(dates, TestClass.class);
schemaTransactions.registerTempTable("dates");
DataFrame dAs = sqlContext.sql("SELECT * FROM dates");
dAs.count();
代码示例来源:origin: com.cloudera.livy/livy-test-lib
/**
 * Copies the bundled /testweet.json resource into the job's filesystem (HDFS,
 * or the local tmp dir on a local filesystem), registers it as a temporary
 * table named "tweets", and returns the string form of the ten rows with the
 * highest retweetCount.
 *
 * @param jc the job context providing Spark/SQL contexts and a local tmp dir
 * @return the top-10 most-retweeted tweet rows, one string per row
 * @throws Exception on I/O or query failure
 */
@Override
public List<String> call(JobContext jc) throws Exception {
  // Save the resource as a file in HDFS (or the local tmp dir when using a local filesystem).
  File local = File.createTempFile("tweets", ".json", jc.getLocalTmpDir());
  // try-with-resources: the original leaked this classpath stream.
  try (InputStream source = getClass().getResourceAsStream("/testweet.json")) {
    Files.copy(source, local.toPath(), StandardCopyOption.REPLACE_EXISTING);
  }
  URI input;
  FileSystem fs = FileSystem.get(jc.sc().sc().hadoopConfiguration());
  if ("file".equals(fs.getUri().getScheme())) {
    input = local.toURI();
  } else {
    // Copy into HDFS under a unique name so concurrent test runs don't collide.
    String uuid = UUID.randomUUID().toString();
    Path target = new Path("/tmp/" + uuid + "-tweets.json");
    fs.copyFromLocalFile(new Path(local.toURI()), target);
    input = target.toUri();
  }
  SQLContext sqlctx = useHiveContext ? jc.hivectx() : jc.sqlctx();
  sqlctx.jsonFile(input.toString()).registerTempTable("tweets");
  List<String> tweetList = new ArrayList<>();
  Row[] result =
      (Row[])(sqlctx.sql("SELECT text, retweetCount FROM tweets ORDER BY retweetCount LIMIT 10")
      .collect());
  for (Row r : result) {
    tweetList.add(r.toString());
  }
  return tweetList;
}
代码示例来源:origin: oeljeklaus-you/UserActionAnalyzePlatform
df.registerTempTable("user_visit_action");
for(Row _row : df.take(1)) {
System.out.println(_row);
df2.registerTempTable("user_info");
代码示例来源:origin: Erik-ly/SprakProject
df.registerTempTable("user_visit_action");
for(Row _row : df.take(1)) {
System.out.println(_row);
df2.registerTempTable("user_info");
代码示例来源:origin: Quetzal-RDF/quetzal
// Demo driver: loads URLs from JSON, registers them as a temp table, then calls
// a Hive UDTF ("webservice") that fetches and parses drug data for each URL.
public static void main( String[] args )
{
// Alternative masters kept for reference (local and laptop cluster):
// SparkConf conf = new SparkConf().setAppName("App-mt").setMaster("local[2]");
// SparkConf conf = new SparkConf().setAppName("App-mt").setMaster("spark://Kavithas-MBP.home:7077");
// NOTE(review): hard-coded cluster master and /tmp paths — fine for a demo,
// should be configurable for anything beyond that.
SparkConf conf = new SparkConf().setAppName("App-mt").setMaster("spark://kavithas-mbp.watson.ibm.com:7077");
JavaSparkContext sc = new JavaSparkContext(conf);
HiveContext sqlContext = new HiveContext(sc.sc());
DataFrame urls = sqlContext.read().json("/tmp/urls.json");
urls.registerTempTable("urls");
DataFrame temp = sqlContext.sql("select * from urls");
temp.show();
// Register the UDTF from a jar, then call it once per row of "urls".
sqlContext.sql("add jar /tmp/quetzal.jar");
sqlContext.sql("create temporary function webservice as 'com.ibm.research.rdf.store.utilities.WebServiceGetUDTF'");
DataFrame drugs = sqlContext.sql("select webservice(\"drug,id,action\", \"url\", \"\", \"GET\", \"xs=http://www.w3.org/2001/XMLSchema\", \"//row\",\"drug\",\"./drug\","
+ " \"<string>\", \"id\", \"./id\",\"<string>\", \"action\", \"./action\", \"<string>\", url) as (drug, drug_typ, id, id_typ, action, action_typ) from urls");
drugs.show();
System.out.println("Num rows:" + drugs.count());
}
代码示例来源:origin: phuonglh/vn.vitk
// Register the training data so SQL aggregation can be run over it.
trainingData.registerTempTable("dfTable");
// The highest label value doubles as the label count (labels assumed dense from 0).
Row row = sqlContext.sql("SELECT MAX(label) as maxValue from dfTable").first();
int numLabels = (int)row.getDouble(0);
代码示例来源:origin: phuonglh/vn.vitk
// Register the DataFrame so SQL aggregation can be run over it.
df.registerTempTable("dft");
// MAX(label) + 1 gives the number of labels (labels assumed dense from 0).
Row row = df.sqlContext().sql("SELECT MAX(label) as maxValue FROM dft").first();
this.numLabels = (int)row.getDouble(0) + 1;
内容来源于网络,如有侵权,请联系作者删除!