Usage and code examples for org.apache.spark.sql.DataFrame.registerTempTable()


This article collects code examples of the Java method org.apache.spark.sql.DataFrame.registerTempTable(), showing how DataFrame.registerTempTable() is used in practice. The examples are drawn from selected projects on GitHub, Stack Overflow, Maven, and similar platforms, so they should be of reasonable reference value. Details of DataFrame.registerTempTable() are as follows:
Package path: org.apache.spark.sql.DataFrame
Class name: DataFrame
Method name: registerTempTable

About DataFrame.registerTempTable

registerTempTable(String tableName) registers the DataFrame as a temporary table under the given name in the catalog of the SQLContext (or HiveContext) that created it, so the data can be referenced by name in subsequent SQL queries. The table is session-scoped: nothing is materialized or persisted, and the name disappears when the owning context goes away. In Spark 2.0 the method was deprecated in favor of createOrReplaceTempView().
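
A minimal sketch of the typical pattern, assuming a Spark 1.x SQLContext named sqlContext and an existing DataFrame df with name and age columns (all names here are illustrative):

// Register the DataFrame under a name that SQL queries can refer to.
df.registerTempTable("people");

// The temporary table is now queryable through the same SQLContext.
DataFrame adults = sqlContext.sql("SELECT name, age FROM people WHERE age >= 18");
adults.show();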

Code examples

Code example source: databricks/learning-spark

input.registerTempTable("tweets");
JavaRDD<HappyPerson> happyPeopleRDD = sc.parallelize(peopleList);
DataFrame happyPeopleSchemaRDD = sqlCtx.applySchema(happyPeopleRDD, HappyPerson.class);
happyPeopleSchemaRDD.registerTempTable("happy_people");
sqlCtx.udf().register("stringLengthJava", new UDF1<String, Integer>() {
  @Override
  public Integer call(String str) throws Exception {
    // Return the length of the input string.
    return str.length();
  }
}, DataTypes.IntegerType);
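
Once registered, the UDF is callable from SQL against the temporary tables created above. A hedged usage sketch (the query is illustrative and assumes the tweets table exposes a text column):

// Invoke the registered UDF by name inside a SQL query.
DataFrame tweetLength = sqlCtx.sql("SELECT stringLengthJava(text) FROM tweets LIMIT 10");
tweetLength.show();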

Code example source: Impetus/Kundera

/**
 * Register table for json.
 * 
 * @param tableName
 *            the table name
 * @param dataSourcePath
 *            the data source path
 * @param sqlContext
 *            the sql context
 */
private void registerTableForJson(String tableName, String dataSourcePath, HiveContext sqlContext)
{
  sqlContext.jsonFile(dataSourcePath).registerTempTable(tableName);
}

Code example source: Impetus/Kundera

/**
 * Register table for csv.
 * 
 * @param tableName
 *            the table name
 * @param dataSourcePath
 *            the data source path
 * @param sqlContext
 *            the sql context
 */
private void registerTableForCsv(String tableName, String dataSourcePath, HiveContext sqlContext)
{
  HashMap<String, String> options = new HashMap<String, String>();
  options.put("header", "true");
  options.put("path", dataSourcePath);
  sqlContext.load(SparkPropertiesConstants.SOURCE_CSV, options).registerTempTable(tableName);
}

Code example source: Impetus/Kundera

@Override
public void registerTable(EntityMetadata m, SparkClient sparkClient)
{
  String conn = getConnectionString(m);
  Map<String, String> options = new HashMap<String, String>();
  options.put("url", conn);
  options.put("dbtable", m.getTableName());
  sparkClient.sqlContext.load("jdbc", options).registerTempTable(m.getTableName());
}

Code example source: Impetus/Kundera

@Override
public void registerTable(EntityMetadata m, SparkClient sparkClient)
{
  SparkContextJavaFunctions functions = CassandraJavaUtil.javaFunctions(sparkClient.sparkContext);
  Class clazz = m.getEntityClazz();
  JavaRDD cassandraRowsRDD = functions.cassandraTable(m.getSchema(), m.getTableName(),
      CassandraJavaUtil.mapRowTo(clazz));
  sparkClient.sqlContext.createDataFrame(cassandraRowsRDD, clazz).registerTempTable(m.getTableName());
}

Code example source: Impetus/Kundera

public void registerTable(EntityMetadata m, SparkClient sparkClient)
{
  final Class clazz = m.getEntityClazz();
  SparkContext sc = sparkClient.sparkContext;
  Configuration config = new Configuration();
  config.set(
      "mongo.input.uri",
      buildMongoURIPath(sc.getConf().get("hostname"), sc.getConf().get("portname"), m.getSchema(),
          m.getTableName()));
  JavaRDD<Tuple2<Object, BSONObject>> mongoJavaRDD = sc.newAPIHadoopRDD(config, MongoInputFormat.class,
      Object.class, BSONObject.class).toJavaRDD();
  JavaRDD<Object> mongoRDD = mongoJavaRDD.flatMap(new FlatMapFunction<Tuple2<Object, BSONObject>, Object>()
  {
    @Override
    public Iterable<Object> call(Tuple2<Object, BSONObject> arg)
    {
      BSONObject obj = arg._2();
      Object javaObject = generateJavaObjectFromBSON(obj, clazz);
      return Arrays.asList(javaObject);
    }
  });
  sparkClient.sqlContext.createDataFrame(mongoRDD, m.getEntityClazz()).registerTempTable(m.getTableName());
}

Code example source: stackoverflow.com

peopleDataFrame.registerTempTable("people");

Code example source: stackoverflow.com

DataFrame parquetData = sqlContext.read().parquet("/Users/leewallen/dev/spark_data/out/ParquetData");
parquetData.registerTempTable("pd");
DataFrame idGroupsDataFrame = sqlContext.sql("select id_groups.ids from pd");

List<String> idList = idGroupsDataFrame.javaRDD()
                    .map((Function<Row, String>) row -> {
  List<String> ids = new ArrayList<>();
  List<WrappedArray<String>> wrappedArrayList = row.getList(0);
  java.util.Iterator<WrappedArray<String>> wrappedArrayIterator = wrappedArrayList.iterator();
  while (wrappedArrayIterator.hasNext()) {
    WrappedArray<String> idWrappedArray = wrappedArrayIterator.next();
    Iterator<String> stringIter = idWrappedArray.iterator();
    List<String> tempIds = new ArrayList<>();
    while (stringIter.hasNext()) {
      tempIds.add(stringIter.next());
    }

    ids.add(tempIds.stream()
            .reduce((s1, s2) -> String.format("%s,%s", s1, s2))
            .get());
  }

  return ids.stream()
       .reduce((s1, s2) -> String.format("%s|%s", s1, s2))
       .get();
}).collect();

idList.forEach(id -> System.out.println(id));

Code example source: ddf-project/DDF

public void saveAsTable() throws DDFException {
 if (!this.isTable()) {
  DataFrame rdd = (DataFrame) this.getRepresentationHandler().get(DataFrame.class);
  if (rdd == null) {
   mLog.info("Could not create SchemaRDD for ddf");
   mLog.info(String.format("Could not save ddf %s as table", this.getUUID().toString()));
  } else {
   mLog.info(String.format(">>>> register %s as table", this.getTableName()));
   rdd.registerTempTable(this.getTableName());
  }
 }
}

Code example source: stackoverflow.com

JavaRDD<TestClass> dates = sc.textFile("hdfs://0.0.0.0:19000/Dates.csv").map(
new Function<String, TestClass>(){
  @Override
  public TestClass call(String line){
    String[] fields = line.split(",");
    TestClass tc = new TestClass();
    tc.setDate(Date.parse(fields[2]));
    return tc;
  }
});

DataFrame schemaTransactions = sqlContext.createDataFrame(dates, TestClass.class);
schemaTransactions.registerTempTable("dates");
DataFrame dAs = sqlContext.sql("SELECT * FROM dates");
dAs.count();

Code example source: com.cloudera.livy/livy-test-lib

@Override
public List<String> call(JobContext jc) throws Exception {
 InputStream source = getClass().getResourceAsStream("/testweet.json");
 // Save the resource as a file in HDFS (or the local tmp dir when using a local filesystem).
 URI input;
 File local = File.createTempFile("tweets", ".json", jc.getLocalTmpDir());
 Files.copy(source, local.toPath(), StandardCopyOption.REPLACE_EXISTING);
 FileSystem fs = FileSystem.get(jc.sc().sc().hadoopConfiguration());
 if ("file".equals(fs.getUri().getScheme())) {
  input = local.toURI();
 } else {
  String uuid = UUID.randomUUID().toString();
  Path target = new Path("/tmp/" + uuid + "-tweets.json");
  fs.copyFromLocalFile(new Path(local.toURI()), target);
  input = target.toUri();
 }
 SQLContext sqlctx = useHiveContext ? jc.hivectx() : jc.sqlctx();
 sqlctx.jsonFile(input.toString()).registerTempTable("tweets");
 List<String> tweetList = new ArrayList<>();
 Row[] result =
  (Row[])(sqlctx.sql("SELECT text, retweetCount FROM tweets ORDER BY retweetCount LIMIT 10")
   .collect());
 for (Row r : result) {
   tweetList.add(r.toString());
 }
 return tweetList;
}

Code example source: oeljeklaus-you/UserActionAnalyzePlatform

df.registerTempTable("user_visit_action");
for (Row _row : df.take(1)) {
  System.out.println(_row);
}
df2.registerTempTable("user_info");

Code example source: Erik-ly/SprakProject

df.registerTempTable("user_visit_action");
for (Row _row : df.take(1)) {
  System.out.println(_row);
}
df2.registerTempTable("user_info");

Code example source: Quetzal-RDF/quetzal

public static void main(String[] args)
{
//  SparkConf conf = new SparkConf().setAppName("App-mt").setMaster("local[2]");
//  SparkConf conf = new SparkConf().setAppName("App-mt").setMaster("spark://Kavithas-MBP.home:7077");
  SparkConf conf = new SparkConf().setAppName("App-mt").setMaster("spark://kavithas-mbp.watson.ibm.com:7077");

  JavaSparkContext sc = new JavaSparkContext(conf);

  HiveContext sqlContext = new HiveContext(sc.sc());
  DataFrame urls = sqlContext.read().json("/tmp/urls.json");
  urls.registerTempTable("urls");
  DataFrame temp = sqlContext.sql("select * from urls");
  temp.show();

  sqlContext.sql("add jar /tmp/quetzal.jar");
  sqlContext.sql("create temporary function webservice as 'com.ibm.research.rdf.store.utilities.WebServiceGetUDTF'");
  DataFrame drugs = sqlContext.sql("select webservice(\"drug,id,action\", \"url\", \"\", \"GET\", \"xs=http://www.w3.org/2001/XMLSchema\", \"//row\",\"drug\",\"./drug\","
      + " \"<string>\", \"id\", \"./id\",\"<string>\", \"action\", \"./action\", \"<string>\", url) as (drug, drug_typ, id, id_typ, action, action_typ) from urls");
  drugs.show();
  System.out.println("Num rows:" + drugs.count());
}

Code example source: phuonglh/vn.vitk

trainingData.registerTempTable("dfTable");
Row row = sqlContext.sql("SELECT MAX(label) as maxValue from dfTable").first();
int numLabels = (int)row.getDouble(0);

Code example source: phuonglh/vn.vitk

df.registerTempTable("dft");
Row row = df.sqlContext().sql("SELECT MAX(label) as maxValue FROM dft").first();
this.numLabels = (int)row.getDouble(0) + 1;
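
Because registerTempTable() belongs to the Spark 1.x DataFrame API and was deprecated in Spark 2.0, new code should use the equivalent view API instead. A minimal migration sketch, assuming a Spark 2.x SparkSession named spark and a Dataset<Row> df (names chosen for illustration):

// Spark 2.x replacement: same session-scoped lifetime, clearer naming.
df.createOrReplaceTempView("people");
Dataset<Row> result = spark.sql("SELECT * FROM people");
result.show();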
