org.apache.spark.sql.DataFrame.count()方法的使用及代码示例

x33g5p2x  于2022-01-18 转载在 其他  
字(4.6k)|赞(0)|评价(0)|浏览(207)

本文整理了 Java 中 org.apache.spark.sql.DataFrame.count() 方法的一些代码示例，展示了 DataFrame.count() 的具体用法。这些代码示例主要来源于 GitHub、Stack Overflow、Maven 等平台，是从一些精选项目中提取出来的代码，具有较强的参考意义，能在一定程度上帮助到你。DataFrame.count() 方法的具体详情如下：
包路径:org.apache.spark.sql.DataFrame
类名称:DataFrame
方法名:count

DataFrame.count介绍

暂无

代码示例

代码示例来源:origin: Impetus/Kundera

/**
 * Returns the number of rows of the given data frame as an {@code int}.
 *
 * @param dataFrame
 *            the data frame to count; may be {@code null}
 * @return the row count; 0 when {@code dataFrame} is {@code null} or when the
 *         count does not fit into an {@code int} (an error is logged in the
 *         latter case)
 */
public int getDataFrameSize(DataFrame dataFrame)
{
  // A missing frame is treated as an empty one.
  if (dataFrame == null)
  {
    return 0;
  }

  final long rowCount = dataFrame.count();
  final boolean fitsInInt = rowCount >= Integer.MIN_VALUE && rowCount <= Integer.MAX_VALUE;
  if (fitsInInt)
  {
    return (int) rowCount;
  }

  // Narrowing would silently change the value, so report it and fall back to 0.
  logger.error(rowCount + " cannot be cast to int without changing its value.");
  return 0;
}

代码示例来源:origin: stackoverflow.com

// Build a CassandraSQLContext on top of `sc`, which is defined outside this snippet.
CassandraSQLContext sqlContext = new CassandraSQLContext(sc);

// Select the rows of testing.cf_text whose id is 'Open' and whose date is one of the two listed days.
String query = "SELECT * FROM testing.cf_text where id='Open' and date IN ('2015-01-21','2015-01-22')";
DataFrame resultsFrame = sqlContext.sql(query);

// Print the number of rows in the query result.
System.out.println(resultsFrame.count());

代码示例来源:origin: stackoverflow.com

// Read Dates.csv from HDFS line by line and turn every line into a TestClass instance.
// `sc` is defined outside this snippet.
JavaRDD<TestClass> dates = sc.textFile("hdfs://0.0.0.0:19000/Dates.csv").map(
new Function<String, TestClass>(){
  @Override
  public TestClass call(String line){
    // Split the CSV line on commas; only the third field (index 2) is used.
    // A line with fewer than three fields makes fields[2] throw ArrayIndexOutOfBoundsException.
    String[] fields = line.split(",");
    TestClass tc = new TestClass();
    // NOTE(review): if Date is java.util.Date, Date.parse(String) is deprecated and
    // returns epoch milliseconds as a long — confirm which Date is imported and what setDate expects.
    tc.setDate(Date.parse(fields[2]));
    return tc;
  }
});

// Build a DataFrame from the RDD using TestClass as the bean class and expose it as temp table "dates".
// `sqlContext` is defined outside this snippet.
DataFrame  schemaTransactions = sqlContext.createDataFrame(dates, TestClass.class);
schemaTransactions.registerTempTable("dates");
DataFrame dAs = sqlContext.sql("SELECT * FROM dates");
// The value returned by count() is discarded here.
dAs.count();

代码示例来源:origin: phuonglh/vn.vitk

// Count the rows of `input` (defined outside this snippet) and report it as the number of sentences.
long n = input.count();
System.out.println(" Number of sentences = " + n);
// `duration` is computed outside this snippet; it is printed as a millisecond value.
System.out.println("  Total tagging time = " + duration + " milliseconds.");

代码示例来源:origin: phuonglh/vn.vitk

/**
 * Splits the data read from {@code inputFileName} randomly into a training part
 * (weight 0.9) and a test part (weight 0.1), trains a model on the training part
 * and evaluates it on both parts.
 *
 * @param inputFileName path of the text file holding the input lines
 * @param numFeatures   value passed to {@code CMMParams.setNumFeatures}
 * @param modelFileName value passed on to {@code train}
 */
void testRandomSplit(String inputFileName, int numFeatures, String modelFileName) {
  CMMParams params = new CMMParams()
    .setMaxIter(600)
    .setRegParam(1E-6)
    .setMarkovOrder(2)
    .setNumFeatures(numFeatures);

  // Collect all input lines and build the full data set from them.
  DataFrame dataset = createDataFrame(jsc.textFile(inputFileName).collect());

  final double[] splitWeights = {0.9, 0.1};
  DataFrame[] parts = dataset.randomSplit(splitWeights);
  DataFrame trainingData = parts[0];
  DataFrame testData = parts[1];

  long trainingCount = trainingData.count();
  System.out.println("Number of training sequences = " + trainingCount);
  long testCount = testData.count();
  System.out.println("Number of test sequences = " + testCount);

  // Train on the training part; the resulting model is kept in the cmmModel field.
  cmmModel = train(trainingData, modelFileName, params);

  // Evaluate on held-out data first, then on the data the model was trained on.
  System.out.println("Test accuracy:");
  evaluate(testData);
  System.out.println("Training accuracy:");
  evaluate(trainingData);
}

代码示例来源:origin: Quetzal-RDF/quetzal

/**
 * Loads {@code /tmp/urls.json}, registers it as the temp table {@code urls}, applies the
 * {@code webservice} temporary function to every url and prints the resulting rows and
 * their count.
 *
 * <p>The Spark context is always stopped before the method returns, including when one of
 * the queries throws.
 *
 * @param args command-line arguments (unused)
 */
public static void main(String[] args) {
  // Masters previously used with this app: "local[2]", "spark://Kavithas-MBP.home:7077".
  SparkConf conf = new SparkConf().setAppName("App-mt").setMaster("spark://kavithas-mbp.watson.ibm.com:7077");

  JavaSparkContext sc = new JavaSparkContext(conf);
  try {
    HiveContext sqlContext = new HiveContext(sc.sc());

    // Expose the JSON file as the table "urls" and show its content.
    DataFrame urls = sqlContext.read().json("/tmp/urls.json");
    urls.registerTempTable("urls");
    DataFrame temp = sqlContext.sql("select * from urls");
    temp.show();

    // Register the jar and the temporary function that the next query calls.
    sqlContext.sql("add jar /tmp/quetzal.jar");
    sqlContext.sql("create temporary function webservice as 'com.ibm.research.rdf.store.utilities.WebServiceGetUDTF'");
    DataFrame drugs = sqlContext.sql("select webservice(\"drug,id,action\", \"url\", \"\", \"GET\", \"xs=http://www.w3.org/2001/XMLSchema\", \"//row\",\"drug\",\"./drug\","
        + " \"<string>\", \"id\", \"./id\",\"<string>\", \"action\", \"./action\", \"<string>\", url) as (drug, drug_typ, id, id_typ, action, action_typ) from urls");
    drugs.show();
    System.out.println("Num rows:" + drugs.count());
  } finally {
    // Release the Spark context even when a query above fails.
    sc.stop();
  }
}

代码示例来源:origin: phuonglh/vn.vitk

// Apply the model to df0; `model`, `df0`, `jsc`, `s`, `prediction` and `output` come from outside this snippet.
DataFrame df1 = model.transform(df0);
// Collect the "prediction" column and broadcast the collected rows through jsc.
prediction = jsc.broadcast(df1.select("prediction").collect());
// Only map `s` when the transformed frame has at least one row.
if (df1.count() > 0) {
  output = s.map(new WhitespaceClassificationFunction());

代码示例来源:origin: phuonglh/vn.vitk

// Evaluator configured with the metric name "precision".
MulticlassClassificationEvaluator evaluator = new MulticlassClassificationEvaluator().setMetricName("precision");
if (verbose) {
  // Print the training-set row count (N) together with vocabSize (D) and numLabels (K),
  // all of which are defined outside this snippet.
  System.out.println("N = " + trainingData.count());
  System.out.println("D = " + vocabSize);
  System.out.println("K = " + numLabels);

代码示例来源:origin: phuonglh/vn.vitk

// Build a DataFrame from `jrdd` (defined outside this snippet) using WhitespaceContext as the bean class.
DataFrame df = sqlContext.createDataFrame(jrdd, WhitespaceContext.class);
// NOTE(review): show(false) presumably prints rows without truncating cell text — confirm against the Spark version in use.
df.show(false);
// Print the total number of rows.
System.out.println("N = " + df.count());
// Group by the "label" column and show the number of rows per label.
df.groupBy("label").count().show();

相关文章