1. Java version (spark-2.1.0)
package chavin.king;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.FlatMapFunction;
import org.apache.spark.api.java.function.Function2;
import org.apache.spark.api.java.function.PairFunction;
import org.apache.spark.api.java.function.VoidFunction;
import scala.Tuple2;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import java.util.Arrays;
import java.util.Iterator;
import org.apache.spark.SparkConf;
public class WordCount {
    public static void main(String[] args) {

        // Initialize the Spark application
        SparkConf conf = new SparkConf().setAppName("wordcount").setMaster("local");
        JavaSparkContext sc = new JavaSparkContext(conf);

        // Read the input file
        JavaRDD<String> lines = sc.textFile("E://test//spark_wc.txt");

        // Split each line into words
        JavaRDD<String> words = lines.flatMap(new FlatMapFunction<String, String>() {
            public Iterator<String> call(String line) throws Exception {
                return Arrays.asList(line.split(" ")).iterator();
            }
        });

        // Map each word to a (word, 1) pair
        JavaPairRDD<String, Integer> pairs = words.mapToPair(new PairFunction<String, String, Integer>() {
            public Tuple2<String, Integer> call(String word) throws Exception {
                return new Tuple2<String, Integer>(word, 1);
            }
        });

        // Count the occurrences of each word
        JavaPairRDD<String, Integer> wordCounts = pairs.reduceByKey(new Function2<Integer, Integer, Integer>() {
            public Integer call(Integer v1, Integer v2) throws Exception {
                return v1 + v2;
            }
        });

        // Print the results
        wordCounts.foreach(new VoidFunction<Tuple2<String, Integer>>() {
            public void call(Tuple2<String, Integer> wordCount) throws Exception {
                System.out.println(wordCount._1() + " appeared " + wordCount._2() + " times.");
            }
        });

        // Close the SparkContext
        sc.close();
    }

}
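Since Spark 2.1.0 targets Java 8, the anonymous inner classes above can also be written as lambdas. The following is a minimal sketch of the same pipeline, not from the original post; the class name WordCountLambda and the reuse of the input path are illustrative assumptions:

package chavin.king;

import java.util.Arrays;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;
import scala.Tuple2;

// Hypothetical lambda-based equivalent of the WordCount class above
public class WordCountLambda {

    public static void main(String[] args) {
        SparkConf conf = new SparkConf().setAppName("wordcount").setMaster("local");
        JavaSparkContext sc = new JavaSparkContext(conf);

        sc.textFile("E://test//spark_wc.txt")                          // read the input file
          .flatMap(line -> Arrays.asList(line.split(" ")).iterator())  // split each line into words
          .mapToPair(word -> new Tuple2<>(word, 1))                    // map each word to (word, 1)
          .reduceByKey((v1, v2) -> v1 + v2)                            // sum the counts per word
          .foreach(wc -> System.out.println(wc._1() + " appeared " + wc._2() + " times."));

        sc.close();
    }

}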
2. Scala version
package chavin.king
import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
object WordCountLocal {
  def main(args: Array[String]) {

    // Initialize the Spark application
    val conf = new SparkConf().setAppName("WordCount").setMaster("local")
    val sc = new SparkContext(conf)

    // Read the file, split each line into words, and count each word
    val lines = sc.textFile("E://test//spark_wc.txt", 1)
    val words = lines.flatMap { line => line.split(" ") }
    val pairs = words.map { word => (word, 1) }
    val wordCounts = pairs.reduceByKey { _ + _ }

    // Print the results
    wordCounts.foreach(wordCount => println(wordCount._1 + " appeared " + wordCount._2 + " times."))
  }

}
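As a quick sanity check (not from the original post): if the hypothetical input file spark_wc.txt contains the two lines

hello spark
hello hadoop

then either program prints, in no guaranteed order:

hello appeared 2 times.
spark appeared 1 times.
hadoop appeared 1 times.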
Original: https://www.cnblogs.com/wcwen1990/p/10253386.html
Author: ChavinKing
Title: Developing a WordCount Program in Spark