Listing 1.1  WordCount in Scala

import org.apache.spark.{SparkConf, SparkContext}

// Define the WordCount object and its main method
object WordCount {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setMaster("local").setAppName("wordcount")
    val sc = new SparkContext(conf)
    val lines = sc.textFile("file path")                // read the input file
    val words = lines.flatMap(line => line.split(" "))  // split each line on spaces into individual words
    val count = words.map(word => (word, 1)).reduceByKey(_ + _)  // map each word to (word, 1), then sum counts per key
    println(count.collect().mkString("\n"))             // print one (word, count) pair per line
  }
}
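If the results should come out sorted, or be kept on disk rather than printed, a few more calls on the same count RDD suffice. The following is a minimal sketch under the setup of Listing 1.1; the "output path" string is an illustrative placeholder, and the target directory must not already exist:

val sorted = count.sortBy(_._2, ascending = false)  // order pairs by count, descending
sorted.collect().foreach(println)                   // print the most frequent words first
sorted.saveAsTextFile("output path")                // write the pairs to a directory (placeholder name)
sc.stop()                                           // shut down the SparkContext when done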

Listing 1.2  WordCount in Python

from pyspark import SparkConf, SparkContext

conf = SparkConf().setMaster("local").setAppName("wordcount")
sc = SparkContext.getOrCreate(conf)  # initialize the SparkContext
lines = sc.textFile("file path")     # read the input file
words = lines.flatMap(lambda line: line.split(" "))  # split each line on spaces into individual words
# Map each word to a (word, 1) pair, then sum the counts for identical keys
count = words.map(lambda x: (x, 1)).reduceByKey(lambda x, y: x + y)
print(count.collect())               # print the list of (word, count) pairs

Listing 1.3  WordCount in Java

import java.util.Arrays;
import java.util.Iterator;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.FlatMapFunction;
import org.apache.spark.api.java.function.Function2;
import org.apache.spark.api.java.function.PairFunction;
import org.apache.spark.api.java.function.VoidFunction;
import scala.Tuple2;

public class JavaWordCount {
    public static void main(String[] args) {
        // Step 1: initialize the configuration
        SparkConf conf = new SparkConf().setMaster("local").setAppName("wordcount");
        // Step 2: create a JavaSparkContext; SparkContext is the entry point to all of Spark's functionality
        JavaSparkContext sc = new JavaSparkContext(conf);

        // Step 3: create the initial RDD
        // textFile() is the SparkContext method that builds an RDD from a file-based input source;
        // its argument is the path to the input file
        JavaRDD<String> lines = sc.textFile("file path");

        // Step 4: apply transformations to the initial RDD -- the word-count pipeline
        // First, split each line into words on spaces
        JavaRDD<String> words = lines.flatMap(new FlatMapFunction<String, String>() {
            private static final long serialVersionUID = 1L;
            @Override
            public Iterator<String> call(String line) throws Exception {
                return Arrays.asList(line.split(" ")).iterator();
            }
        });
        // Map each word to a pair of the form (word, 1)
        JavaPairRDD<String, Integer> pairs = words.mapToPair(new PairFunction<String, String, Integer>() {
            private static final long serialVersionUID = 1L;
            @Override
            public Tuple2<String, Integer> call(String word) throws Exception {
                return new Tuple2<String, Integer>(word, 1);
            }
        });
        // Using the word as the key, count how many times each word occurs
        JavaPairRDD<String, Integer> wordCounts = pairs.reduceByKey(new Function2<Integer, Integer, Integer>() {
            private static final long serialVersionUID = 1L;
            @Override
            public Integer call(Integer v1, Integer v2) throws Exception {
                return v1 + v2;
            }
        });
        // Print the WordCount results
        wordCounts.foreach(new VoidFunction<Tuple2<String, Integer>>() {
            private static final long serialVersionUID = 1L;
            @Override
            public void call(Tuple2<String, Integer> wordCount) throws Exception {
                System.out.println(wordCount);
            }
        });
        sc.close();
    }
}


Listing 1.4  WordCount in the spark-shell

val lines = sc.textFile("file path")                // read the input file; sc is the SparkContext the shell provides
val words = lines.flatMap(line => line.split(" "))  // split each line on spaces into an RDD of words
val pairs = words.map(word => (word, 1))            // map each word to a (word, 1) pair
val wordCounts = pairs.reduceByKey(_ + _)           // sum the counts of each distinct word
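Note that textFile, flatMap, map, and reduceByKey are all lazy transformations, so Listing 1.4 by itself launches no computation. A minimal sketch of how one might trigger it in the same shell session (the "output path" directory name is an illustrative placeholder):

wordCounts.collect().foreach(println)     // collect() is an action: it runs the job and returns the (word, count) pairs
wordCounts.saveAsTextFile("output path")  // alternatively, persist the pairs to disk; the directory must not already exist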