Spark Java Operators
package scala.spark.Day3;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaDoubleRDD;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.api.java.function.VoidFunction;
import scala.Tuple2;

import java.util.Arrays;
import java.util.List;
/**
* Created by Administrator on 2019/10/16.
*/
public class JavaRDDTest {
    public static void main(String[] args) {
        System.setProperty("hadoop.home.dir", "E:\\hadoop-2.6.0-cdh5.15.0\\hadoop-2.6.0-cdh5.15.0");
        // JavaRDD       -> standard RDD
        // JavaPairRDD   -> pair RDD
        // JavaDoubleRDD -> double (numeric) RDD
        // Java has no Scala-style implicit conversions, so the concrete RDD
        // flavor must be chosen explicitly when an RDD is created.
        // Instantiate the driver context
        SparkConf sparkConf = new SparkConf();
        sparkConf.setMaster("local");
        sparkConf.setAppName("Java RDD");
        JavaSparkContext javaSparkContext = new JavaSparkContext(sparkConf);
        // javaSparkContext.parallelize()        -> standard RDD
        // javaSparkContext.parallelizePairs()   -> pair RDD
        // javaSparkContext.parallelizeDoubles() -> numeric (double) RDD
        List<Integer> list = Arrays.asList(1, 2, 3, 4, 5);
        JavaRDD<Integer> sourceRDD = javaSparkContext.parallelize(list);
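        // --- Illustrative sketch, not in the original post: the other two
        // parallelize flavors mentioned above. parallelizePairs() takes a
        // List<Tuple2<K, V>> and yields a JavaPairRDD; parallelizeDoubles()
        // takes a List<Double> and yields a JavaDoubleRDD.
        JavaPairRDD<String, Integer> pairRDD = javaSparkContext.parallelizePairs(
                Arrays.asList(new Tuple2<>("a", 1), new Tuple2<>("b", 2))
        );
        JavaDoubleRDD doubleRDD = javaSparkContext.parallelizeDoubles(
                Arrays.asList(1.0, 2.0, 3.0)
        );
        System.out.println(pairRDD.countByKey()); // {a=1, b=2}
        System.out.println(doubleRDD.mean());     // 2.0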
        // map operator, implemented with an anonymous inner class:
        /*
        public interface Function<T1, R> extends Serializable {
            R call(T1 v1) throws Exception;
        }
        T1: the element type of the source RDD (v1 is one such element)
        R:  the return type of call(), i.e. the element type of the new RDD
        */
        JavaRDD<Integer> mapRDD = sourceRDD.map(
                new Function<Integer, Integer>() {
                    public Integer call(Integer v1) throws Exception {
                        return v1 * v1;
                    }
                }
        );
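        // --- Illustrative sketch, not in the original post: Function has a
        // single abstract method, so with Java 8+ the same map can be written
        // as a lambda; the result is identical to mapRDD above.
        JavaRDD<Integer> mapLambdaRDD = sourceRDD.map(v1 -> v1 * v1);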
        /*
        public interface VoidFunction<T> extends Serializable {
            void call(T t) throws Exception;
        }
        */
        mapRDD.foreach(
                new VoidFunction<Integer>() {
                    public void call(Integer integer) throws Exception {
                        System.out.println(integer);
                    }
                }
        );
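        // --- Illustrative sketch, not in the original post: foreach runs on
        // the executors (it prints to this console only because master is
        // "local"). To bring the results back to the driver, use collect():
        List<Integer> squares = mapRDD.collect();
        System.out.println(squares); // [1, 4, 9, 16, 25]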
        javaSparkContext.close();
    }
}
That's all for Spark Java Operators. Source: utcz.com/z/393184.html