Spark-Java-算子

java

package scala.spark.Day3;

import org.apache.spark.SparkConf;

import org.apache.spark.api.java.JavaRDD;

import org.apache.spark.api.java.JavaSparkContext;

import org.apache.spark.api.java.function.Function;

import org.apache.spark.api.java.function.VoidFunction;

import java.util.Arrays;

import java.util.List;

/**

* Created by Administrator on 2019/10/16.

*/

public class JavaRDDTest {

public static void main(String[] args) {

System.setProperty("hadoop.home.dir", "E:\\hadoop-2.6.0-cdh5.15.0\\hadoop-2.6.0-cdh5.15.0");

//JavaRDD 标准RDD

//JavaPairRDD PairRDD

//JavaDoubleRDD DoubleRDD

//java没有scala隐式转化,生成RDD时候,必须指明是哪种RDD

//实例化驱动器

SparkConf sparkConf = new SparkConf();

sparkConf.setMaster("local");

sparkConf.setAppName("Java RDD");

JavaSparkContext javaSparkContext = new JavaSparkContext(sparkConf);

//javaSparkContext.parallelize() 标准RDD

//javaSparkContext.parallelizePairs() PairRDD

//javaSparkContext.parallelizeDoubles() 数值类型RDD

List<Integer> list = Arrays.asList(1, 2, 3, 4, 5);

JavaRDD<Integer> sourceRDD = javaSparkContext.parallelize(list);

//map 算子

// 匿名子类

/*

public interface Function<T1, R> extends Serializable {

R call(T1 v1) throws Exception;

}

T1 RDD中成员的类型

v1 返回的类型

*/

JavaRDD<Integer> mapRDD = sourceRDD.map(

new Function<Integer, Integer>() {

public Integer call(Integer v1) throws Exception {

return v1 * v1;

}

}

);

/*

public interface VoidFunction<T> extends Serializable {

void call(T t) throws Exception;

}

*/

mapRDD.foreach(

new VoidFunction<Integer>() {

public void call(Integer integer) throws Exception {

System.out.println(integer);

}

}

);

javaSparkContext.close();

}

}

  

以上是 Spark-Java-算子 的全部内容, 来源链接: utcz.com/z/393184.html

回到顶部