Hadoop Series 03: Unit Testing MapReduce


1. Prepare a MapReduce Job

The job counts the number of occurrences of each character in the input.

package com.hadoop.demo.mapredurce;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

public class CharCountApp {

    public static class CharCountMapper extends Mapper<LongWritable, Text, Text, LongWritable> {
        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            // Split each input line into individual characters and emit (char, 1)
            char[] chars = value.toString().toCharArray();
            for (char c : chars) {
                context.write(new Text(c + ""), new LongWritable(1));
            }
        }
    }

    public static class CharCountReducer extends Reducer<Text, LongWritable, Text, LongWritable> {
        @Override
        protected void reduce(Text key, Iterable<LongWritable> values, Context context) throws IOException, InterruptedException {
            // Sum the counts for each character and emit (char, total)
            long c = 0;
            for (LongWritable lw : values) {
                c += lw.get();
            }
            context.write(key, new LongWritable(c));
        }
    }

    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);
        // Hadoop locates the job JAR from the classpath of the driver class
        job.setJarByClass(CharCountApp.class);
        // Set the mapper
        job.setMapperClass(CharCountMapper.class);
        // Set the reducer
        job.setReducerClass(CharCountReducer.class);
        // Key/value types of the map output
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(LongWritable.class);
        // Key/value types of the final output
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);
        // Input path (HDFS)
        FileInputFormat.addInputPath(job, new Path(args[0]));
        // Output path (HDFS)
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        boolean res = job.waitForCompletion(true);
        System.exit(res ? 0 : 1);
    }
}
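Once packaged into a JAR, the job can be submitted with the standard hadoop command. The JAR name and HDFS paths below are placeholders for illustration, not from the original post:

    hadoop jar charcount.jar com.hadoop.demo.mapredurce.CharCountApp /input/words.txt /output/charcount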

2. Create the Unit Test

import com.hadoop.demo.mapredurce.CharCountApp;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mrunit.mapreduce.MapDriver;
import org.apache.hadoop.mrunit.mapreduce.MapReduceDriver;
import org.apache.hadoop.mrunit.mapreduce.ReduceDriver;
import org.junit.Before;
import org.junit.Test;

import java.io.IOException;
import java.util.Arrays;

public class CharCountMapReduceTest {

    private Mapper<LongWritable, Text, Text, LongWritable> mapper;
    private Reducer<Text, LongWritable, Text, LongWritable> reducer;
    private MapDriver<LongWritable, Text, Text, LongWritable> mapDriver;
    private ReduceDriver<Text, LongWritable, Text, LongWritable> reduceDriver;
    private MapReduceDriver<LongWritable, Text, Text, LongWritable, Text, LongWritable> mrDriver;

    @Before
    public void init() {
        mapper = new CharCountApp.CharCountMapper();
        reducer = new CharCountApp.CharCountReducer();
        mapDriver = new MapDriver<>();
        reduceDriver = new ReduceDriver<>();
        mrDriver = new MapReduceDriver<>(mapper, reducer);
        // Point hadoop.home.dir at a local Hadoop install (needed on Windows for winutils.exe)
        System.setProperty("hadoop.home.dir", "/C:/hadoop/hadoop-2.7.7");
    }

    @Test
    public void mapperTest() throws IOException {
        // Feed one line to the mapper and verify the emitted (char, 1) pairs, in emission order
        Text value = new Text("hi hadoop");
        mapDriver.withMapper(mapper)
                .withInput(new LongWritable(0), value)
                .withOutput(new Text("h"), new LongWritable(1))
                .withOutput(new Text("i"), new LongWritable(1))
                .withOutput(new Text(" "), new LongWritable(1))
                .withOutput(new Text("h"), new LongWritable(1))
                .withOutput(new Text("a"), new LongWritable(1))
                .withOutput(new Text("d"), new LongWritable(1))
                .withOutput(new Text("o"), new LongWritable(1))
                .withOutput(new Text("o"), new LongWritable(1))
                .withOutput(new Text("p"), new LongWritable(1))
                .runTest();
    }

    @Test
    public void reducerTest() throws IOException {
        // Feed pre-grouped values to the reducer and verify the summed counts
        reduceDriver.withReducer(reducer)
                .withInput(new Text("h"), Arrays.asList(new LongWritable(1), new LongWritable(1)))
                .withInput(new Text("i"), Arrays.asList(new LongWritable(1)))
                .withOutput(new Text("h"), new LongWritable(2))
                .withOutput(new Text("i"), new LongWritable(1))
                .runTest();
    }

    @Test
    public void mapReducerTest() throws IOException {
        // Run the full map -> shuffle -> reduce pipeline; outputs arrive sorted by key
        String line1 = "hi";
        String line2 = "hadoop";
        mrDriver.withMapper(mapper)
                .withReducer(reducer)
                .withInput(new LongWritable(), new Text(line1))
                .withInput(new LongWritable(), new Text(line2))
                .withOutput(new Text("a"), new LongWritable(1))
                .withOutput(new Text("d"), new LongWritable(1))
                .withOutput(new Text("h"), new LongWritable(2))
                .withOutput(new Text("i"), new LongWritable(1))
                .withOutput(new Text("o"), new LongWritable(2))
                .withOutput(new Text("p"), new LongWritable(1))
                .runTest();
    }
}

    MRUnit is a unit-testing framework for Hadoop MapReduce, originally developed at Cloudera, with a concise and practical API. It provides a separate driver for each kind of test target: MapDriver, ReduceDriver, and MapReduceDriver.
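MRUnit also offers static factory methods that infer the driver's generic parameters, as an alternative to the raw constructors used in the test above. A minimal sketch of that style (assuming the newMapDriver factory available in MRUnit 1.1.0):

    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mrunit.mapreduce.MapDriver;

    // Typed driver built via the static factory; the generics then flow through
    // withInput/withOutput, so type errors surface at compile time rather than at run time.
    MapDriver<LongWritable, Text, Text, LongWritable> mapDriver =
            MapDriver.newMapDriver(new CharCountApp.CharCountMapper());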

MRUnit Maven dependency:

<dependency>
    <groupId>org.apache.mrunit</groupId>
    <artifactId>mrunit</artifactId>
    <version>1.1.0</version>
    <classifier>hadoop2</classifier>
    <scope>test</scope>
</dependency>
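The test class also needs JUnit 4 on the test classpath (for the org.junit.Before and org.junit.Test annotations used above). A typical coordinate; the exact version is an assumption, any 4.x release should work:

<dependency>
    <groupId>junit</groupId>
    <artifactId>junit</artifactId>
    <version>4.12</version>
    <scope>test</scope>
</dependency>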

3. Run the Unit Tests

MRUnit lets you test the stages of a MapReduce job in isolation: the mapper alone, the reducer alone, or the complete map-reduce pipeline.

Run mapperTest:

# Output

DEBUG - Mapping input (0, hi hadoop)
DEBUG - Matched expected output (h, 1) at position 0
DEBUG - Matched expected output (i, 1) at position 1
DEBUG - Matched expected output ( , 1) at position 2
DEBUG - Matched expected output (h, 1) at position 3
DEBUG - Matched expected output (a, 1) at position 4
DEBUG - Matched expected output (d, 1) at position 5
DEBUG - Matched expected output (o, 1) at position 6
DEBUG - Matched expected output (o, 1) at position 7
DEBUG - Matched expected output (p, 1) at position 8

Run reducerTest:

DEBUG - Matched expected output (h, 2) at position 0
DEBUG - Matched expected output (i, 1) at position 1

Run mapReducerTest:

DEBUG - Starting map phase with mapper: com.hadoop.demo.mapredurce.CharCountApp$CharCountMapper@41c2284a
DEBUG - Starting reduce phase with reducer: com.hadoop.demo.mapredurce.CharCountApp$CharCountReducer@42530531
DEBUG - Reducing input ((a, 1))
DEBUG - Reducing input ((d, 1))
DEBUG - Reducing input ((h, 1), (h, 1))
DEBUG - Reducing input ((i, 1))
DEBUG - Reducing input ((o, 1), (o, 1))
DEBUG - Reducing input ((p, 1))
DEBUG - Matched expected output (a, 1) at position 0
DEBUG - Matched expected output (d, 1) at position 1
DEBUG - Matched expected output (h, 2) at position 2
DEBUG - Matched expected output (i, 1) at position 3
DEBUG - Matched expected output (o, 2) at position 4
DEBUG - Matched expected output (p, 1) at position 5

Note: in mapReducerTest, the outputs registered with withOutput must match what the reducer actually computes, in key-sorted order; otherwise runTest() fails with an assertion error. For example, swapping the expected position of a, or changing the expected count for o to 1, makes the test fail on the next run.
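If you prefer to inspect the actual results instead of failing fast, each driver also exposes run(), which returns the produced pairs for manual assertions. A minimal sketch against mrDriver with the same inputs as mapReducerTest; the assertion style here is illustrative, not from the original post:

    import java.util.List;
    import org.apache.hadoop.mrunit.types.Pair;
    import static org.junit.Assert.assertEquals;

    // run() executes the pipeline and returns the actual (key, value) pairs in key-sorted
    // order, so mismatches can be reported with custom assertions instead of
    // runTest()'s built-in fail-fast check.
    List<Pair<Text, LongWritable>> results = mrDriver.run();
    assertEquals(6, results.size());
    assertEquals(new Text("h"), results.get(2).getFirst());
    assertEquals(new LongWritable(2), results.get(2).getSecond());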

 

To run the tests locally on Windows, you may need a local Hadoop installation (for winutils.exe, matching the hadoop.home.dir set in init()); see: https://blog.csdn.net/qinlan1994/article/details/90413243

Reference for this article: https://www.jianshu.com/p/26ef726e643b

Source code download: https://pan.baidu.com/s/1CxTJdcQGgBrcQ-R4QBljNQ (extraction code: kbyy)

 
