Hadoop Series 03: Unit Testing MapReduce


1. Prepare a MapReduce Job

The job counts the number of occurrences of each character in the input.

package com.hadoop.demo.mapredurce;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

public class CharCountApp {

    public static class CharCountMapper extends Mapper<LongWritable, Text, Text, LongWritable> {
        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            // Split each input line into individual characters and emit (char, 1)
            char[] chars = value.toString().toCharArray();
            for (char c : chars) {
                context.write(new Text(c + ""), new LongWritable(1));
            }
        }
    }

    public static class CharCountReducer extends Reducer<Text, LongWritable, Text, LongWritable> {
        @Override
        protected void reduce(Text key, Iterable<LongWritable> values, Context context) throws IOException, InterruptedException {
            // Sum the counts for each character and emit (char, total)
            long c = 0;
            for (LongWritable lw : values) {
                c += lw.get();
            }
            context.write(key, new LongWritable(c));
        }
    }

    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);
        // Hadoop locates the job JAR from the classpath of the driver class
        job.setJarByClass(CharCountApp.class);
        // Set the mapper
        job.setMapperClass(CharCountMapper.class);
        // Set the reducer
        job.setReducerClass(CharCountReducer.class);
        // Key/value types of the map output
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(LongWritable.class);
        // Key/value types of the final output
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);
        // Input path (HDFS)
        FileInputFormat.addInputPath(job, new Path(args[0]));
        // Output path (HDFS)
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        boolean res = job.waitForCompletion(true);
        System.exit(res ? 0 : 1);
    }
}
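Once packaged into a JAR, the job can be submitted with the standard hadoop command. The JAR name and HDFS paths below are placeholders for illustration, not from the original post:

    hadoop jar charcount.jar com.hadoop.demo.mapredurce.CharCountApp /input/words.txt /output/charcount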

2. Create the Unit Test

import com.hadoop.demo.mapredurce.CharCountApp;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mrunit.mapreduce.MapDriver;
import org.apache.hadoop.mrunit.mapreduce.MapReduceDriver;
import org.apache.hadoop.mrunit.mapreduce.ReduceDriver;
import org.junit.Before;
import org.junit.Test;

import java.io.IOException;
import java.util.Arrays;

public class CharCountMapReduceTest {

    private Mapper<LongWritable, Text, Text, LongWritable> mapper;
    private Reducer<Text, LongWritable, Text, LongWritable> reducer;
    private MapDriver<LongWritable, Text, Text, LongWritable> mapDriver;
    private ReduceDriver<Text, LongWritable, Text, LongWritable> reduceDriver;
    private MapReduceDriver<LongWritable, Text, Text, LongWritable, Text, LongWritable> mrDriver;

    @Before
    public void init() {
        mapper = new CharCountApp.CharCountMapper();
        reducer = new CharCountApp.CharCountReducer();
        mapDriver = new MapDriver<>();
        reduceDriver = new ReduceDriver<>();
        mrDriver = new MapReduceDriver<>(mapper, reducer);
        // Point hadoop.home.dir at a local Hadoop install (needed on Windows for winutils.exe)
        System.setProperty("hadoop.home.dir", "/C:/hadoop/hadoop-2.7.7");
    }

    @Test
    public void mapperTest() throws IOException {
        // Feed one line to the mapper and verify the emitted (char, 1) pairs, in emission order
        Text value = new Text("hi hadoop");
        mapDriver.withMapper(mapper)
                .withInput(new LongWritable(0), value)
                .withOutput(new Text("h"), new LongWritable(1))
                .withOutput(new Text("i"), new LongWritable(1))
                .withOutput(new Text(" "), new LongWritable(1))
                .withOutput(new Text("h"), new LongWritable(1))
                .withOutput(new Text("a"), new LongWritable(1))
                .withOutput(new Text("d"), new LongWritable(1))
                .withOutput(new Text("o"), new LongWritable(1))
                .withOutput(new Text("o"), new LongWritable(1))
                .withOutput(new Text("p"), new LongWritable(1))
                .runTest();
    }

    @Test
    public void reducerTest() throws IOException {
        // Feed pre-grouped values to the reducer and verify the summed counts
        reduceDriver.withReducer(reducer)
                .withInput(new Text("h"), Arrays.asList(new LongWritable(1), new LongWritable(1)))
                .withInput(new Text("i"), Arrays.asList(new LongWritable(1)))
                .withOutput(new Text("h"), new LongWritable(2))
                .withOutput(new Text("i"), new LongWritable(1))
                .runTest();
    }

    @Test
    public void mapReducerTest() throws IOException {
        // Run the full map -> shuffle -> reduce pipeline; outputs arrive sorted by key
        String line1 = "hi";
        String line2 = "hadoop";
        mrDriver.withMapper(mapper)
                .withReducer(reducer)
                .withInput(new LongWritable(), new Text(line1))
                .withInput(new LongWritable(), new Text(line2))
                .withOutput(new Text("a"), new LongWritable(1))
                .withOutput(new Text("d"), new LongWritable(1))
                .withOutput(new Text("h"), new LongWritable(2))
                .withOutput(new Text("i"), new LongWritable(1))
                .withOutput(new Text("o"), new LongWritable(2))
                .withOutput(new Text("p"), new LongWritable(1))
                .runTest();
    }
}

    MRUnit is a unit-testing framework for Hadoop MapReduce, originally developed at Cloudera, with a concise and practical API. It provides a separate driver for each kind of test target: MapDriver, ReduceDriver, and MapReduceDriver.
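MRUnit also offers static factory methods that infer the driver's generic parameters, as an alternative to the raw constructors used in the test above. A minimal sketch of that style (assuming the newMapDriver factory available in MRUnit 1.1.0):

    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mrunit.mapreduce.MapDriver;

    // Typed driver built via the static factory; the generics then flow through
    // withInput/withOutput, so type errors surface at compile time rather than at run time.
    MapDriver<LongWritable, Text, Text, LongWritable> mapDriver =
            MapDriver.newMapDriver(new CharCountApp.CharCountMapper());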

MRUnit Maven dependency:

<dependency>
    <groupId>org.apache.mrunit</groupId>
    <artifactId>mrunit</artifactId>
    <version>1.1.0</version>
    <classifier>hadoop2</classifier>
    <scope>test</scope>
</dependency>
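The test class also needs JUnit 4 on the test classpath (for the org.junit.Before and org.junit.Test annotations used above). A typical coordinate; the exact version is an assumption, any 4.x release should work:

<dependency>
    <groupId>junit</groupId>
    <artifactId>junit</artifactId>
    <version>4.12</version>
    <scope>test</scope>
</dependency>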

3. Run the Unit Tests

MRUnit lets you test the stages of a MapReduce job in isolation: the mapper alone, the reducer alone, or the complete map-reduce pipeline.

Run mapperTest:

# Output

DEBUG - Mapping input (0, hi hadoop)
DEBUG - Matched expected output (h, 1) at position 0
DEBUG - Matched expected output (i, 1) at position 1
DEBUG - Matched expected output ( , 1) at position 2
DEBUG - Matched expected output (h, 1) at position 3
DEBUG - Matched expected output (a, 1) at position 4
DEBUG - Matched expected output (d, 1) at position 5
DEBUG - Matched expected output (o, 1) at position 6
DEBUG - Matched expected output (o, 1) at position 7
DEBUG - Matched expected output (p, 1) at position 8

Run reducerTest:

DEBUG - Matched expected output (h, 2) at position 0
DEBUG - Matched expected output (i, 1) at position 1

Run mapReducerTest:

DEBUG - Starting map phase with mapper: com.hadoop.demo.mapredurce.CharCountApp$CharCountMapper@41c2284a
DEBUG - Starting reduce phase with reducer: com.hadoop.demo.mapredurce.CharCountApp$CharCountReducer@42530531
DEBUG - Reducing input ((a, 1))
DEBUG - Reducing input ((d, 1))
DEBUG - Reducing input ((h, 1), (h, 1))
DEBUG - Reducing input ((i, 1))
DEBUG - Reducing input ((o, 1), (o, 1))
DEBUG - Reducing input ((p, 1))
DEBUG - Matched expected output (a, 1) at position 0
DEBUG - Matched expected output (d, 1) at position 1
DEBUG - Matched expected output (h, 2) at position 2
DEBUG - Matched expected output (i, 1) at position 3
DEBUG - Matched expected output (o, 2) at position 4
DEBUG - Matched expected output (p, 1) at position 5

Note: in mapReducerTest, the outputs registered with withOutput must match what the reducer actually computes, in key-sorted order; otherwise runTest() fails with an assertion error. For example, swapping the expected position of a, or changing the expected count for o to 1, makes the test fail on the next run.
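If you prefer to inspect the actual results instead of failing fast, each driver also exposes run(), which returns the produced pairs for manual assertions. A minimal sketch against mrDriver with the same inputs as mapReducerTest; the assertion style here is illustrative, not from the original post:

    import java.util.List;
    import org.apache.hadoop.mrunit.types.Pair;
    import static org.junit.Assert.assertEquals;

    // run() executes the pipeline and returns the actual (key, value) pairs in key-sorted
    // order, so mismatches can be reported with custom assertions instead of
    // runTest()'s built-in fail-fast check.
    List<Pair<Text, LongWritable>> results = mrDriver.run();
    assertEquals(6, results.size());
    assertEquals(new Text("h"), results.get(2).getFirst());
    assertEquals(new LongWritable(2), results.get(2).getSecond());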

 

To run the tests locally on Windows, you may need a local Hadoop installation (for winutils.exe, matching the hadoop.home.dir set in init()); see: https://blog.csdn.net/qinlan1994/article/details/90413243

Reference for this article: https://www.jianshu.com/p/26ef726e643b

Source code download: https://pan.baidu.com/s/1CxTJdcQGgBrcQ-R4QBljNQ (extraction code: kbyy)

 
