



import org.apache.hadoop.conf.Configuration;

import org.apache.hadoop.fs.Path;

import org.apache.hadoop.io.LongWritable;

import org.apache.hadoop.io.Text;

import org.apache.hadoop.mapreduce.Job;

import org.apache.hadoop.mapreduce.Mapper;

import org.apache.hadoop.mapreduce.Reducer;

import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

public class CharCountApp {

public static class CharCountMapper extends Mapper<LongWritable, Text,Text,LongWritable> {


protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {

// 将每行字符串拆解成单个字母,然后输出

char[] chars = value.toString().toCharArray();

for(char c : chars){

context.write(new Text(c + ""),new LongWritable(1));




public static class CharCountReducer extends Reducer<Text, LongWritable,Text,LongWritable> {


protected void reduce(Text key, Iterable<LongWritable> values, Context context) throws IOException, InterruptedException {

// 通过累加,计算出每个字母出现的次数 以 key value形式输出

long c = 0;

for(LongWritable lw : values){

c += lw.get();


context.write(key,new LongWritable(c));



public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {

Configuration conf = new Configuration();

Job job = Job.getInstance(conf);

// Hadoop会自动根据驱动程序的类路径来扫描该作业的Jar包。


// 指定mapper


// 指定reducer


// map程序的输出键-值对类型



// 输出键-值对类型



// 输入文件的路径(HDFS 路径)

FileInputFormat.addInputPath(job,new Path(args[0]));

// 输出文件的路径(HDFS 路径)

FileOutputFormat.setOutputPath(job,new Path(args[1]));

boolean res = job.waitForCompletion(true);

System.exit(res ? 0 : 1);




import com.hadoop.demo.mapredurce.CharCountApp;

import org.apache.hadoop.io.LongWritable;

import org.apache.hadoop.io.Text;

import org.apache.hadoop.mapreduce.Mapper;

import org.apache.hadoop.mapreduce.Reducer;

import org.apache.hadoop.mrunit.mapreduce.MapDriver;

import org.apache.hadoop.mrunit.mapreduce.MapReduceDriver;

import org.apache.hadoop.mrunit.mapreduce.ReduceDriver;

import org.junit.Before;

import org.junit.Test;

import java.io.IOException;

import java.util.Arrays;

public class CharCountMapReduceTest {

private Mapper<LongWritable,Text,Text,LongWritable> mapper;

private Reducer<Text, LongWritable, Text, LongWritable> reducer;

private MapDriver mapDriver;

private ReduceDriver reduceDriver;

private MapReduceDriver mrDriver;


public void init(){

mapper = new CharCountApp.CharCountMapper();

reducer = new CharCountApp.CharCountReducer();

mapDriver = new MapDriver();

reduceDriver = new ReduceDriver();

mrDriver = new MapReduceDriver(mapper,reducer);


System.setProperty("hadoop.home.dir", "/C:/hadoop/hadoop-2.7.7");



public void mapperTest() throws IOException {

Text value = new Text("hi hadoop");


.withInput(new LongWritable(0),value)

.withOutput(new Text("h"), new LongWritable(1))

.withOutput(new Text("i"), new LongWritable(1))

.withOutput(new Text(" "), new LongWritable(1))

.withOutput(new Text("h"), new LongWritable(1))

.withOutput(new Text("a"), new LongWritable(1))

.withOutput(new Text("d"), new LongWritable(1))

.withOutput(new Text("o"), new LongWritable(1))

.withOutput(new Text("o"), new LongWritable(1))

.withOutput(new Text("p"), new LongWritable(1))




public void reducerTest() throws IOException {


.withInput(new Text("h"), Arrays.asList(new LongWritable(1),new LongWritable(1)))

.withInput(new Text("i"), Arrays.asList(new LongWritable(1)))

.withOutput(new Text("h"),new LongWritable(2))

.withOutput(new Text("i"),new LongWritable(1))




public void mapReducerTest() throws IOException {

String line1 = "hi";

String line2 = "hadoop";



.withInput(new LongWritable(),new Text(line1))

.withInput(new LongWritable(),new Text(line2))

.withOutput(new Text("a"),new LongWritable(1))

.withOutput(new Text("d"),new LongWritable(1))

.withOutput(new Text("h"),new LongWritable(2))

.withOutput(new Text("i"),new LongWritable(1))

.withOutput(new Text("o"),new LongWritable(2))

.withOutput(new Text("p"),new LongWritable(1))




    MRUnit 是 Cloudera 公司专为 Hadoop MapReduce 编写的单元测试框架,其 API 非常简洁实用。该框架针对不同的测试对象提供了不同的 Driver,分别是:MapDriver、ReduceDriver 和 MapReduceDriver。

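MRUnit 依赖(Maven 坐标):groupId 为 org.apache.mrunit,artifactId 为 mrunit,version 为 1.1.0,classifier 为 hadoop2,scope 为 test。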












DEBUG - Mapping input (0, hi hadoop)

DEBUG - Matched expected output (h, 1) at position 0

DEBUG - Matched expected output (i, 1) at position 1

DEBUG - Matched expected output ( , 1) at position 2

DEBUG - Matched expected output (h, 1) at position 3

DEBUG - Matched expected output (a, 1) at position 4

DEBUG - Matched expected output (d, 1) at position 5

DEBUG - Matched expected output (o, 1) at position 6

DEBUG - Matched expected output (o, 1) at position 7

DEBUG - Matched expected output (p, 1) at position 8


DEBUG - Matched expected output (h, 2) at position 0

DEBUG - Matched expected output (i, 1) at position 1


DEBUG - Starting map phase with mapper: com.hadoop.demo.mapredurce.CharCountApp$CharCountMapper@41c2284a

DEBUG - Starting reduce phase with reducer: com.hadoop.demo.mapredurce.CharCountApp$CharCountReducer@42530531

DEBUG - Reducing input ((a, 1))

DEBUG - Reducing input ((d, 1))

DEBUG - Reducing input ((h, 1), (h, 1))

DEBUG - Reducing input ((i, 1))

DEBUG - Reducing input ((o, 1), (o, 1))

DEBUG - Reducing input ((p, 1))

DEBUG - Matched expected output (a, 1) at position 0

DEBUG - Matched expected output (d, 1) at position 1

DEBUG - Matched expected output (h, 2) at position 2

DEBUG - Matched expected output (i, 1) at position 3

DEBUG - Matched expected output (o, 2) at position 4

DEBUG - Matched expected output (p, 1) at position 5



本地执行的话,可能需要在 Windows 上安装 Hadoop,参考: https://blog.csdn.net/qinlan1994/article/details/90413243

本文参考: https://www.jianshu.com/p/26ef726e643b

源码下载:https://pan.baidu.com/s/1CxTJdcQGgBrcQ-R4QBljNQ      提取码 kbyy


