利用MRUnit进行MapReduce单元测试
利用 MRUnit 进行 MapReduce 单元测试
时间:2023-09-14 08:59:49
一、MRUnit简介
Java编程技巧之单元测试用例编写流程 前言清代杰出思想家章学诚有一句名言:“学必求其心得,业必贵其专精。”意思是:学习上一定要追求心得体会,事业上一定要贵以专注精深。做技术就是这样,一件事如果做到了极致,就必然会有所心得体会。作者最近在一个项目上,追求单元测试覆盖率到极致(行覆盖率96.11%,分支覆盖率93.35%),所以才有了这篇心得体会。上一篇文章《Java单元测试技巧之PowerMock》除了介绍单元测试基础知识外,主要介绍了
Spock单元测试框架初探 软件工程发生在代码被非原作者阅读之时 Spock vs JUnit 单元测试框架,JUnit读者已了解,因此直接开门见山,基于JUnit和Spock做一个对比,明显Spock在工程化更有优势。
官网地址:https://mrunit.apache.org/
Apache MRUnit ™ is a Java library that helps developers unit test Apache Hadoop map reduce jobs.
MRUnit是一个帮助开发者测试map reduce 作业的单元测试库。
二、代码示例以maven项目为例,演示如何使用MRUnit进行MR单元测试。
关于示例的讲解,请参考:https://cwiki.apache.org/confluence/display/MRUNIT/MRUnit+Tutorial
项目pom.xml文件,重点关注mrunit,mockito-all, junit三个类库的引入,MRUnit是利用mockito+junit针对MR程序进行模拟测试。
MR单元测试类package mrunit; import static org.junit.Assert.assertEquals; import java.io.IOException; import java.util.ArrayList; import java.util.List; import mrunit.SMSCDRMapper.CDRCounter; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mrunit.mapreduce.MapDriver; import org.apache.hadoop.mrunit.mapreduce.MapReduceDriver; import org.apache.hadoop.mrunit.mapreduce.ReduceDriver; import org.junit.Before; import org.junit.Test; * 测试数据说明 CDRID;CDRType;Phone1;Phone2;SMS Status Code * 655209;1;796764372490213;804422938115889;6 * 353415;0;356857119806206;287572231184798;4 * 835699;1;252280313968413;889717902341635;0 public class SMSCDRMapperReducerTest { Configuration conf = new Configuration(); MapDriver LongWritable, Text, Text, IntWritable mapDriver; ReduceDriver Text, IntWritable, Text, IntWritable reduceDriver; MapReduceDriver LongWritable, Text, Text, IntWritable, Text, IntWritable mapReduceDriver; @Before public void setUp() { //测试mapreduce SMSCDRMapper mapper = new SMSCDRMapper(); SMSCDRReducer reducer = new SMSCDRReducer(); mapDriver = MapDriver.newMapDriver(mapper); reduceDriver = ReduceDriver.newReduceDriver(reducer); mapReduceDriver = MapReduceDriver.newMapReduceDriver(mapper, reducer); //测试配置参数 mapDriver.setConfiguration(conf); conf.set("myParameter1", "20"); conf.set("myParameter2", "23"); @Test public void testMapper() throws IOException { mapDriver.withInput(new LongWritable(), new Text( "655209;1;796764372490213;804422938115889;6")); mapDriver.withOutput(new Text("6"), new IntWritable(1)); mapDriver.runTest(); @Test public void testReducer() throws IOException { List IntWritable values = new ArrayList IntWritable values.add(new IntWritable(1)); values.add(new IntWritable(1)); reduceDriver.withInput(new Text("6"), values); reduceDriver.withOutput(new Text("6"), new IntWritable(2)); reduceDriver.runTest(); @Test public void 
testMapperReducer() throws IOException { mapReduceDriver.withInput(new LongWritable(), new Text( "655209;1;796764372490213;804422938115889;6")); mapReduceDriver.withOutput(new Text("6"), new IntWritable(1)); @Test public void testMapperCount() throws IOException { mapDriver.withInput(new LongWritable(), new Text( "655209;0;796764372490213;804422938115889;6")); // mapDriver.withOutput(new Text("6"), new IntWritable(1)); mapDriver.runTest(); assertEquals("Expected 1 counter increment", 1, mapDriver.getCounters() .findCounter(CDRCounter.NonSMSCDR).getValue()); }Mapper类
package mrunit; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Mapper; public class SMSCDRMapper extends Mapper LongWritable, Text, Text, IntWritable { private Text status = new Text(); private final static IntWritable addOne = new IntWritable(1); static enum CDRCounter { NonSMSCDR; * Returns the SMS status code and its count protected void map(LongWritable key, Text value, Context context) throws java.io.IOException, InterruptedException { //655209;1;796764372490213;804422938115889;6 is the Sample record format String[] line = value.toString().split(";"); // If record is of SMS CDR if (Integer.parseInt(line[1]) == 1) { status.set(line[4]); context.write(status, addOne); }else{ // CDR record is not of type SMS so increment the counter context.getCounter(CDRCounter.NonSMSCDR).increment(1); }Reducer类
package mrunit; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Reducer; public class SMSCDRReducer extends Reducer Text, IntWritable, Text, IntWritable { protected void reduce(Text key, Iterable IntWritable values, Context context) throws java.io.IOException, InterruptedException { int sum = 0; for (IntWritable value : values) { sum += value.get(); context.write(key, new IntWritable(sum)); }项目的pom.xml文件
project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd" modelVersion 4.0.0 /modelVersion groupId com.cdh /groupId artifactId cdh-test /artifactId version SNAPSHOT-1.0.0 /version packaging jar /packaging name cdh-test /name url http://maven.apache.org /url properties hadoop.version 2.0.0-mr1-cdh4.4.0 /hadoop.version hbase.version 0.94.6-cdh4.4.0 /hbase.version project.build.sourceEncoding utf-8 /project.build.sourceEncoding maven.compiler.encoding utf-8 /maven.compiler.encoding /properties build pluginManagement plugins plugin groupId org.apache.maven.plugins /groupId artifactId maven-compiler-plugin /artifactId version 3.1 /version configuration encoding utf-8 /encoding source 1.6 /source target 1.6 /target /configuration /plugin /plugins /pluginManagement plugins plugin groupId org.apache.maven.plugins /groupId artifactId maven-shade-plugin /artifactId version 2.1 /version executions execution phase package /phase goals goal shade /goal /goals /execution /executions /plugin plugin groupId org.apache.maven.plugins /groupId artifactId maven-eclipse-plugin /artifactId version 2.9 /version configuration buildOutputDirectory eclipse-classes /buildOutputDirectory downloadSources true /downloadSources downloadJavadocs false /downloadJavadocs /configuration /plugin /plugins /build dependencies dependency groupId jdk.tools /groupId artifactId jdk.tools /artifactId version 1.6 /version scope system /scope systemPath ${JAVA_HOME}/lib/tools.jar /systemPath /dependency dependency groupId org.apache.hadoop /groupId artifactId hadoop-client /artifactId version ${hadoop.version} /version scope provided /scope exclusions exclusion artifactId mockito-all /artifactId groupId org.mockito /groupId /exclusion /exclusions /dependency dependency groupId org.apache.hadoop /groupId artifactId hadoop-mapreduce-client-core /artifactId version 
2.0.0-cdh4.4.0 /version exclusions exclusion artifactId jersey-test-framework-grizzly2 /artifactId groupId com.sun.jersey.jersey-test-framework /groupId /exclusion exclusion artifactId netty /artifactId groupId org.jboss.netty /groupId /exclusion /exclusions scope provided /scope /dependency dependency groupId org.apache.hbase /groupId artifactId hbase /artifactId version ${hbase.version} /version scope provided /scope /dependency dependency groupId com.hadoop.gplcompression /groupId artifactId hadoop-lzo-cdh4 /artifactId version 0.4.15-gplextras /version /dependency dependency groupId org.hsqldb /groupId artifactId hsqldb /artifactId version 2.2.9 /version /dependency dependency groupId redis.clients /groupId artifactId jedis /artifactId version 2.5.1 /version /dependency !-- junit test -- dependency groupId org.apache.mrunit /groupId artifactId mrunit /artifactId version 1.1.0 /version classifier hadoop2 /classifier scope test /scope /dependency dependency groupId org.mockito /groupId artifactId mockito-all /artifactId version 1.9.5 /version scope test /scope /dependency dependency groupId junit /groupId artifactId junit /artifactId version 4.10 /version scope test /scope /dependency /dependencies repositories repository id cloudera /id url https://repository.cloudera.com/artifactory/cloudera-repos /url releases enabled true /enabled /releases snapshots enabled false /enabled /snapshots /repository /repositories /project
Java编程技巧之单元测试用例编写流程 前言清代杰出思想家章学诚有一句名言:“学必求其心得,业必贵其专精。”意思是:学习上一定要追求心得体会,事业上一定要贵以专注精深。做技术就是这样,一件事如果做到了极致,就必然会有所心得体会。作者最近在一个项目上,追求单元测试覆盖率到极致(行覆盖率96.11%,分支覆盖率93.35%),所以才有了这篇心得体会。上一篇文章《Java单元测试技巧之PowerMock》除了介绍单元测试基础知识外,主要介绍了
Spock单元测试框架初探 软件工程发生在代码被非原作者阅读之时 Spock vs JUnit 单元测试框架,JUnit读者已了解,因此直接开门见山,基于JUnit和Spock做一个对比,明显Spock在工程化更有优势。
相关文章
- CVE-2019-0797漏洞:Windows操作系统中的新零日在攻击中被利用
- 利用flume+kafka+storm+mysql构建大数据实时系统
- LVM学习之KVM利用LVM快照备份与恢复虚拟机
- 利用Python进行异常值分析实例代码
- 用Excel利用RFM模型进行客户细分
- 用Excel利用RFM模型进行客户细分
- CV之FR:基于dlib、cv2库利用warpPerspective函数和shape_predictor_68_face_landmarks.dat文件实现AI换脸渐变融合的视频效果案例应用
- ML之LiR:利用LiR线性回归算法(自定义目标函数MSE和优化器GD)对Boston房价数据集(两特征+归一化)进行回归预测
- EL之Bagging:kaggle比赛之利用titanic(泰坦尼克号)数据集建立Bagging模型对每个人进行获救是否预测
- ML之LoR&Bagging&RF:依次利用LoR、Bagging、RF算法对titanic(泰坦尼克号)数据集 (Kaggle经典案例)获救人员进行二分类预测(最全)
- DL之CNN可视化:利用SimpleConvNet算法【3层,im2col优化】基于mnist数据集训练并对卷积层输出进行可视化
- DL之CNN:基于CRNN_OCR算法(keras,CNN+RNN)利用数据集(torch,mdb格式)训练来实现新图片上不定长度字符串进行识别—训练过程
- Py之wxPython:利用wxPython库设计CMD的dos窗口并进行交互可视化图文教程
- 利用BeanMap进行对象与Map的相互转换
- 基于Flask开发网站--利用复选框进行批量操作
- 如何巧妙的利用selenium和requests组合来进行操作需要登录的页面
- Linux下利用iverilog进行功能仿真,利用gtkwave查看仿真输出波形
- 2019年赣州市赛任务四利用python脚本进行web渗透测试
- 利用MySQL玩转数据分析之基础篇
- C# 利用反射进行深拷贝
- IPython:利用python语言将后缀为ipynb文件中的输出的图片在py文件中编程进行可视化—即如何将IPython.core.display.HTML类型的数据进行图表可视化
- 利用Jenkins实现java-cms项目自动化上线及回滚(七)