首页 > 分享 > MapReduce——统计单词出现次数WordCount

MapReduce——统计单词出现次数WordCount

花匠小妙招
2024-12-17 08:38

一、统计每个单词出现的次数（WordCount）

import org.apache.hadoop.conf.Configuration;

import org.apache.hadoop.fs.FileSystem;

import org.apache.hadoop.fs.Path;

import org.apache.hadoop.io.IntWritable;

import org.apache.hadoop.io.LongWritable;

import org.apache.hadoop.io.Text;

import org.apache.hadoop.mapreduce.Job;

import org.apache.hadoop.mapreduce.Mapper;

import org.apache.hadoop.mapreduce.Reducer;

import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

import java.net.URI;

import java.net.URISyntaxException;

public class ForWorldCount {

public static class ForMapper extends Mapper<LongWritable,Text,Text,IntWritable>{

Text oKey=new Text();

IntWritable oValue=new IntWritable(1);

@Override

protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {

String line=value.toString();

String []strs=line.split(" ");

for(String s:strs){

oKey.set(s);

context.write(oKey,oValue);

}

public static class ForReducer extends Reducer<Text,IntWritable,Text,IntWritable>{

IntWritable oValue=new IntWritable();

@Override

protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {

int sum=0;

for(IntWritable i:values){

sum+=i.get();

}

oValue.set(sum);

context.write(key,oValue);

}

public static void main(String[] args) throws IOException, URISyntaxException, ClassNotFoundException, InterruptedException {

Job job= Job.getInstance();

job.setMapperClass(ForMapper.class);

job.setMapOutputKeyClass(Text.class);

job.setMapOutputValueClass(IntWritable.class);

job.setReducerClass(ForReducer.class);

job.setOutputKeyClass(Text.class);

job.setOutputValueClass(IntWritable.class);

FileInputFormat.addInputPath(job,new Path("E://forTestData//forWordCount"));

FileSystem fileSystem=FileSystem.get(new URI("file://E://output"),new Configuration());

Path path=new Path("E://output");

if(fileSystem.exists(path)){

fileSystem.delete(path,true);

}

FileOutputFormat.setOutputPath(job,path);

job.waitForCompletion(true);

}

二、找出出现次数最多的单词

import org.apache.hadoop.conf.Configuration;

import org.apache.hadoop.fs.FileSystem;

import org.apache.hadoop.fs.Path;

import org.apache.hadoop.io.IntWritable;

import org.apache.hadoop.io.LongWritable;

import org.apache.hadoop.io.Text;

import org.apache.hadoop.mapreduce.Job;

import org.apache.hadoop.mapreduce.Mapper;

import org.apache.hadoop.mapreduce.Reducer;

import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

import java.net.URI;

import java.net.URISyntaxException;

import java.util.*;

public class ForSortWordCount {

public static class ForMapper extends Mapper<LongWritable,Text,Text,IntWritable>{

Map<String,Integer> map=new HashMap<String, Integer>();

int maxTimes=0;

@Override

protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {

String line=value.toString();

String strs[]=line.split("t");

String word=strs[0];

int times=Integer.parseInt(strs[1]);

if(times>maxTimes){

map.clear();

map.put(word,times);

maxTimes=times;

}

@Override

protected void cleanup(Context context) throws IOException, InterruptedException {

Map.Entry<String,Integer> entry=map.entrySet().iterator().next();

context.write(new Text(entry.getKey()),new IntWritable(entry.getValue()));

}

public static class ForReducer extends Reducer<Text,IntWritable,Text,IntWritable>{

Map<String,Integer> map=new HashMap<String, Integer>();

@Override

protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {

map.put(key.toString(),values.iterator().next().get());

}

@Override

protected void cleanup(Context context) throws IOException, InterruptedException {

List<Map.Entry<String,Integer>> list=new ArrayList<Map.Entry<String,Integer>>(map.entrySet());

Collections.sort(list,new Comparator<Map.Entry<String, Integer>>() {

public int compare(Map.Entry<String, Integer> o1, Map.Entry<String, Integer> o2) {

if(o1.getValue()==o2.getValue()){

return o2.getKey().length()-o1.getKey().length();

}else{

return o2.getValue()-o1.getValue();

}

}});

Map.Entry<String,Integer> entry=list.get(0);

context.write(new Text(entry.getKey()),new IntWritable(entry.getValue()));

}

public static void main(String[] args) throws IOException, URISyntaxException, ClassNotFoundException, InterruptedException {

Job job= Job.getInstance();

job.setMapperClass(ForMapper.class);

job.setMapOutputKeyClass(Text.class);

job.setMapOutputValueClass(IntWritable.class);

job.setReducerClass(ForReducer.class);

job.setOutputKeyClass(Text.class);

job.setOutputValueClass(IntWritable.class);

FileSystem fileSystem=FileSystem.get(new URI("file:E://output"),new Configuration());

Path path=new Path("E://output");

if(fileSystem.exists(path)){

fileSystem.delete(path,true);

}

FileInputFormat.addInputPath(job,new Path("E://forTestData//forWordCount//forSortWordCount"));

FileOutputFormat.setOutputPath(job,path);

job.setNumReduceTasks(1);

job.waitForCompletion(true);

}

蒜蓉炒西兰花

黑暗料理王蒜蓉炒时蔬有哪10种配方

热点分享

家庭养花知识大全(家庭养花知识大全与技巧)

养花常识养花技巧 1.浇花 ①残茶浇花残茶用来浇花,既能保持土...

养花知识大全,养花技巧大全

养花知识绿萝是一种很常见的盆栽植物，因为四季翠绿、养护简单...

推荐分享

家庭养花风水知识家庭养花“五行说”

许多人喜欢在家庭里面养花，但不是很了解家庭养花风水知识。居家...

家庭养花知识大全家庭养花有什么好处

家庭养花知识大全家庭养花有什么好处爱花之人总是喜欢在家里...

热门点击排行

君子兰什么品种最名贵十大名贵君子兰排名

世界上最名贵的10种兰花图片，莲瓣兰价值高达1500万

分享分类导航

花卉

每日分享

花卉图片

养花生活

MapReduce——统计单词出现次数WordCount

蒜蓉炒西兰花

黑暗料理王蒜蓉炒时蔬有哪10种配方

家庭养花知识大全(家庭养花知识大全与技巧)

养花知识大全,养花技巧大全

家庭养花风水知识 家庭养花“五行说”

家庭养花知识大全 家庭养花有什么好处

家庭养花风水知识家庭养花“五行说”

家庭养花知识大全家庭养花有什么好处