《Hadoop倒排索引例子》由会员分享,可在线阅读,更多相关《Hadoop倒排索引例子(4页珍藏版)》请在金锄头文库上搜索。
package cn.yws;

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

/**
 * Hadoop MapReduce inverted-index example.
 *
 * <p>Place the test files {@code file1} and {@code file2} under the
 * {@code index_in} directory before running. The job emits, for each word,
 * a semicolon-separated list of {@code filename:count} entries, e.g.
 * {@code word -> file1:2;file2:1;}.
 */
public class MyInvertedIndex {

    /**
     * Mapper: for every token in the input line, emits
     * {@code "word:filename" -> "1"}. The filename is recovered from the
     * current {@link FileSplit}'s path.
     */
    public static class Map extends Mapper<Object, Text, Text, Text> {
        private final Text keyinfo = new Text();
        private final Text valueinfo = new Text();
        private FileSplit split;

        @Override
        protected void map(Object key, Text value, Context context)
                throws IOException, InterruptedException {
            // The split tells us which file this line came from.
            split = (FileSplit) context.getInputSplit();
            StringTokenizer tokenizer = new StringTokenizer(value.toString());
            while (tokenizer.hasMoreTokens()) {
                // Keep only the "fileN" suffix of the full HDFS path.
                // NOTE(review): assumes every input file name contains "file";
                // indexOf returns -1 otherwise and substring would throw.
                int splitindex = split.getPath().toString().indexOf("file");
                keyinfo.set(tokenizer.nextToken() + ":"
                        + split.getPath().toString().substring(splitindex));
                valueinfo.set("1");
                // e.g. "word:file3" -> "1"
                context.write(keyinfo, valueinfo);
            }
        }
    }

    /**
     * Combiner: sums the counts for each {@code "word:file"} key, then
     * re-keys the record so the reducer sees {@code word -> "file:count"}.
     */
    public static class Combine extends Reducer<Text, Text, Text, Text> {
        private final Text infoText = new Text();

        @Override
        protected void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            int sum = 0;
            for (Text value : values) {
                sum += Integer.parseInt(value.toString());
            }
            // Split "word:file" back into its parts; the value becomes
            // "file:count" (e.g. "file2:1", "file3:2").
            int splitindex = key.toString().indexOf(":");
            infoText.set(key.toString().substring(splitindex + 1) + ":" + sum);
            key.set(key.toString().substring(0, splitindex));
            context.write(key, infoText);
        }
    }

    /**
     * Reducer: concatenates all {@code "file:count"} values for a word into
     * one semicolon-terminated document list.
     */
    public static class Reduce extends Reducer<Text, Text, Text, Text> {
        private final Text result = new Text();

        @Override
        protected void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            // StringBuilder instead of repeated String += (avoids O(n^2) copying).
            StringBuilder filelist = new StringBuilder();
            for (Text value : values) {
                filelist.append(value.toString()).append(";");
            }
            result.set(filelist.toString());
            context.write(key, result);
        }
    }

    /**
     * Configures and submits the job.
     *
     * @param args optional {@code <input dir> <output dir>} pair; defaults to
     *             {@code index_in} / {@code index_out3} when absent
     */
    public static void main(String[] args) {
        try {
            Configuration configuration = new Configuration();
            // Point the client at the JobTracker; without this the job
            // runs in local mode.
            configuration.set("mapred.job.tracker", "192.168.1.15:9001");
            String[] ioargs = new String[] { "index_in", "index_out3" };
            if (args.length == 2) {
                ioargs = args;
            }
            String[] otherArgs =
                    new GenericOptionsParser(configuration, ioargs).getRemainingArgs();
            if (otherArgs.length != 2) {
                System.err.println("Usage: inverted "
                        + MyInvertedIndex.class.getSimpleName() + " <in> <out>");
                System.exit(2);
            }
            Job job = new Job(configuration, MyInvertedIndex.class.getSimpleName());
            job.setJarByClass(MyInvertedIndex.class);
            // map -> combine -> reduce
            job.setMapperClass(Map.class);
            job.setCombinerClass(Combine.class);
            job.setReducerClass(Reduce.class);
            // Map output types.
            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(Text.class);
            // Reduce output types.
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(Text.class);
            // Input and output directories.
            FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
            FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
            System.exit(job.waitForCompletion(true) ? 0 : 1);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}