
MapReduce-based Apriori Algorithm: Code and Usage (基于MapReduce的Apriori算法代码及其使用.docx)

package com;

import java.io.IOException;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class Apriori extends Configured implements Tool {

    enum Counter {
        LINESKIP, // lines that could not be parsed
    }

    public static class MAP extends Mapper<LongWritable, Text, Text, IntWritable> {
        @Override
        public void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // Each input line holds a transaction id and a comma-separated item list.
            StringTokenizer itr = new StringTokenizer(value.toString());
            String[] values = new String[2];
            int i = 0;
            while (itr.hasMoreTokens() && i < 2) {
                values[i] = itr.nextToken();
                i++;
            }
            if (i < 2 || values[0].isEmpty() || values[1].isEmpty()) {
                // Malformed line: count it and skip it.
                context.getCounter(Counter.LINESKIP).increment(1);
                return;
            }
            String[] dd = values[1].split(",");
            int[] result;
            StringBuilder output = new StringBuilder();
            // Emit every itemset of size 2..dd.length as a candidate with count 1.
            for (int j = 1; j < dd.length; j++) {
                Combination c = new Combination(j + 1, dd.length);
                while ((result = c.next()) != null) {
                    for (i = 0; i < result.length; i++) {
                        output.append(dd[result[i]]).append(',');
                    }
                    // Strip the trailing comma before emitting the itemset.
                    context.write(new Text(output.substring(0, output.length() - 1)),
                            new IntWritable(1));
                    output.setLength(0);
                }
            }
        }
    }

    public static class REDUCE extends Reducer<Text, IntWritable, Text, IntWritable> {
        @Override
        public void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int cnt = 0;
            for (IntWritable value : values) {
                cnt += value.get();
            }
            // min_sup could instead be read from the job configuration;
            // only itemsets meeting the minimum support are emitted.
            int min_sup = 2;
            if (cnt >= min_sup) {
                context.write(key, new IntWritable(cnt));
            }
        }
    }

    public static void main(String[] args) throws Exception {
        if (args.length != 2) {
            System.err.println("Usage: Apriori <input path> <output path>");
            System.exit(-1);
        }
        // Record the start time.
        DateFormat formatter = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
        Date start = new Date();
        // Run the job.
        int res = ToolRunner.run(new Configuration(), new Apriori(), args);
        // Report elapsed time.
        Date end = new Date();
        float time = (float) ((end.getTime() - start.getTime()) / 60000.0);
        System.out.println("Job started:  " + formatter.format(start));
        System.out.println("Job finished: " + formatter.format(end));
        System.out.println("Elapsed time: " + time + " minutes");
        System.exit(res);
    }

    @Override
    public int run(String[] args) throws Exception {
        Configuration conf = getConf();
        Job job = new Job(conf, "Apriori");          // job name
        job.setJarByClass(Apriori.class);            // class containing the job
        job.setJobName("Apriori map reduce");

        FileInputFormat.addInputPath(job, new Path(args[0]));
        Path path = new Path(args[1]);
        FileSystem.get(conf).delete(path, true);     // remove any previous output
        FileOutputFormat.setOutputPath(job, path);

        job.setMapperClass(MAP.class);
        job.setReducerClass(REDUCE.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        job.setNumReduceTasks(1);
        job.waitForCompletion(true);

        // Report how the job went.
        System.out.println("Job name:   " + job.getJobName());
        System.out.println("Successful: " + (job.isSuccessful() ? "yes" : "no"));
        System.out.println("Input records:  " + job.getCounters()
                .findCounter("org.apache.hadoop.mapred.Task$Counter", "MAP_INPUT_RECORDS").getValue());
        System.out.println("Output records: " + job.getCounters()
                .findCounter("org.apache.hadoop.mapred.Task$Counter", "REDUCE_OUTPUT_RECORDS").getValue());
        System.out.println("Skipped lines:  "
                + job.getCounters().findCounter(Counter.LINESKIP).getValue());
        return job.isSuccessful() ? 0 : 1;
    }
}

Usage notes: pass the input and output paths as the two arguments, e.g.
hdfs://master:9000/apriori/apriori.txt and hdfs://master:9000/apriori/out.