logo资料库

Java实现K-means算法.doc

第1页 / 共2页
第2页 / 共2页
资料共2页,全文预览结束
package org.conan.mymahout.cluster08; /** http://blog.fens.me/hadoop-mahout-kmeans/ **/ import org.apache.hadoop.fs.Path; import org.apache.hadoop.mapred.JobConf; import org.apache.mahout.clustering.conversion.InputDriver; import org.apache.mahout.clustering.kmeans.KMeansDriver; import org.apache.mahout.clustering.kmeans.RandomSeedGenerator; import org.apache.mahout.common.distance.DistanceMeasure; import org.apache.mahout.common.distance.EuclideanDistanceMeasure; import org.apache.mahout.utils.clustering.ClusterDumper; import org.conan.mymahout.hdfs.HdfsDAO; import org.conan.mymahout.recommendation.ItemCFHadoop; public class KmeansHadoop { private static final String HDFS = "hdfs://192.168.1.210:9000"; public static void main(String[] args) throws Exception { String localFile = "datafile/randomData.csv"; String inPath = HDFS + "/user/hdfs/mix_data"; String seqFile = inPath + "/seqfile"; String seeds = inPath + "/seeds"; String outPath = inPath + "/result/"; String clusteredPoints = outPath + "/clusteredPoints"; JobConf conf = config(); HdfsDAO hdfs = new HdfsDAO(HDFS, conf); hdfs.rmr(inPath); hdfs.mkdirs(inPath); hdfs.copyFile(localFile, inPath); hdfs.ls(inPath); "org.apache.mahout.math.RandomAccessSparseVector"); InputDriver.runJob(new Path(inPath), new Path(seqFile), int k = 3; Path seqFilePath = new Path(seqFile); Path clustersSeeds = new Path(seeds); DistanceMeasure measure = new EuclideanDistanceMeasure(); clustersSeeds = RandomSeedGenerator.buildRandom(conf, seqFilePath, clustersSeeds, k, measure); measure, 0.01, 10, true, 0.01, false); KMeansDriver.run(conf, seqFilePath, clustersSeeds, new Path(outPath), Path outGlobPath = new Path(outPath, "clusters-*-final"); Path clusteredPointsPath = new Path(clusteredPoints);
clusteredPoints: %s\n", outGlobPath, clusteredPointsPath); System.out.printf("Dumping out clusters from clusters: %s and ClusterDumper clusterDumper = new ClusterDumper(outGlobPath, clusteredPointsPath); clusterDumper.printClusters(null); } public static JobConf config() { JobConf conf = new JobConf(ItemCFHadoop.class); conf.setJobName("ItemCFHadoop"); conf.addResource("classpath:/hadoop/core-site.xml"); conf.addResource("classpath:/hadoop/hdfs-site.xml"); conf.addResource("classpath:/hadoop/mapred-site.xml"); return conf; } } 原始数据: ~ vi datafile/randomData.csv -0.883033363823402 -3.31967192630249 -2.39312626419456 3.34726861118871 2.66976353341256 1.85144276077058 -1.09922906899594 -6.06261735207489 -4.36361936997216 1.90509905380532 -0.00351835125495037 -0.610105996559153 -2.9962958796338 -3.60959839525735 -3.27529418132066 0.0230099799641799 2.17665594420569 6.77290756817957 -2.47862038335637 2.53431833167278 5.53654901906814 2.65089785582474 5.66257474538338 6.86783609641077 -0.558946883114376 1.22332819416237 5.11728525486132 3.74663871584768 1.91240516693351 2.95874731384062 -2.49747101306535 2.05006504756875 3.98781883213459 1.00780938946366 5.47470532716682 5.35084411045171
分享到:
收藏