PageRank with Hadoop MapReduce

This post presents a PageRank implementation built on the Hadoop MapReduce framework. By iterating over a file of link relationships, the job progressively adjusts each page's importance weight.


links.txt

Link relationships: each line lists a source page followed by its outgoing links, so the line "A B C D" means page A links to pages B, C, and D (and "C C" means C links only to itself).

A B C D
B A D
C C
D B C

part-r-00000: the initial probability distribution vector, one tab-separated page/rank pair per line (the format initRand parses below); every page starts at 0.25.

a = 0.8 (the damping factor)

A	0.25
B	0.25
C	0.25
D	0.25
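
The reducer below implements the update rule new_rank(p) = a * (sum of rank(q) / outdegree(q) over pages q linking to p) + (1 - a) * old_rank(p). As a hand check for the first iteration: B is linked to by A (three out-links) and D (two out-links), so

new_rank(B) = 0.8 * (0.25/3 + 0.25/2) + 0.2 * 0.25 ≈ 0.2167

Note that this variant feeds a page's own previous rank back in through the (1 - a) term, where textbook PageRank uses the uniform value 1/N instead.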

PageRankMapReduce

One iteration is one MapReduce job: the mapper spreads each page's current rank evenly across its outgoing links, and the reducer sums the contributions arriving at each page and applies the damping factor.

package org.bigdata.pagerank;

import java.io.IOException;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.bigdata.util.HadoopCfg;
import org.bigdata.util.HadoopUtil;

/**
 * PageRank
 * 
 * @author wwhhf
 * 
 */
public class PageRankMapReduce {

    private final static String JOB_NAME = "PageRank";
    // lines from files whose names start with this prefix are treated as link data
    private static String LINKS = "links";

    // rank vector from the previous iteration, keyed by page name
    private static Map<String, Double> rand = new HashMap<String, Double>();

    // damping factor
    private static final double a = 0.8;

    /**
     * Loads the previous iteration's rank vector (pathin/filename, one
     * tab-separated "page\trank" pair per line) into the static rand table.
     * Note: sharing state through a static field only works while the whole
     * job runs in a single JVM (e.g. the LocalJobRunner); on a real cluster
     * the vector would normally be read in setup() from a distributed file.
     */
    public static void initRand(String pathin, String filename)
            throws IOException {
        List<String> lines = HadoopUtil.lslFile(pathin, filename);
        for (String line : lines) {
            String terms[] = line.split("\t");
            rand.put(terms[0], Double.valueOf(terms[1]));
        }
    }

    private static class PageRankMapper extends
            Mapper<Text, Text, Text, DoubleWritable> {

        @Override
        protected void map(Text key, Text value, Context context)
                throws IOException, InterruptedException {
            // FileNameInputFormat supplies the source file's name as the key,
            // so only lines of the links file are processed here
            String filename = key.toString();
            if (filename.startsWith(LINKS)) {
                // dests[0] is the source page, the rest are its out-links
                String dests[] = value.toString().split(" ");
                double e = rand.get(dests[0]);
                for (int i = 0, len = dests.length; i < len; i++) {
                    String dest = dests[i];
                    if (i == 0) {
                        // emit 0.0 for the source itself so every page
                        // appears in the output, even with no in-links
                        context.write(new Text(dest), new DoubleWritable(0.0));
                    } else {
                        // spread the previous rank evenly over the out-links
                        context.write(new Text(dest), new DoubleWritable(e
                                / (len - 1)));
                    }
                }
            }
        }

    }

    private static class PageRankReducer extends
            Reducer<Text, DoubleWritable, Text, DoubleWritable> {

        @Override
        protected void reduce(Text key, Iterable<DoubleWritable> values,
                Context context) throws IOException, InterruptedException {
            // total rank flowing into this page from its in-links
            double sum = 0.0;
            for (DoubleWritable value : values) {
                sum += value.get();
            }
            // new rank = a * (incoming contributions) + (1 - a) * previous rank
            double e = rand.get(key.toString());
            context.write(key, new DoubleWritable(a * sum + (1 - a) * e));
        }

    }

    public static void solve(String linksin, String pathin, String pathout)
            throws ClassNotFoundException, InterruptedException {
        try {
            Configuration cfg = HadoopCfg.getConfiguration();
            Job job = Job.getInstance(cfg);
            job.setJobName(JOB_NAME);
            job.setJarByClass(PageRankMapReduce.class);
            // custom input format that keys each line by its source file name
            // (not shown in the post; a sketch is given below)
            job.setInputFormatClass(FileNameInputFormat.class);

            // mapper
            job.setMapperClass(PageRankMapper.class);
            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(DoubleWritable.class);

            // reducer
            job.setReducerClass(PageRankReducer.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(DoubleWritable.class);

            // both the previous rank vector and the links file are added as
            // inputs; the mapper filters on file name and only uses the links
            FileInputFormat.addInputPath(job, new Path(pathin));
            FileInputFormat.addInputPath(job, new Path(linksin));
            FileOutputFormat.setOutputPath(job, new Path(pathout));

            job.waitForCompletion(true);

        } catch (IllegalStateException | IllegalArgumentException | IOException e) {
            e.printStackTrace();
        }
    }

    public static void main(String[] args) throws ClassNotFoundException,
            InterruptedException, IOException {
        String path = "/pagerank";
        String links_pathin = "/pagerank_links";
        String filename = "part-r-00000";
        String tmp_pathin = path;
        // run 5 iterations; each one seeds rand from the previous iteration's
        // output and writes the new rank vector to /pagerank1 ... /pagerank5
        for (int i = 1; i <= 5; i++) {
            initRand(tmp_pathin, filename);
            String tmp_pathout = path + i;
            System.out.println(links_pathin + " " + tmp_pathin + " "
                    + tmp_pathout);
            solve(links_pathin, tmp_pathin, tmp_pathout);
            tmp_pathin = tmp_pathout;
        }
    }
}
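
The job relies on two helper classes that the post does not include, so the sketches below are inferred from how they are called; the packages and implementation details are assumptions, not the author's originals. FileNameInputFormat must emit (file name, line) pairs, since the mapper filters on the key; placed in the same package as the job, it needs no extra import. A minimal version that wraps Hadoop's LineRecordReader:

package org.bigdata.pagerank;

import java.io.IOException;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapreduce.lib.input.LineRecordReader;

/**
 * Assumed behavior: like TextInputFormat, but the key is the name of the
 * file the split belongs to rather than the byte offset of the line.
 */
public class FileNameInputFormat extends FileInputFormat<Text, Text> {

    @Override
    public RecordReader<Text, Text> createRecordReader(InputSplit split,
            TaskAttemptContext context) {
        return new FileNameRecordReader();
    }

    private static class FileNameRecordReader extends RecordReader<Text, Text> {

        private final LineRecordReader reader = new LineRecordReader();
        private Text fileName;

        @Override
        public void initialize(InputSplit split, TaskAttemptContext context)
                throws IOException {
            reader.initialize(split, context);
            // remember which file this split came from
            fileName = new Text(((FileSplit) split).getPath().getName());
        }

        @Override
        public boolean nextKeyValue() throws IOException {
            return reader.nextKeyValue();
        }

        @Override
        public Text getCurrentKey() {
            return fileName;
        }

        @Override
        public Text getCurrentValue() {
            return reader.getCurrentValue();
        }

        @Override
        public float getProgress() throws IOException {
            return reader.getProgress();
        }

        @Override
        public void close() throws IOException {
            reader.close();
        }
    }
}

Similarly, HadoopUtil.lslFile is presumably a small HDFS helper that returns the lines of pathin/filename, with HadoopCfg.getConfiguration() returning a Configuration pointing at the cluster. A plausible sketch:

package org.bigdata.util;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class HadoopUtil {

    // reads pathin/filename from HDFS and returns its lines
    public static List<String> lslFile(String pathin, String filename)
            throws IOException {
        FileSystem fs = FileSystem.get(HadoopCfg.getConfiguration());
        List<String> lines = new ArrayList<String>();
        BufferedReader reader = new BufferedReader(new InputStreamReader(
                fs.open(new Path(pathin, filename))));
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                lines.add(line);
            }
        } finally {
            reader.close();
        }
        return lines;
    }
}

After five iterations, the final rank vector is in /pagerank5/part-r-00000.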