博客园博客PDF生成器

最新推荐文章于 2024-10-11 13:39:57 发布

weixin_33888907

最新推荐文章于 2024-10-11 13:39:57 发布

阅读量165

点赞数

周末写了一个博客园博客PDF生成器，由于博客园文件上传大小的限制，我把源代码放在CSDN上了（相信大家都有帐号哈），如果没有帐号的请留下邮箱，我会尽快发给你，当然如果哪位朋友能帮忙把源代码上传到博客园上更好：博客园博客PDF生成器

废话不多说，直接看生成后的PDF效果哈：

博客中图片效果：

代码比较简单，这里先简单说一下思路，先通过博客地址取得该博客的RSS信息，这是一个XML文件，把源码存在本地，然后解析这个XML文件，从中取出需要的信息，再用iTextSharp这个DLL来操作PDF，从而生成PDF文档。

下面只贴出几个主要的类，大家有兴趣可以下载源代码看：

实体类channel，类属性是从XML文件中取得的：

实体类：

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;

namespace BlogsConvert
{
     /// <summary>
     /// Blog-level information; the property values come from the channel
     /// element of the RSS XML file.
     /// </summary>
     public class channel
    {
        /// <summary>Text of the channel's title element.</summary>
        public string Title { get; set; }

        /// <summary>Text of the channel's link element.</summary>
        public string Link { get; set; }

        /// <summary>Text of the channel's description element.</summary>
        public string Description { get; set; }

        /// <summary>Text of the channel's language element.</summary>
        public string Language { get; set; }

        /// <summary>Parsed value of the channel's lastBuildDate element.</summary>
        public DateTime LastBuildDate { get; set; }

        /// <summary>Parsed value of the channel's pubDate element.</summary>
        public DateTime PubDate { get; set; }

        /// <summary>Parsed value of the channel's ttl element.</summary>
        public int Ttl { get; set; }
    }
}

实体类item（属性来自XML文件）：

实体类：

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;

namespace BlogsConvert
{
     /// <summary>
     /// One blog article; the property values come from an item element of
     /// the RSS XML file.
     /// </summary>
     public class item
    {
        /// <summary>Text of the item's title element.</summary>
        public string Title { get; set; }

        /// <summary>Text of the item's link element.</summary>
        public string Link { get; set; }

        /// <summary>Text of the item's dc:creator element.</summary>
        public string Dc_creator { get; set; }

        /// <summary>Text of the item's author element.</summary>
        public string Author { get; set; }

        /// <summary>Parsed value of the item's pubDate element.</summary>
        public DateTime PubDate { get; set; }

        /// <summary>Text of the item's guid element.</summary>
        public string Guid { get; set; }

        /// <summary>Text of the item's description element.</summary>
        public string Description { get; set; }
    }
}

从XML文件中提取博客信息类：

代码

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Xml.Linq;
using System.Xml;

namespace BlogsConvert
{
     /// <summary>
     /// Extracts blog (channel) and article (item) information from an RSS XML file.
     /// </summary>
     public class BlogsInfo
    {
        /// <summary>
        /// Reads the blog owner's information from the channel element of the XML file.
        /// </summary>
        /// <param name="xmlPath">Path of the XML file.</param>
        /// <returns>
        /// The channel that was read, or null when the file has no channel element
        /// or the channel has no non-blank title.
        /// </returns>
        public channel GetChannel(string xmlPath)
        {
            channel cha = new channel();
            XmlNode channelNode = LoadChannelNode(xmlPath);
            if (channelNode != null)
            {
                foreach (XmlNode chanode in channelNode.ChildNodes)
                {
                    // Reading stops at the first article entry.
                    if (chanode.Name == "item")
                    {
                        break;
                    }

                    switch (chanode.Name)
                    {
                        case "title":
                            cha.Title = chanode.InnerText;
                            break;
                        case "link":
                            cha.Link = chanode.InnerText;
                            break;
                        case "description":
                            cha.Description = chanode.InnerText;
                            break;
                        case "language":
                            cha.Language = chanode.InnerText;
                            break;
                        case "lastBuildDate":
                            cha.LastBuildDate = ParseDate(chanode.InnerText);
                            break;
                        case "pubDate":
                            cha.PubDate = ParseDate(chanode.InnerText);
                            break;
                        case "ttl":
                            cha.Ttl = int.Parse(chanode.InnerText, System.Globalization.CultureInfo.InvariantCulture);
                            break;
                    }
                }
            }

            // Title is still null when no title element was read, so test for null before trimming.
            if (cha.Title != null && cha.Title.Trim() != "")
            {
                return cha;
            }
            return null;
        }

        /// <summary>
        /// Reads every article from the XML file.
        /// </summary>
        /// <param name="xmlPath">Path of the XML file.</param>
        /// <returns>The articles that have a link, or null when there are none.</returns>
        public IList<item> GetItems(string xmlPath)
        {
            return GetItems(xmlPath, "");
        }

        /// <summary>
        /// Reads the articles from the XML file, optionally keeping only those whose
        /// title contains a keyword.
        /// </summary>
        /// <param name="xmlPath">Path of the XML file.</param>
        /// <param name="keyWord">
        /// Keyword the title must contain; null, empty or blank means no filtering.
        /// </param>
        /// <returns>The matching articles that have a link, or null when there are none.</returns>
        public IList<item> GetItems(string xmlPath, string keyWord)
        {
            IList<item> itemList = new List<item>();
            bool filterByTitle = keyWord != null && keyWord.Trim() != "";

            XmlNode channelNode = LoadChannelNode(xmlPath);
            if (channelNode != null)
            {
                foreach (XmlNode statusnode in channelNode.ChildNodes)
                {
                    if (statusnode.Name != "item")
                    {
                        continue;
                    }

                    // Decide on the title before reading anything else, so the result
                    // does not depend on the order of the elements inside the item.
                    if (filterByTitle && !TitleContains(statusnode, keyWord))
                    {
                        continue;
                    }

                    item temp = ReadItem(statusnode);
                    if (temp.Link != null)
                    {
                        itemList.Add(temp);
                    }
                }
            }

            if (itemList.Count > 0)
            {
                return itemList;
            }
            return null;
        }

        /// <summary>
        /// Loads the XML file and returns the first child of the root element that is
        /// named "channel", or null when there is none.
        /// </summary>
        private static XmlNode LoadChannelNode(string xmlPath)
        {
            XmlDocument myXml = new XmlDocument();
            myXml.Load(xmlPath);

            XmlNode root = myXml.DocumentElement;
            if (root == null)
            {
                return null;
            }

            foreach (XmlNode child in root.ChildNodes)
            {
                if (child.Name == "channel")
                {
                    return child;
                }
            }
            return null;
        }

        /// <summary>
        /// Returns false when the item has a title element whose text does not contain
        /// the keyword; an item without a title element is not filtered out.
        /// </summary>
        private static bool TitleContains(XmlNode itemNode, string keyWord)
        {
            foreach (XmlNode child in itemNode.ChildNodes)
            {
                if (child.Name == "title" && !child.InnerText.Contains(keyWord))
                {
                    return false;
                }
            }
            return true;
        }

        /// <summary>
        /// Copies the child elements of one item element into a new item object.
        /// </summary>
        private static item ReadItem(XmlNode itemNode)
        {
            item result = new item();
            foreach (XmlNode o in itemNode.ChildNodes)
            {
                switch (o.Name)
                {
                    case "title":
                        result.Title = o.InnerText;
                        break;
                    case "link":
                        result.Link = o.InnerText;
                        break;
                    case "dc:creator":
                        result.Dc_creator = o.InnerText;
                        break;
                    case "author":
                        result.Author = o.InnerText;
                        break;
                    case "pubDate":
                        result.PubDate = ParseDate(o.InnerText);
                        break;
                    case "guid":
                        result.Guid = o.InnerText;
                        break;
                    case "description":
                        result.Description = o.InnerText;
                        break;
                }
            }
            return result;
        }

        /// <summary>
        /// Parses a date from the feed. The invariant culture is tried first because the
        /// text is machine-generated; the current culture is the fallback so that any
        /// text accepted before is still accepted.
        /// </summary>
        /// <exception cref="FormatException">The text is not a date in either culture.</exception>
        private static DateTime ParseDate(string text)
        {
            DateTime parsed;
            if (DateTime.TryParse(text, System.Globalization.CultureInfo.InvariantCulture,
                                  System.Globalization.DateTimeStyles.None, out parsed))
            {
                return parsed;
            }
            return DateTime.Parse(text);
        }
    }
}

PDF文件生成类，也是本软件中最重要的一个类，其实就是iTextSharp的运用（这个DLL文件在源代码中有）：

代码

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using iTextSharp.text;
using iTextSharp.text.pdf;
using System.IO;
using System.Text.RegularExpressions;

namespace BlogsConvert
{
     public class ToPdf:IConvert
    {
         #region IConvert 成员

         /// <summary>
         /// Generates a PDF file from the blog information and its articles: a header,
         /// the blog title and description, a table of contents, then every article
         /// with its pictures. Nothing is written when <paramref name="commonInfo"/> or
         /// <paramref name="itemList"/> is null.
         /// </summary>
         /// <param name="commonInfo">Blog owner information.</param>
         /// <param name="itemList">Articles to put into the document.</param>
         /// <param name="path">Path of the PDF file to create.</param>
         public void Convert(channel commonInfo, IList<item> itemList, string path)
        {
            if (commonInfo == null || itemList == null)
            {
                return;
            }

            // Load the font before the output file is created, so a missing font file
            // does not leave an empty PDF behind. One BaseFont serves both sizes.
            BaseFont baseFont = BaseFont.CreateFont(@"Fonts\SIMHEI.TTF", BaseFont.IDENTITY_H, BaseFont.NOT_EMBEDDED);
            Font font = new Font(baseFont, 12);
            Font fontBlod = new Font(baseFont, 15);

            using (FileStream output = new FileStream(path, FileMode.Create))
            {
                Document document = new Document(PageSize.A4);
                PdfWriter.GetInstance(document, output);
                document.Open();
                try
                {
                    AddFrontMatter(document, commonInfo, itemList, font, fontBlod);

                    // Separator line printed after every article.
                    Paragraph hr = new Paragraph("--------------------------------------------------------------------------------------------------------");
                    hr.Alignment = 1;
                    hr.SpacingAfter = 20;
                    hr.SpacingBefore = 20;

                    int index = 1;
                    foreach (item o in itemList)
                    {
                        AddArticle(document, o, index, font, fontBlod);
                        document.Add(hr);
                        index++;
                    }

                    // Closing note.
                    Paragraph drj = new Paragraph(new Chunk("本程序由博客园——天行健(http://home.cnblogs.com/u/durongjian/)制作，如有建议请发邮件至drjchina@163.com", font));
                    drj.Alignment = 1;
                    drj.SpacingAfter = 20;
                    drj.SpacingBefore = 20;
                    document.Add(drj);
                }
                finally
                {
                    // Always finish the document, even when an article failed.
                    document.Close();
                }
            }
        }

         // Finds the address of every <img src=...> (or background=...) in an article body.
         private static readonly Regex ImageSourcePattern = new Regex(
             @"(?is)(?:<img[^>]*?src|\bbackground)=(?:(['""])(?<img>[^'"">]+)\1|(?<img>[^'""\s>]+))");

         /// <summary>
         /// Adds the generated-at notice, the blog title and description and the table of contents.
         /// Alignment values: 1 = centered, 0 = left, 2 = right.
         /// </summary>
         private static void AddFrontMatter(Document document, channel commonInfo, IList<item> itemList, Font font, Font fontBlod)
        {
            // Notice with the generation time.
            Paragraph pToop = new Paragraph(new Chunk("本文档由程序整理生成（生成时间：" + DateTime.Now + "）", fontBlod));
            pToop.Alignment = 1;
            pToop.SpacingAfter = 20;
            document.Add(pToop);

            // Blog title.
            Paragraph pTitle = new Paragraph(new Phrase(commonInfo.Title, fontBlod));
            pTitle.Alignment = 1;
            pTitle.SpacingAfter = 20;
            document.Add(pTitle);

            // Blog sub-title, with double line spacing.
            Paragraph pDescription = new Paragraph(commonInfo.Description, font);
            pDescription.Alignment = 0;
            pDescription.MultipliedLeading = 2;
            pDescription.SpacingAfter = 20;
            document.Add(pDescription);

            // Heading of the table of contents.
            Paragraph allGuid = new Paragraph("目      录", fontBlod);
            allGuid.Alignment = 1;
            allGuid.SpacingBefore = 10;
            document.Add(allGuid);

            // One link per article; the blank paragraph acts as a line break.
            Paragraph guid = new Paragraph("      ");
            guid.MultipliedLeading = 1;
            for (int i = 0; i < itemList.Count; i++)
            {
                item o = itemList[i];
                Anchor aTitle = new Anchor("第" + (i + 1) + "篇：" + o.Title, font);
                // Must equal the Name given to the article's anchor in AddArticle.
                aTitle.Reference = "#link" + o.PubDate.ToString();
                document.Add(aTitle);
                document.Add(guid);
            }
            document.Add(guid);
            document.Add(guid);
            document.Add(guid);
        }

         /// <summary>
         /// Adds one article: the anchor targeted by the table of contents, the title,
         /// then the body split into paragraphs and pictures.
         /// </summary>
         private static void AddArticle(Document document, item o, int index, Font font, Font fontBlod)
        {
            Anchor lTitle = new Anchor("第" + index + "篇：", font);
            lTitle.Name = "link" + o.PubDate.ToString();
            document.Add(lTitle);

            Paragraph blogTitle = new Paragraph(o.Title, fontBlod);
            blogTitle.Alignment = 1;
            blogTitle.MultipliedLeading = 1;
            document.Add(blogTitle);

            // The separator character marks paragraph breaks while the HTML is flattened.
            string content = o.Description ?? "";
            content = content.Replace("<p>", "卍");
            content = content.Replace("<br />", "卍");
            content = content.Replace("<br/ />", "卍");
            content = content.Replace("<br/>", "卍");
            content = content.Replace("<br>", "卍");

            // Wrap every picture address in a "Pic...ciP" marker that survives tag removal.
            IList<string> picList = new List<string>();
            foreach (Match m in ImageSourcePattern.Matches(content))
            {
                string url = m.Groups["img"].Value;
                // Replace() already rewrote every occurrence of an address seen before;
                // handling it again would nest the markers.
                if (url.Contains("OutliningIndicators") || picList.Contains(url))
                {
                    continue;
                }
                picList.Add(url);
                content = content.Replace(url, "\" />卍Pic" + url + "ciP卍<img src=\"");
            }

            // Remove the HTML tags.
            content = NoHTML(content);

            // Emit one paragraph or picture per separated part.
            foreach (string part in content.Split('卍'))
            {
                string picture = null;
                foreach (string candidate in picList)
                {
                    if (part == "Pic" + candidate + "ciP")
                    {
                        picture = candidate;
                        break;
                    }
                }

                if (picture != null)
                {
                    Image jpeg = Image.GetInstance(picture);
                    if (jpeg.Width > PageSize.A4.Width)
                    {
                        // Shrink to the page width and keep the aspect ratio.
                        jpeg.ScaleAbsolute(PageSize.A4.Width, jpeg.Height * PageSize.A4.Width / jpeg.Width);
                    }
                    jpeg.Alignment = Image.MIDDLE_ALIGN;
                    document.Add(jpeg);
                }
                else
                {
                    Paragraph blogContent = new Paragraph(part, font);
                    blogContent.Alignment = 0;
                    blogContent.MultipliedLeading = 2;
                    blogContent.SpacingAfter = 10;
                    document.Add(blogContent);
                }
            }
        }

         /// <summary>
         /// Removes HTML markup from a string and decodes a few common character entities.
         /// </summary>
         /// <param name="Htmlstring">String that may contain HTML tags.</param>
         /// <returns>The trimmed plain text; an empty string when the input is null or empty.</returns>
         public static string NoHTML(string Htmlstring)
        {
            if (string.IsNullOrEmpty(Htmlstring))
            {
                return "";
            }

            // Singleline lets ".*?" cross line breaks so a multi-line script is removed whole.
            Htmlstring = Regex.Replace(Htmlstring, @"<script[^>]*?>.*?</script>", "", RegexOptions.IgnoreCase | RegexOptions.Singleline);
            Htmlstring = Regex.Replace(Htmlstring, @"<(.[^>]*)>", "", RegexOptions.IgnoreCase);
            Htmlstring = Regex.Replace(Htmlstring, @"([\r\n])[\s]+", "", RegexOptions.IgnoreCase);
            Htmlstring = Regex.Replace(Htmlstring, @"-->", "", RegexOptions.IgnoreCase);
            Htmlstring = Regex.Replace(Htmlstring, @"<!--.*", "", RegexOptions.IgnoreCase);

            // Entities are decoded after the tags are gone, so a decoded "<" or ">" is
            // kept as text.
            Htmlstring = Regex.Replace(Htmlstring, @"&(quot|#34);", "\"", RegexOptions.IgnoreCase);
            Htmlstring = Regex.Replace(Htmlstring, @"&(amp|#38);", "&", RegexOptions.IgnoreCase);
            Htmlstring = Regex.Replace(Htmlstring, @"&(lt|#60);", "<", RegexOptions.IgnoreCase);
            Htmlstring = Regex.Replace(Htmlstring, @"&(gt|#62);", ">", RegexOptions.IgnoreCase);
            Htmlstring = Regex.Replace(Htmlstring, @"&(nbsp|#160);", " ", RegexOptions.IgnoreCase);
            Htmlstring = Regex.Replace(Htmlstring, @"&(iexcl|#161);", "\xa1", RegexOptions.IgnoreCase);
            Htmlstring = Regex.Replace(Htmlstring, @"&(cent|#162);", "\xa2", RegexOptions.IgnoreCase);
            Htmlstring = Regex.Replace(Htmlstring, @"&(pound|#163);", "\xa3", RegexOptions.IgnoreCase);
            Htmlstring = Regex.Replace(Htmlstring, @"&(copy|#169);", "\xa9", RegexOptions.IgnoreCase);
            // Any other numeric entity is dropped.
            Htmlstring = Regex.Replace(Htmlstring, @"&#(\d+);", "", RegexOptions.IgnoreCase);

            // The three trailing Replace calls of the earlier version discarded their
            // result and never changed the text; they are omitted so the output stays the same.
            return Htmlstring.Trim();
        }

         #endregion
    }
}

最后就是调用类了，先看一下软件界面吧：

后台代码：

代码

using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Linq;
using System.Text;
using System.Windows.Forms;
using BlogsConvert;
using System.Net;
using System.IO;

namespace CnBlogsHelper
{
     /// <summary>
     /// Main form: downloads the RSS feed of a blog, lists the extracted articles and
     /// writes them to a PDF file on the desktop.
     /// </summary>
     public partial class BlogToPdf : Form
    {
        /// <summary>Blog information from the last extraction; null when none could be read.</summary>
        public channel commonInfo = new channel();

        /// <summary>Articles that will be written to the PDF.</summary>
        public IList<item> blogInfos = new List<item>();

        // Text of the keyword box that is treated as "no keyword"
        // (presumably the box's initial hint text; confirm against the designer file).
        private const string KeyWordHint = "请输入标题中的关键字";

        public BlogToPdf()
        {
            InitializeComponent();
        }

        private void BlogToPdf_Load(object sender, EventArgs e)
        {
        }

        /// <summary>
        /// Downloads the RSS source and saves it as Blogs.xml in the application's
        /// start-up folder, overwriting any earlier file.
        /// </summary>
        /// <param name="PageUrl">Address of the RSS feed.</param>
        /// <exception cref="Exception">The downloaded content is blank.</exception>
        public void GetXML(string PageUrl)
        {
            // Send a GET request and read the whole response.
            // NOTE(review): the response is always decoded as GB2312 and re-written as
            // UTF-8, whatever encoding the server declares - confirm this matches the feed.
            string Content;
            WebRequest request = WebRequest.Create(PageUrl);
            using (WebResponse response = request.GetResponse())
            using (Stream resStream = response.GetResponseStream())
            using (StreamReader sr = new StreamReader(resStream, System.Text.Encoding.GetEncoding("GB2312")))
            {
                Content = sr.ReadToEnd();
            }

            // StreamWriter creates the file when it is missing and overwrites it otherwise.
            string xmlPath = Application.StartupPath + @"\Blogs.xml";
            using (StreamWriter writer = new StreamWriter(xmlPath, false, System.Text.Encoding.GetEncoding("UTF-8")))
            {
                writer.Write(Content);
            }

            if (Content.Trim() == "")
            {
                throw new Exception("用户名有误，请检查后重新输入!");
            }
        }

        /// <summary>
        /// Writes the PDF file to the desktop.
        /// </summary>
        /// <param name="saveName">Name of the PDF file, without extension.</param>
        /// <param name="cha">Blog owner information.</param>
        /// <param name="itemList">Articles to write.</param>
        public void CreatePDF(string saveName, channel cha, IList<item> itemList)
        {
            IConvert con = new ToPdf();
            string dir = Environment.GetFolderPath(Environment.SpecialFolder.DesktopDirectory);
            con.Convert(cha, itemList, dir + "\\" + saveName + ".pdf");
        }

        // "Create" button: writes the listed articles to the PDF.
        private void btnCreate_Click(object sender, EventArgs e)
        {
            if (!CheckForm())
            {
                return;
            }

            // Both fields are null after an extraction that found nothing.
            if (commonInfo == null || blogInfos == null || blogInfos.Count == 0)
            {
                MessageBox.Show("博客数为0，请先提取博客信息！");
                return;
            }

            try
            {
                Wait f = new Wait();
                try
                {
                    f.Show();
                    Application.DoEvents();
                    CreatePDF(txtFileName.Text.Trim(), commonInfo, blogInfos);
                }
                finally
                {
                    // Close the waiting window on failure as well.
                    f.Close();
                }
                MessageBox.Show("PDF文档“" + txtFileName.Text.Trim() + ".pdf”生成成功,文档在桌面!");
            }
            catch (Exception ex)
            {
                MessageBox.Show("异常信息:" + ex.Message);
            }
        }

        // "Find" button: downloads the feed and lists the matching articles.
        private void btnFind_Click(object sender, EventArgs e)
        {
            if (!CheckForm())
            {
                return;
            }

            libBlog.Items.Clear();

            // The feed address is the blog address followed by "/rss".
            string pageUrl = txtBlogUrl.Text.Trim();
            if (!pageUrl.EndsWith("/", StringComparison.Ordinal))
            {
                pageUrl = pageUrl + "/";
            }
            pageUrl = pageUrl + "rss";

            try
            {
                // Show the waiting window while the feed is downloaded and parsed.
                Wait f = new Wait();
                try
                {
                    f.Show();
                    Application.DoEvents();

                    GetXML(pageUrl);
                    string path = Application.StartupPath + @"\Blogs.xml";
                    string keyWord = txtKeyWord.Text.Trim();
                    if (keyWord == KeyWordHint)
                    {
                        keyWord = "";
                    }

                    BlogsInfo blogInfo = new BlogsInfo();
                    commonInfo = blogInfo.GetChannel(path);
                    // GetItems returns null when nothing matched; keep the field usable.
                    blogInfos = blogInfo.GetItems(path, keyWord) ?? new List<item>();

                    foreach (item o in blogInfos)
                    {
                        libBlog.Items.Add(o.Title);
                    }
                }
                finally
                {
                    f.Close();
                }

                if (blogInfos.Count == 0)
                {
                    MessageBox.Show("没有找到符合条件的博客文章！");
                }
            }
            catch (Exception ex)
            {
                MessageBox.Show("异常信息:" + ex.Message);
            }
        }

        // "Clear all" button.
        private void btnClearAll_Click(object sender, EventArgs e)
        {
            libBlog.Items.Clear();
            if (blogInfos != null)
            {
                blogInfos.Clear();
            }
        }

        // "Remove selected" button.
        private void btnClearCurrent_Click(object sender, EventArgs e)
        {
            int index = libBlog.SelectedIndex;
            if (index < 0)
            {
                // Nothing is selected.
                return;
            }

            // Remove by position: removing by value would delete the first entry with
            // the same title and leave the list box and blogInfos out of step.
            libBlog.Items.RemoveAt(index);
            if (blogInfos != null && index < blogInfos.Count)
            {
                blogInfos.RemoveAt(index);
            }
        }

        // Clicking the keyword box clears the hint text.
        private void txtKeyWord_Click(object sender, EventArgs e)
        {
            if (txtKeyWord.Text.Trim() == KeyWordHint)
            {
                txtKeyWord.Text = "";
            }
        }

        /// <summary>
        /// Checks that the blog address and the file name are filled in; when one is
        /// blank, shows a message and resets both boxes to their default values.
        /// </summary>
        /// <returns>true when both boxes are filled in.</returns>
        private bool CheckForm()
        {
            if (txtBlogUrl.Text.Trim() == "" || txtFileName.Text.Trim() == "")
            {
                MessageBox.Show("博客地址和保存文件名不能为空!");
                txtBlogUrl.Text = "http://www.cnblogs.com/";
                txtFileName.Text = "我的博客";
                return false;
            }
            return true;
        }
    }
}

其中调用了一个等待窗体Wait，非常简单，这里就不说了，大家可以看源代码。

博客园中高手如云，本人只能算个菜，只是把自己写的一点小东西拿出来跟大家分享，希望能帮到大家，欢迎各位朋友批评指正，如果使用过程中有错误请留言哦。

本软件目的是服务博客园的朋友们，源代码完全开源，但转载或二次开发请注明出处。