博客园博客PDF生成器

      周末写了一个博客园博客PDF生成器,由于博客园文件上传大小的限制,我把源代码放在CSDN上了(相信大家都有帐号哈),如果没有帐号的请留下邮箱,我会尽快发给你,当然如果哪位朋友能帮忙把源代码上传到博客园上更好:博客园博客PDF生成器 

      废话不多说,直接看生成后的PDF效果哈:

 

博客中图片效果:

 

      代码比较简单,这里先简单说一下思路:先通过博客地址取得该博客的RSS信息,这是一个XML文件,把源码存在本地,然后解析这个XML文件,从中取出需要的信息,再用iTextSharp这个DLL来操作PDF,从而生成PDF文档。

      下面只贴出几个主要的类,大家有兴趣可以下载源代码看:

      实体类channel,类属性是从XML文件中取得的:

实体类:
using  System;
using  System.Collections.Generic;
using  System.Linq;
using  System.Text;

namespace  BlogsConvert
{
    
/// <summary>
/// Plain data holder for the blog-level values of an RSS feed
/// (title, link, description, language, dates and TTL).
/// </summary>
public class channel
{
    /// <summary>Blog title.</summary>
    public string Title { get; set; }

    /// <summary>Blog address.</summary>
    public string Link { get; set; }

    /// <summary>Blog description / subtitle.</summary>
    public string Description { get; set; }

    /// <summary>Language code of the feed.</summary>
    public string Language { get; set; }

    /// <summary>Time the feed content was last built.</summary>
    public DateTime LastBuildDate { get; set; }

    /// <summary>Publication time of the feed.</summary>
    public DateTime PubDate { get; set; }

    /// <summary>Time-to-live value of the feed.</summary>
    public int Ttl { get; set; }
}
}

 

 

      实体类item(属性来自XML文件):

 

实体类:
using  System;
using  System.Collections.Generic;
using  System.Linq;
using  System.Text;

namespace  BlogsConvert
{
    
/// <summary>
/// Plain data holder for one article of an RSS feed
/// (title, link, creator, author, publication date, guid and description).
/// </summary>
public class item
{
    /// <summary>Article title.</summary>
    public string Title { get; set; }

    /// <summary>Article address.</summary>
    public string Link { get; set; }

    /// <summary>Value of the feed's <c>dc:creator</c> element.</summary>
    public string Dc_creator { get; set; }

    /// <summary>Article author.</summary>
    public string Author { get; set; }

    /// <summary>Publication time of the article.</summary>
    public DateTime PubDate { get; set; }

    /// <summary>Identifier of the article within the feed.</summary>
    public string Guid { get; set; }

    /// <summary>Article body as delivered by the feed.</summary>
    public string Description { get; set; }
}
}

 

 

      从XML文件中提取博客信息类:

 

代码
using  System;
using  System.Collections.Generic;
using  System.Linq;
using  System.Text;
using  System.Xml.Linq;
using  System.Xml;

namespace  BlogsConvert
{
    
/// <summary>
/// Reads blog-level (channel) and article-level (item) information
/// from an RSS XML file stored on disk.
/// </summary>
public class BlogsInfo
{
    /// <summary>
    /// Reads the blog-level information from the RSS file.
    /// </summary>
    /// <param name="xmlPath">Path of the RSS XML file.</param>
    /// <returns>
    /// The populated channel, or null when the document has no channel element
    /// or the channel has no non-blank title.
    /// </returns>
    /// <remarks>
    /// Exceptions from <c>XmlDocument.Load</c> and from date/number parsing are not
    /// caught here and propagate to the caller.
    /// </remarks>
    public channel GetChannel(string xmlPath)
    {
        XmlNode channelNode = FindChannelNode(xmlPath);
        if (channelNode == null)
        {
            return null;
        }

        channel cha = new channel();
        foreach (XmlNode chanode in channelNode.ChildNodes)
        {
            // Only the elements in front of the first article are read as blog metadata.
            if (chanode.Name == "item")
            {
                break;
            }

            switch (chanode.Name)
            {
                case "title":
                    cha.Title = chanode.InnerText;
                    break;
                case "link":
                    cha.Link = chanode.InnerText;
                    break;
                case "description":
                    cha.Description = chanode.InnerText;
                    break;
                case "language":
                    cha.Language = chanode.InnerText;
                    break;
                case "lastBuildDate":
                    cha.LastBuildDate = ParseDate(chanode.InnerText);
                    break;
                case "pubDate":
                    cha.PubDate = ParseDate(chanode.InnerText);
                    break;
                case "ttl":
                    cha.Ttl = int.Parse(chanode.InnerText, System.Globalization.CultureInfo.InvariantCulture);
                    break;
            }
        }

        // A missing title leaves Title null; treat that the same as a blank title.
        if (cha.Title == null || cha.Title.Trim() == "")
        {
            return null;
        }
        return cha;
    }

    /// <summary>
    /// Reads every article from the RSS file, without keyword filtering.
    /// </summary>
    /// <param name="xmlPath">Path of the RSS XML file.</param>
    /// <returns>The articles found, or null when there are none.</returns>
    public IList<item> GetItems(string xmlPath)
    {
        return GetItems(xmlPath, "");
    }

    /// <summary>
    /// Reads the articles from the RSS file, optionally keeping only those whose
    /// title contains a keyword.
    /// </summary>
    /// <param name="xmlPath">Path of the RSS XML file.</param>
    /// <param name="keyWord">
    /// Keyword the title must contain; null or blank disables the filter.
    /// </param>
    /// <returns>
    /// The matching articles that have a link, or null when there are none
    /// (callers must check for null before enumerating).
    /// </returns>
    public IList<item> GetItems(string xmlPath, string keyWord)
    {
        IList<item> itemList = new List<item>();
        bool filterByTitle = keyWord != null && keyWord.Trim() != "";

        XmlNode channelNode = FindChannelNode(xmlPath);
        if (channelNode != null)
        {
            foreach (XmlNode statusnode in channelNode.ChildNodes)
            {
                if (statusnode.Name != "item")
                {
                    continue;
                }

                item temp = new item();
                // Tracked separately from the field values so the filter works
                // regardless of the order in which the child elements appear.
                bool titleMatches = true;

                foreach (XmlNode o in statusnode.ChildNodes)
                {
                    switch (o.Name)
                    {
                        case "title":
                            temp.Title = o.InnerText;
                            if (filterByTitle && !o.InnerText.Contains(keyWord))
                            {
                                titleMatches = false;
                            }
                            break;
                        case "link":
                            temp.Link = o.InnerText;
                            break;
                        case "dc:creator":
                            temp.Dc_creator = o.InnerText;
                            break;
                        case "author":
                            temp.Author = o.InnerText;
                            break;
                        case "pubDate":
                            temp.PubDate = ParseDate(o.InnerText);
                            break;
                        case "guid":
                            temp.Guid = o.InnerText;
                            break;
                        case "description":
                            temp.Description = o.InnerText;
                            break;
                    }
                }

                if (titleMatches && temp.Link != null)
                {
                    itemList.Add(temp);
                }
            }
        }

        // Kept for compatibility with existing callers: "no articles" is reported as null.
        if (itemList.Count > 0)
        {
            return itemList;
        }
        return null;
    }

    /// <summary>
    /// Loads the XML file and returns the first child of the root element
    /// named "channel", or null when there is none.
    /// </summary>
    private static XmlNode FindChannelNode(string xmlPath)
    {
        XmlDocument myXml = new XmlDocument();
        myXml.Load(xmlPath);

        XmlNode root = myXml.DocumentElement;
        if (root == null)
        {
            return null;
        }

        foreach (XmlNode child in root.ChildNodes)
        {
            if (child.Name == "channel")
            {
                return child;
            }
        }
        return null;
    }

    /// <summary>
    /// Parses a feed date with the invariant culture so the result does not
    /// depend on the regional settings of the machine running the program.
    /// </summary>
    private static DateTime ParseDate(string text)
    {
        return DateTime.Parse(text, System.Globalization.CultureInfo.InvariantCulture);
    }
}
}

 

 

        PDF文件生成类,也是本软件中最重要的一个类,其实就是iTextSharp的运用(这个DLL文件在源代码中有):

 

代码
using  System;
using  System.Collections.Generic;
using  System.Linq;
using  System.Text;
using  iTextSharp.text;
using  iTextSharp.text.pdf;
using  System.IO;
using  System.Text.RegularExpressions;

namespace  BlogsConvert
{
    
public   class  ToPdf:IConvert
    {
        
#region  IConvert 成员

        
/// <summary>
/// Renders the blog header, a linked table of contents and every article
/// into a single A4 PDF file.
/// </summary>
/// <param name="commonInfo">Blog-level information (title and description are used).</param>
/// <param name="itemList">Articles to render, in output order.</param>
/// <param name="path">Path of the PDF file to create; an existing file is overwritten.</param>
/// <remarks>
/// Does nothing when <paramref name="commonInfo"/> or <paramref name="itemList"/> is null.
/// Exceptions raised while loading the font, writing the file or loading an image
/// propagate to the caller; the document and output stream are closed first.
/// </remarks>
public void Convert(channel commonInfo, IList<item> itemList, string path)
{
    if (commonInfo == null || itemList == null)
    {
        return;
    }

    // Marker inserted at paragraph boundaries while the article HTML is flattened
    // to text; the flattened text is later split on it.
    const char separator = '卍';
    // An image URL is wrapped as  separator + "Pic" + url + "ciP" + separator
    // so that it survives tag stripping as its own segment.
    const string picPrefix = "Pic";
    const string picSuffix = "ciP";
    string separatorText = separator.ToString();

    // Load the font before the output file is created so that a missing font
    // file does not leave an empty PDF behind. Both sizes share one base font.
    BaseFont baseFont = BaseFont.CreateFont(@"Fonts\SIMHEI.TTF", BaseFont.IDENTITY_H, BaseFont.NOT_EMBEDDED);
    Font font = new Font(baseFont, 12);
    Font fontBlod = new Font(baseFont, 15);

    // Captures the URL of <img src=...> (quoted or unquoted) and background=... attributes.
    Regex reg = new Regex(@"(?is)(?:<img[^>]*?src|\bbackground)=(?:(['""])(?<img>[^'"">]+)\1|(?<img>[^'""\s>]+))");

    Rectangle pageSize = PageSize.A4;
    Document document = new Document(pageSize);

    using (FileStream output = new FileStream(path, FileMode.Create))
    {
        PdfWriter.GetInstance(document, output);
        document.Open();
        try
        {
            // Generation notice.
            Paragraph pToop = new Paragraph(new Chunk("本文档由程序整理生成(生成时间:" + DateTime.Now + ")", fontBlod));
            pToop.Alignment = Element.ALIGN_CENTER;
            pToop.SpacingAfter = 20;
            document.Add(pToop);

            // Blog title.
            Paragraph pTitle = new Paragraph(new Phrase(commonInfo.Title, fontBlod));
            pTitle.Alignment = Element.ALIGN_CENTER;
            pTitle.SpacingAfter = 20;
            document.Add(pTitle);

            // Blog subtitle, double line spacing.
            Paragraph pDescription = new Paragraph(commonInfo.Description, font);
            pDescription.Alignment = Element.ALIGN_LEFT;
            pDescription.MultipliedLeading = 2;
            pDescription.SpacingAfter = 20;
            document.Add(pDescription);

            // Table-of-contents heading.
            Paragraph allGuid = new Paragraph("目      录", fontBlod);
            allGuid.Alignment = Element.ALIGN_CENTER;
            allGuid.SpacingBefore = 10;
            document.Add(allGuid);

            // Table-of-contents entries; the blank paragraph acts as a line break.
            Paragraph guid = new Paragraph("     ");
            guid.MultipliedLeading = 1;
            for (int i = 0; i < itemList.Count; i++)
            {
                Anchor aTitle = new Anchor("第" + (i + 1) + "篇:  " + itemList[i].Title, font);
                // Anchor names are based on the article position so that two articles
                // with the same (or missing) publication date cannot collide.
                aTitle.Reference = "#link" + (i + 1);
                document.Add(aTitle);
                document.Add(guid);
            }
            document.Add(guid);
            document.Add(guid);
            document.Add(guid);

            // Separator line placed after each article.
            Paragraph hr = new Paragraph("--------------------------------------------------------------------------------------------------------");
            hr.Alignment = Element.ALIGN_CENTER;
            hr.SpacingAfter = 20;
            hr.SpacingBefore = 20;

            // Widest image that still fits between the page margins.
            float maxImageWidth = pageSize.Width - document.LeftMargin - document.RightMargin;

            for (int index = 0; index < itemList.Count; index++)
            {
                item o = itemList[index];

                // Link target for the matching table-of-contents entry.
                Anchor lTitle = new Anchor("第" + (index + 1) + "篇:", font);
                lTitle.Name = "link" + (index + 1);
                document.Add(lTitle);

                Paragraph blogTitle = new Paragraph(o.Title, fontBlod);
                blogTitle.Alignment = Element.ALIGN_CENTER;
                blogTitle.MultipliedLeading = 1;
                document.Add(blogTitle);

                // Turn paragraph and line-break tags into separators before the tags are stripped.
                string Content = o.Description ?? "";
                Content = Content.Replace("<p>", separatorText);
                Content = Content.Replace("<br />", separatorText);
                Content = Content.Replace("<br/>", separatorText);

                // Wrap each distinct image URL in separator-delimited markers.
                // The replacement closes the surrounding tag, emits the marker and reopens
                // an empty <img>, so only the marker is left once tags are removed.
                IList<string> picList = new List<string>();
                foreach (Match m in reg.Matches(Content))
                {
                    string src = m.Groups["img"].Value;
                    // Skip the code-folding icons and URLs that were already wrapped;
                    // wrapping a URL twice would nest the markers.
                    if (src.Contains("OutliningIndicators") || picList.Contains(src))
                    {
                        continue;
                    }
                    picList.Add(src);
                    Content = Content.Replace(
                        src,
                        "\" />" + separatorText + picPrefix + src + picSuffix + separatorText + "<img src=\"");
                }

                // Strip the remaining HTML tags.
                Content = NoHTML(Content);

                // Emit each segment as an image (when it is a marker) or as a text paragraph.
                foreach (string segment in Content.Split(separator))
                {
                    string imageUrl = null;
                    foreach (string pic in picList)
                    {
                        if (segment == picPrefix + pic + picSuffix)
                        {
                            imageUrl = pic;
                            break;
                        }
                    }

                    if (imageUrl != null)
                    {
                        Image jpeg = Image.GetInstance(imageUrl);
                        if (jpeg.Width > maxImageWidth)
                        {
                            // Shrink to the usable width, keeping the aspect ratio.
                            jpeg.ScaleAbsolute(maxImageWidth, jpeg.Height * maxImageWidth / jpeg.Width);
                        }
                        jpeg.Alignment = Image.MIDDLE_ALIGN;
                        document.Add(jpeg);
                    }
                    else
                    {
                        Paragraph blogContent = new Paragraph(segment, font);
                        blogContent.Alignment = Element.ALIGN_LEFT;
                        blogContent.MultipliedLeading = 2;
                        blogContent.SpacingAfter = 10;
                        document.Add(blogContent);
                    }
                }
                document.Add(hr);
            }

            // Credits.
            Paragraph drj = new Paragraph(new Chunk("本程序由博客园——天行健(http://home.cnblogs.com/u/durongjian/)制作,如有建议请发邮件至drjchina@163.com", font));
            drj.Alignment = Element.ALIGN_CENTER;
            drj.SpacingAfter = 20;
            drj.SpacingBefore = 20;
            document.Add(drj);
        }
        finally
        {
            // Close the document even when rendering fails, so the file is released.
            document.Close();
        }
    }
}

        
/// <summary>
/// Removes HTML markup from a string and decodes a small set of character entities.
/// </summary>
/// <param name="Htmlstring">Text that may contain HTML tags and entities.</param>
/// <returns>
/// The text with script blocks, tags and comment remnants removed, the supported
/// named/numeric entities decoded, any other numeric entity dropped, and leading
/// and trailing whitespace trimmed. Null or empty input yields an empty string.
/// </returns>
public static string NoHTML(string Htmlstring)
{
    if (string.IsNullOrEmpty(Htmlstring))
    {
        return "";
    }

    // Singleline lets ".*?" span line breaks, so multi-line script bodies are
    // removed together with their tags instead of leaking into the text.
    Htmlstring = Regex.Replace(Htmlstring, @"<script[^>]*?>.*?</script>", "", RegexOptions.IgnoreCase | RegexOptions.Singleline);
    // Any remaining tag.
    Htmlstring = Regex.Replace(Htmlstring, @"<(.[^>]*)>", "", RegexOptions.IgnoreCase);
    // A line break together with the whitespace that follows it.
    Htmlstring = Regex.Replace(Htmlstring, @"([\r\n])[\s]+", "", RegexOptions.IgnoreCase);
    // Comment remnants.
    Htmlstring = Regex.Replace(Htmlstring, @"-->", "", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"<!--.*", "", RegexOptions.IgnoreCase);

    // Entity decoding happens after tag removal, so a decoded "<" or ">" is
    // ordinary text and must stay in the output.
    Htmlstring = Regex.Replace(Htmlstring, @"&(quot|#34);", "\"", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(lt|#60);", "<", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(gt|#62);", ">", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(nbsp|#160);", " ", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(iexcl|#161);", "\xa1", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(cent|#162);", "\xa2", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(pound|#163);", "\xa3", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(copy|#169);", "\xa9", RegexOptions.IgnoreCase);
    // Numeric entities without a mapping above are dropped.
    Htmlstring = Regex.Replace(Htmlstring, @"&#(\d+);", "", RegexOptions.IgnoreCase);
    // "&amp;" is decoded last so that an escaped entity such as "&amp;lt;"
    // becomes the literal text "&lt;" rather than being decoded a second time.
    Htmlstring = Regex.Replace(Htmlstring, @"&(amp|#38);", "&", RegexOptions.IgnoreCase);

    return Htmlstring.Trim();
}

        
#endregion
    }
}

 

 

      最后就是调用类了,先看一下软件界面吧:

      后台代码:

 

代码
using  System;
using  System.Collections.Generic;
using  System.ComponentModel;
using  System.Data;
using  System.Drawing;
using  System.Linq;
using  System.Text;
using  System.Windows.Forms;
using  BlogsConvert;
using  System.Net;
using  System.IO;

namespace  CnBlogsHelper
{
    
/// <summary>
/// Main form: downloads a blog's RSS feed, lists the articles found and
/// generates a PDF from them on the desktop.
/// </summary>
public partial class BlogToPdf : Form
{
    // Placeholder text shown in the keyword box; it means "no keyword".
    private const string KeyWordPlaceholder = "请输入标题中的关键字";

    /// <summary>Blog-level information from the most recent successful extraction.</summary>
    public channel commonInfo = new channel();

    /// <summary>Articles currently listed, in the same order as the list box rows.</summary>
    public IList<item> blogInfos = new List<item>();

    public BlogToPdf()
    {
        InitializeComponent();
    }

    private void BlogToPdf_Load(object sender, EventArgs e)
    {
    }

    /// <summary>Location of the local copy of the downloaded feed.</summary>
    private static string GetXmlPath()
    {
        return Path.Combine(Application.StartupPath, "Blogs.xml");
    }

    /// <summary>
    /// Downloads the RSS feed and stores it as Blogs.xml in the application's start-up folder.
    /// </summary>
    /// <param name="PageUrl">Address of the RSS feed to download.</param>
    /// <exception cref="InvalidOperationException">The feed content is empty.</exception>
    public void GetXML(string PageUrl)
    {
        string Content;

        // Send a GET request and read the whole response as text.
        // NOTE(review): the response is decoded as GB2312 and re-saved as UTF-8;
        // confirm that GB2312 matches what the server actually sends.
        WebRequest request = WebRequest.Create(PageUrl);
        using (WebResponse response = request.GetResponse())
        using (Stream resStream = response.GetResponseStream())
        using (StreamReader sr = new StreamReader(resStream, System.Text.Encoding.GetEncoding("GB2312")))
        {
            Content = sr.ReadToEnd();
        }

        if (Content.Trim() == "")
        {
            throw new InvalidOperationException("用户名有误,请检查后重新输入!");
        }

        // Overwrite (or create) the local XML file.
        using (StreamWriter writer = new StreamWriter(GetXmlPath(), false, System.Text.Encoding.GetEncoding("UTF-8")))
        {
            writer.Write(Content);
        }
    }

    /// <summary>
    /// Generates the PDF file on the current user's desktop.
    /// </summary>
    /// <param name="saveName">File name of the PDF, without extension.</param>
    /// <param name="cha">Blog-level information.</param>
    /// <param name="itemList">Articles to include.</param>
    public void CreatePDF(string saveName, channel cha, IList<item> itemList)
    {
        IConvert con = new ToPdf();
        string dir = Environment.GetFolderPath(Environment.SpecialFolder.DesktopDirectory);
        con.Convert(cha, itemList, Path.Combine(dir, saveName + ".pdf"));
    }

    // "Generate" button.
    private void btnCreate_Click(object sender, EventArgs e)
    {
        if (!CheckForm())
        {
            return;
        }

        if (blogInfos == null || blogInfos.Count == 0)
        {
            MessageBox.Show("博客数为0,请先提取博客信息!");
            return;
        }

        string fileName = txtFileName.Text.Trim();
        Wait f = new Wait();
        try
        {
            f.Show();
            Application.DoEvents();

            CreatePDF(fileName, commonInfo, blogInfos);

            // Close the wait form before the result message is shown.
            f.Close();
            MessageBox.Show("PDF文档“" + fileName + ".pdf”生成成功,文档在桌面!");
        }
        catch (Exception ex)
        {
            MessageBox.Show("异常信息:" + ex.Message);
        }
        finally
        {
            // Also closes the wait form when generation failed.
            f.Close();
        }
    }

    // "Extract blog information" button.
    private void btnFind_Click(object sender, EventArgs e)
    {
        if (!CheckForm())
        {
            return;
        }

        // Reset the list and its backing data together so they cannot get out of step
        // when the download or parsing fails.
        libBlog.Items.Clear();
        blogInfos = new List<item>();

        string pageUrl = txtBlogUrl.Text.Trim();
        if (!pageUrl.EndsWith("/", StringComparison.Ordinal))
        {
            pageUrl = pageUrl + "/";
        }
        pageUrl = pageUrl + "rss";

        string keyWord = txtKeyWord.Text.Trim();
        if (keyWord == KeyWordPlaceholder)
        {
            keyWord = "";
        }

        // Show the wait form while the feed is downloaded and parsed.
        Wait f = new Wait();
        try
        {
            f.Show();
            Application.DoEvents();

            GetXML(pageUrl);

            string path = GetXmlPath();
            BlogsInfo blogInfo = new BlogsInfo();
            commonInfo = blogInfo.GetChannel(path);

            // GetItems reports "no articles" as null; keep the field non-null.
            IList<item> found = blogInfo.GetItems(path, keyWord);
            if (found != null)
            {
                blogInfos = found;
            }

            foreach (item o in blogInfos)
            {
                libBlog.Items.Add(o.Title);
            }
        }
        catch (Exception ex)
        {
            MessageBox.Show("异常信息:" + ex.Message);
        }
        finally
        {
            f.Close();
        }
    }

    // "Clear all" button.
    private void btnClearAll_Click(object sender, EventArgs e)
    {
        libBlog.Items.Clear();
        if (blogInfos != null)
        {
            blogInfos.Clear();
        }
    }

    // "Delete selected" button.
    private void btnClearCurrent_Click(object sender, EventArgs e)
    {
        int index = libBlog.SelectedIndex;
        // Nothing selected, or the list and its data are out of step.
        if (index < 0 || blogInfos == null || index >= blogInfos.Count)
        {
            return;
        }

        // Remove by position: removing by value would delete the first row with an
        // equal title, which is the wrong row when two articles share a title.
        libBlog.Items.RemoveAt(index);
        blogInfos.RemoveAt(index);
    }

    // Clears the placeholder text when the keyword box is clicked.
    private void txtKeyWord_Click(object sender, EventArgs e)
    {
        if (txtKeyWord.Text.Trim() == KeyWordPlaceholder)
        {
            txtKeyWord.Text = "";
        }
    }

    /// <summary>
    /// Checks that both the blog address and the file name are filled in.
    /// When either is blank, shows a message and resets both boxes to their defaults.
    /// </summary>
    /// <returns>True when both values are present; otherwise false.</returns>
    private bool CheckForm()
    {
        if (txtBlogUrl.Text.Trim() == "" || txtFileName.Text.Trim() == "")
        {
            MessageBox.Show("博客地址和保存文件名不能为空!");
            txtBlogUrl.Text = "http://www.cnblogs.com/";
            txtFileName.Text = "我的博客";
            return false;
        }
        return true;
    }
}
}

 

      其中调用了一个等待窗体Wait,非常简单,这里就不说了,大家可以看源代码。

      博客园中高手如云,本人只能算个菜,只是把自己写的一点小东西拿出来跟大家分享,希望能帮到大家,欢迎各位朋友批评指正,如果使用过程中有错误请留言哦。

      本软件目的是服务博客园的朋友们,源代码完全开源,但转载或二次开发请注明出处。

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值