package parseXML;
import java.io.File;
import java.io.FileInputStream;
import java.util.Iterator;
import java.util.List;
import org.jdom.Content;
import org.jdom.Document;
import org.jdom.Element;
import org.jdom.input.SAXBuilder;
import utils.NumberUtils;
import entity.SearchUnit;
import exception.ParseFileException;
/**
 * Reads one XML description file with JDOM and copies its fields into a
 * {@link SearchUnit}.
 *
 * <p>The fields are read from direct children of the document's root element
 * (Title, Description, Cost, Location, ...), plus the nested
 * {@code Copyright/Owner} and {@code Contributor/*} elements.
 */
public class JDomParse {

    /** Pattern for anything shaped like a markup tag; matches are removed from Title and Description. */
    private static final String TAG_REGEX = "<[^>]*>";

    /** Descriptions longer than this many characters are cut down to exactly this length. */
    private static final int MAX_DESCRIPTION_LENGTH = 99;

    /**
     * Parses {@code file} and builds the corresponding {@link SearchUnit}.
     *
     * <p>Title, Description, Cost, Location, CreateTime, LastModifyTime, Theme and
     * Copyright/Owner are required. MediaType, ThumbURL, ExpertComment,
     * ConsumerComment, Recommend, SunFlower and Discount fall back to a default
     * when absent or empty.
     *
     * @param file the XML file to parse; characters 5-6 of its name are stored as
     *             the unit's system code (e.g. {@code "01"} for
     *             {@code CEOM_01_M_....xml})
     * @return the populated search unit
     * @throws NullPointerException      if a required element is missing
     * @throws IndexOutOfBoundsException if the Copyright or Contributor element is
     *                                   missing, or the file name is shorter than
     *                                   seven characters
     * @throws NumberFormatException     if the Cost text is not a valid number
     * @throws Exception                 if the file cannot be opened or parsed
     */
    public SearchUnit getSearchUnit(File file) throws Exception {
        SearchUnit unit = new SearchUnit();

        // NOTE(review): if these files can come from untrusted sources, consider
        // disabling DTDs/external entities on the builder (XXE) - confirm with callers.
        SAXBuilder builder = new SAXBuilder(false);
        Document doc;
        FileInputStream in = new FileInputStream(file);
        try {
            doc = builder.build(in);
        } finally {
            // The stream is only needed while the document is being built.
            in.close();
        }
        Element root = doc.getRootElement();

        unit.setTitle(stripTags(requiredText(root, "Title")));

        String description = stripTags(requiredText(root, "Description"));
        if (description.length() > MAX_DESCRIPTION_LENGTH) {
            description = description.substring(0, MAX_DESCRIPTION_LENGTH);
        }
        unit.setDescription(description);

        unit.setSystem(file.getName().substring(5, 7));

        String cost = requiredText(root, "Cost").trim();
        unit.setCost(cost);
        unit.setCostBySorting(NumberUtils.pad(Double.parseDouble(cost)));

        unit.setLocation(requiredText(root, "Location").trim());
        unit.setCreateTime(requiredText(root, "CreateTime").trim());
        unit.setLastModifyTime(requiredText(root, "LastModifyTime").trim());

        // A missing or blank MediaType falls back to "T".
        String mediaType = root.getChildText("MediaType");
        mediaType = (mediaType == null) ? "" : mediaType.trim();
        unit.setMediaType(mediaType.length() == 0 ? "T" : mediaType);

        // These three are stored untrimmed, exactly as they appear in the document.
        unit.setThumbURL(optionalText(root, "ThumbURL", "0", false));
        unit.setExpertComment(optionalText(root, "ExpertComment", "0", false));
        unit.setConsumerCommenet(optionalText(root, "ConsumerComment", "0", false));

        unit.setRecommend(optionalText(root, "Recommend", "1", true));
        unit.setSunFlower(optionalText(root, "SunFlower", "0", true));
        unit.setDiscount(optionalText(root, "Discount", "1", true));

        unit.setTheme(requiredText(root, "Theme").trim());
        unit.setOwner(requiredText(firstChild(root, "Copyright"), "Owner").trim());
        unit.setContributor(joinChildValues(firstChild(root, "Contributor")));

        return unit;
    }

    /** Removes every tag-shaped substring from {@code text} and trims the result. */
    private static String stripTags(String text) {
        return text.replaceAll(TAG_REGEX, "").trim();
    }

    /**
     * Returns the untrimmed text of the named child, failing with a descriptive
     * {@link NullPointerException} when the child element does not exist.
     */
    private static String requiredText(Element parent, String name) {
        String text = parent.getChildText(name);
        if (text == null) {
            throw new NullPointerException("Missing required element <" + name + ">");
        }
        return text;
    }

    /**
     * Returns the text of the named child, or {@code fallback} when the child is
     * absent or its text is empty; the text is trimmed only when {@code trim} is set.
     */
    private static String optionalText(Element parent, String name, String fallback, boolean trim) {
        String text = parent.getChildText(name);
        if (text == null || text.length() == 0) {
            return fallback;
        }
        return trim ? text.trim() : text;
    }

    /** Returns the first child element with the given name; throws IndexOutOfBoundsException if there is none. */
    private static Element firstChild(Element parent, String name) {
        return (Element) parent.getChildren(name).get(0);
    }

    /** Joins the values of all child elements with ';'; yields an empty string when there are no children. */
    private static String joinChildValues(Element parent) {
        StringBuilder joined = new StringBuilder();
        List children = parent.getChildren();
        boolean first = true;
        for (Iterator iter = children.iterator(); iter.hasNext();) {
            Content value = (Content) iter.next();
            if (!first) {
                joined.append(';');
            }
            joined.append(value.getValue());
            first = false;
        }
        return joined.toString();
    }

    /**
     * Manual smoke test: parses a fixed sample file and prints the resulting unit.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        File file = new File("D:\\CEOM_01_M_20090805133924_0727_N.xml");
        SearchUnit su = null;
        try {
            su = new JDomParse().getSearchUnit(file);
        } catch (Exception e) {
            ParseFileException pfe = new ParseFileException();
            if (pfe.isFileNotFoundException(e)) {
                System.out.println("File Not Found !");
            } else {
                // Report other failures instead of dropping them silently.
                System.err.println("Failed to parse " + file + ": " + e);
            }
        }
        if (su == null) {
            System.out.println("Su is null !");
            return;
        }
        System.out.println(su.toString());
    }
}
1. 利用 replaceAll("<[^>]*>", "") 去除内容中所有形如 <...> 的 HTML 标签(无论是否成对),只保留标签之外的文本
2. 包含了 XML 解析中常遇到的两种情况:文本直接位于元素内,以及文本位于其子元素内,例如:
<root>
root
</root>
----
<root>
<name>root</name>
</root>
记下..

被折叠的 条评论
为什么被折叠?



