处理JSON文件和XML

1、递归处理xml文件的方法

使用dom4j依赖为:
 
        <dependency>
            <groupId>dom4j</groupId>
            <artifactId>dom4j</artifactId>
            <version>1.6.1</version>
        </dependency>

        <dependency>
            <groupId>jaxen</groupId>
            <artifactId>jaxen</artifactId>
            <version>1.1.6</version>
        </dependency>
函数方法:
/**
 * Small dom4j demo: parses one XML file and prints its content by walking
 * the node tree recursively.
 */
public class XmlParse {

    /**
     * Parses the XML file at a fixed local path and prints it through
     * {@link #treeWalk(Document)}.
     *
     * @param args unused
     * @throws Exception whatever the dom4j reader throws while parsing
     */
    public static void main(String[] args) throws Exception {
        File xmlFile = new File("C:\\Users\\40275\\Desktop\\2triples_Astronaut_dev_challenge.xml");
        Document parsed = new SAXReader().read(xmlFile);

        treeWalk(parsed);

        // Non-recursive alternative (previously kept here as commented-out code):
        // take the root element, iterate its children and grandchildren, and for
        // each grandchild print the trimmed string value of its
        // "originaltripleset", "modifiedtripleset" and "lex" child elements.
    }


    /**
     * Walks the whole document, starting at its root element.
     *
     * @param document the parsed document to print
     */
    public static void treeWalk(Document document) {
        Element root = document.getRootElement();
        treeWalk(root);
    }

    /**
     * Prints the content below {@code element} in document order: for a child
     * element its name is printed and the walk descends into it; for any other
     * node its trimmed string value is printed.
     *
     * @param element the element whose child nodes are visited
     */
    public static void treeWalk(Element element) {
        int total = element.nodeCount();
        int index = 0;
        while (index < total) {
            Node current = element.node(index);
            index++;
            if (!(current instanceof Element)) {
                // Non-element node: print its trimmed string value.
                System.out.println(current.getStringValue().trim());
                continue;
            }
            System.out.println(current.getName());
            treeWalk((Element) current);
        }
    }
}

2、JSON文件的处理

json依赖

 <dependency>
            <groupId>org.json</groupId>
            <artifactId>json</artifactId>
            <version>20160810</version>
        </dependency>
        <!--加入对commons-io的依赖-->
        <dependency>
            <groupId>commons-io</groupId>
            <artifactId>commons-io</artifactId>
            <version>2.4</version>
        </dependency>

以每行JSON对象中的唯一字段(sentText)作为key、解析后的JSONObject作为value,把JSON文件读入Map后再进行处理

    /**
     * Loads two JSON files into maps and prints the size of each map followed
     * by the number of keys that occur in both.
     *
     * <p>The first file is loaded with {@code mapJsonFileToMap}, the second with
     * {@code mapJsonFileToMapCos}. The output file {@code d:\differ.txt} is
     * opened (and therefore created/truncated) as before, but nothing is
     * written to it at present; the writer is now always closed.
     *
     * @throws IOException if reading either input file or opening/closing the
     *                     output file fails
     */
    public static void validAndTrained() throws IOException {
        String trainJsonFilePath = "C:\\Users\\40275\\Desktop\\traindev.json";
        String validOutJsonFilePath = "C:\\Users\\40275\\Desktop\\train_out.json";

        Map<String, JSONObject> traindevMap = new HashMap<String, JSONObject>();
        Map<String, JSONObject> validOutMap = new HashMap<String, JSONObject>();

        mapJsonFileToMap(trainJsonFilePath, traindevMap);
        mapJsonFileToMapCos(validOutJsonFilePath, validOutMap);

        System.out.println("traindev "+traindevMap.size());
        System.out.println("valid_out "+validOutMap.size());

        OutputStreamWriter osw = new OutputStreamWriter(new FileOutputStream(new File("d:\\differ.txt")),"UTF-8");
        BufferedWriter bufferedWriter = new BufferedWriter(osw);

        int count = 0; // number of keys present in both maps
        try {
            for (String devKey : validOutMap.keySet()) {
                if (traindevMap.containsKey(devKey)) {
                    count++;
                    // To dump the intersection, write the matching object here, e.g.
                    // bufferedWriter.write(traindevMap.get(devKey).toString() + "\t\n");
                }
                // To dump keys missing from traindevMap (debugging), append devKey
                // to bufferedWriter in an else branch instead.
            }
        } finally {
            // close() flushes, so no per-iteration flush is needed; closing in
            // finally releases the file handle even if the loop throws.
            bufferedWriter.close();
        }

        System.out.println(count);
    }

    /**
     * Reads a file containing one JSON object per line and stores each object
     * in {@code jsonMap} under the value of its {@code "sentText"} field.
     *
     * <p>When several lines share the same {@code "sentText"}, only the first
     * one is kept. The file is decoded as UTF-8 rather than the platform
     * default charset, and the reader is closed even when parsing fails.
     *
     * <p>Original note: meant for files "without commas" — presumably lines
     * that are not terminated by a separating comma; confirm against the
     * sibling {@code mapJsonFileToMapCos}.
     *
     * @param filePath path of the line-delimited JSON file to read
     * @param jsonMap  destination map, filled in place
     * @throws IOException if the file cannot be opened or read
     */
    private static void mapJsonFileToMap(String filePath, Map<String, JSONObject> jsonMap) throws IOException {
        BufferedReader reader = new BufferedReader(
                new InputStreamReader(new FileInputStream(new File(filePath)), "UTF-8"));
        try {
            String oneLine;
            while ((oneLine = reader.readLine()) != null) {
                JSONObject jsonObject = new JSONObject(oneLine);
                String key = (String) jsonObject.get("sentText");
                // Keep the first object seen for a given key.
                if (!jsonMap.containsKey(key)) {
                    jsonMap.put(key, jsonObject);
                }
            }
        } finally {
            // Closing the reader also closes the wrapped FileInputStream.
            reader.close();
        }
    }

 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值