Updated Aug 16: Fix bugs when json is an array. Add a 'json:root' element always since valid xml should have a root. Remove 'obj' tag that is not necessary.
Updated Aug 15: A more complete json_parser.erl. Thanks to tonyg for his beautiful work; fixed some bugs.
Updated Aug 5: Rewrote json_parser.erl based on tonyg's RFC4627 implementation and fixed some bugs.
In my previous blog post, "A Simple XML State Machine Accepting SAX Events to Build xmerl Compatible XML Tree: icalendar demo", I wrote a simple state machine that parses icalendar into an xmerl-compatible XML tree. This time, I'll use the same state machine to parse a JSON expression into an xmerl-compatible XML tree. The work is fairly simple:
%%---------------------------------------------------------------------------
%% Copyright (c) 2007 Tony Garnock-Jones <tonyg@kcbbs.gen.nz>
%% Copyright (c) 2007 LShift Ltd. <query@lshift.net>
%% Copyright (c) 2007 LightPole, Inc.
%%
%% Permission is hereby granted, free of charge, to any person
%% obtaining a copy of this software and associated documentation
%% files (the "Software"), to deal in the Software without
%% restriction, including without limitation the rights to use, copy,
%% modify, merge, publish, distribute, sublicense, and/or sell copies
%% of the Software, and to permit persons to whom the Software is
%% furnished to do so, subject to the following conditions:
%%
%% The above copyright notice and this permission notice shall be
%% included in all copies or substantial portions of the Software.
%%
%% THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
%% EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
%% MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
%% NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
%% BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
%% ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
%% CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
%% SOFTWARE.
%%---------------------------------------------------------------------------
%%
-module(json_parser).

%% Public API: turn a JSON text into events for a SAX-style state machine.
-export([parse_to_xml/1, parse_to_poet/1]).
%% Manual smoke test.
-export([test/0]).

%% State machine fun for XML output (not referenced by the code in this file,
%% which spells the fun out directly).
-define(stateMachine, fun xml_sm:state/2).

%% Namespace URI, xmlns attribute and qualified names used for the synthetic
%% elements this module adds: the document root and the items of a
%% top-level JSON array.
-define(JsonNSUri, "http://www.lightpole.net/xmlns/1.0").
-define(JsonNSAtrr, {'xmlns:json', ?JsonNSUri}).
-define(JsonNSRoot, 'json:root').
-define(JsonNSArray, 'json:array').

%% Parser context threaded through the parse_* functions.
%%   machine - fun(Event, State) -> NewState that receives the SAX-style events
%%   qname   - element name used for the items of the array being parsed
-record(context, {machine, qname}).
%% @doc Parse a JSON text (string or binary), driving the `xml_sm' state
%% machine with the generated events. Returns whatever that machine yields
%% for the final `{endDocument}' event.
parse_to_xml(Data) ->
    XmlContext = #context{machine = fun xml_sm:state/2},
    parse(Data, XmlContext).
%% @doc Parse a JSON text (string or binary), driving the `poet_sm' state
%% machine with the generated events. Returns whatever that machine yields
%% for the final `{endDocument}' event.
parse_to_poet(Data) ->
    PoetContext = #context{machine = fun poet_sm:state/2},
    parse(Data, PoetContext).
%% Drive one complete document through the state machine in Context:
%% `{startDocument}', then the events for the JSON body, then `{endDocument}'.
%% Binaries are converted to a character list first; the result of the
%% `{endDocument}' call is returned unchanged.
%%
%% NOTE(review): binary_to_list/1 yields bytes, so multi-byte UTF-8 input is
%% passed on byte by byte rather than as code points - confirm whether
%% callers rely on that.
parse(Input, Context) when is_binary(Input) ->
    Chars = binary_to_list(Input),
    parse(Chars, Context);
parse(Chars, Context = #context{machine = Emit}) ->
    Started = Emit({startDocument}, undefined),
    Body = parse_root(skip_ws(Chars), Started, Context),
    Emit({endDocument}, Body).
%% A valid XML document needs a single root element, so the JSON text is
%% always wrapped in a synthetic `json:root' element carrying the namespace
%% declaration. Only a top-level object (`{') or array (`[') is accepted;
%% any other first character has no matching clause. Items of a top-level
%% array are emitted as `json:array' elements.
parse_root([${ | Tail], State, #context{} = Context) ->
    wrap_in_root(fun parse_object/3, undefined, Tail, State, Context);
parse_root([$[ | Tail], State, #context{} = Context) ->
    wrap_in_root(fun parse_array/3, ?JsonNSArray, Tail, State, Context).

%% Emit the root start tag, let ParseBody consume the container body with
%% `qname' set to QName, then emit the root end tag. Input left over after
%% the container is ignored.
wrap_in_root(ParseBody, QName, Tail, State, #context{machine = Emit} = Context) ->
    Opened = Emit({startElement, ?JsonNSUri, root, ?JsonNSRoot, [?JsonNSAtrr]}, State),
    {_Rest, Filled} = ParseBody(skip_ws(Tail), Opened, Context#context{qname = QName}),
    Emit({endElement, ?JsonNSUri, root, ?JsonNSRoot}, Filled).
%% Parse the members of a JSON object whose opening `{' has already been
%% consumed, emitting one element per member through the state machine.
%%
%%   Chars   - remaining input, positioned at `}', `,' or the `"' that
%%             opens a member key (callers skip whitespace first).
%%   State   - current state of the SAX state machine.
%%   Context - #context{}; `qname' is rebound to each member's key.
%%
%% Returns {Rest, NewState}, where Rest is the input after the closing `}'.
%% Any other leading character has no matching clause (function_clause), and
%% a key that is not followed by `:' fails with badmatch.
parse_object([$} | T], State, _Context) ->
    {T, State};
parse_object([$, | T], State, Context) ->
    parse_object(skip_ws(T), State, Context);
parse_object([$" | T], State, #context{machine = MachineFun} = Context) ->
    %% T starts right after the opening quote, so any whitespace here is
    %% part of the key and must be kept (string values are read the same way).
    {Rest, ObjNameStr} = parse_string(T, []),
    %% NOTE(review): list_to_atom/1 on keys taken from untrusted JSON can
    %% exhaust the atom table. Element names are handed to the state machine
    %% as atoms, so this cannot be changed here without changing that
    %% contract.
    ObjName = list_to_atom(ObjNameStr),
    Context1 = Context#context{qname = ObjName},
    [$: | T1] = skip_ws(Rest),
    ValueChars = skip_ws(T1),
    {Rest1, State1} =
        case ValueChars of
            [$[ | T2] ->
                %% An array value becomes a run of sibling elements, each
                %% named after this key; parse_array/3 emits the tags.
                parse_array(skip_ws(T2), State, Context1);
            _ ->
                StateX1 = MachineFun({startElement, "", ObjName, ObjName, []}, State),
                {RestX, StateX2} = parse_value(ValueChars, StateX1, Context1),
                StateX3 = MachineFun({endElement, "", ObjName, ObjName}, StateX2),
                {RestX, StateX3}
        end,
    parse_object(skip_ws(Rest1), State1, Context1).
%% Parse the items of a JSON array whose opening `[' has already been
%% consumed. Every item is wrapped in an element named after the `qname'
%% held in Context.
%%
%%   Chars   - remaining input, positioned at `]', `,' or the start of an
%%             item (callers skip whitespace first).
%%   State   - current state of the SAX state machine.
%%   Context - #context{} supplying the state machine and the item name.
%%
%% Returns {Rest, NewState}, where Rest is the input after the closing `]'.
%% Exits with `syntax_error' when the input ends before `]' or when an item
%% consumes no input; without these checks such input would make this
%% function loop forever while growing the state.
parse_array([], _State, _Context) ->
    exit(syntax_error);
parse_array([$] | T], State, _Context) ->
    {T, State};
parse_array([$, | T], State, Context) ->
    parse_array(skip_ws(T), State, Context);
parse_array(Chars, State, #context{machine = MachineFun, qname = QName} = Context) ->
    State1 = MachineFun({startElement, "", QName, QName, []}, State),
    case parse_value(Chars, State1, Context) of
        {Chars, _Unchanged} ->
            %% Nothing was consumed, so Chars does not start a valid value.
            exit(syntax_error);
        {Rest, State2} ->
            State3 = MachineFun({endElement, "", QName, QName}, State2),
            parse_array(skip_ws(Rest), State3, Context)
    end.
%% Parse a single JSON value at the head of the input and report it to the
%% state machine. Scalars (true/false/null, strings, numbers) are emitted as
%% `{characters, Text}' events, with numbers kept in their textual form;
%% objects and arrays are delegated to parse_object/3 and parse_array/3.
%% Empty input yields no event. Returns {Rest, NewState}.
parse_value([], State, _Context) ->
    {[], State};
parse_value("true" ++ Rest, State, #context{machine = Emit}) ->
    emit_characters(Emit, "true", Rest, State);
parse_value("false" ++ Rest, State, #context{machine = Emit}) ->
    emit_characters(Emit, "false", Rest, State);
parse_value("null" ++ Rest, State, #context{machine = Emit}) ->
    emit_characters(Emit, "null", Rest, State);
parse_value([$" | Tail], State, #context{machine = Emit}) ->
    {Rest, Text} = parse_string(Tail, []),
    emit_characters(Emit, Text, Rest, State);
parse_value([${ | Tail], State, Context) ->
    parse_object(skip_ws(Tail), State, Context);
parse_value([$[ | Tail], State, Context) ->
    parse_array(skip_ws(Tail), State, Context);
parse_value(Chars, State, #context{machine = Emit}) ->
    %% Anything else is treated as a number.
    {Rest, Digits} = parse_number(skip_ws(Chars), []),
    emit_characters(Emit, Digits, Rest, State).

%% Send Text to the state machine as character data and pair the new state
%% with the remaining input.
emit_characters(Emit, Text, Rest, State) ->
    {Rest, Emit({characters, Text}, State)}.
%% Read the body of a JSON string whose opening quote has already been
%% consumed. Acc holds the characters collected so far, in reverse order.
%% Returns {Rest, String}, where Rest is the input after the closing quote.
%% A backslash followed by at least one character is handed to
%% parse_escaped_char/3. Input that ends before the closing quote has no
%% matching clause.
parse_string([Ch | Tail], Acc) ->
    case {Ch, Tail} of
        {$", _} ->
            {Tail, lists:reverse(Acc)};
        {$\\, [Key | AfterKey]} ->
            parse_escaped_char(Key, AfterKey, Acc);
        _ ->
            parse_string(Tail, [Ch | Acc])
    end.
%% Decode the escape sequence selected by Key (the character following a
%% backslash), push the resulting character onto Acc and continue reading
%% the string via parse_string/2.
%%
%%   Key  - escape selector: b, t, n, f, r, /, \, " or u.
%%   Rest - input following Key.
%%   Acc  - characters of the string collected so far, reversed.
%%
%% `\uXXXX' takes four hex digits. A high surrogate (D800-DBFF) immediately
%% followed by a `\uXXXX' low surrogate (DC00-DFFF) is combined into the
%% single code point the pair encodes; a lone surrogate is passed through
%% unchanged. Unknown selectors, or `u' with fewer than four characters
%% left, have no matching clause.
parse_escaped_char($b, Rest, Acc) -> parse_string(Rest, [8 | Acc]);
parse_escaped_char($t, Rest, Acc) -> parse_string(Rest, [9 | Acc]);
parse_escaped_char($n, Rest, Acc) -> parse_string(Rest, [10 | Acc]);
parse_escaped_char($f, Rest, Acc) -> parse_string(Rest, [12 | Acc]);
parse_escaped_char($r, Rest, Acc) -> parse_string(Rest, [13 | Acc]);
parse_escaped_char($/, Rest, Acc) -> parse_string(Rest, [$/ | Acc]);
parse_escaped_char($\\, Rest, Acc) -> parse_string(Rest, [$\\ | Acc]);
parse_escaped_char($", Rest, Acc) -> parse_string(Rest, [$" | Acc]);
parse_escaped_char($u, [D0, D1, D2, D3 | Rest], Acc) ->
    Unit = hex_quad(D0, D1, D2, D3),
    case Rest of
        [$\\, $u, L0, L1, L2, L3 | Rest1] when Unit >= 16#D800, Unit =< 16#DBFF ->
            case hex_quad(L0, L1, L2, L3) of
                Low when Low >= 16#DC00, Low =< 16#DFFF ->
                    %% UTF-16 surrogate pair -> one supplementary code point.
                    Code = 16#10000 + ((Unit - 16#D800) bsl 10) + (Low - 16#DC00),
                    parse_string(Rest1, [Code | Acc]);
                _NotLowSurrogate ->
                    %% Leave the following escape for the next iteration.
                    parse_string(Rest, [Unit | Acc])
            end;
        _ ->
            parse_string(Rest, [Unit | Acc])
    end.

%% Value of four hexadecimal digit characters, most significant first.
hex_quad(D0, D1, D2, D3) ->
    (digit_hex(D0) bsl 12) +
        (digit_hex(D1) bsl 8) +
        (digit_hex(D2) bsl 4) +
        digit_hex(D3).
%% Numeric value (0..15) of a hexadecimal digit character; accepts `0'-`9',
%% `A'-`F' and `a'-`f'. Any other argument has no matching clause.
digit_hex(C) when is_integer(C), C >= $0, C =< $9 -> C - $0;
digit_hex(C) when is_integer(C), C >= $A, C =< $F -> C - $A + 10;
digit_hex(C) when is_integer(C), C >= $a, C =< $f -> C - $a + 10.
%% Finish a number: Acc holds its characters in reverse order. The number is
%% returned in textual form (no conversion to integer or float) together
%% with the remaining input, as {Rest, Text}.
finish_number(Rest, Acc) ->
    {Rest, lists:reverse(Acc)}.
%% Entry point for number parsing. Consumes an optional leading minus sign
%% and hands the remainder to parse_number1/2. Exits with `syntax_error' on
%% empty input. Returns {Rest, Text}.
parse_number(Chars, Acc) ->
    case Chars of
        [] ->
            exit(syntax_error);
        [$- | Unsigned] ->
            parse_number1(Unsigned, [$- | Acc]);
        _ ->
            parse_number1(Chars, Acc)
    end.
%% Parse the integer part, then an optional fraction and an optional
%% exponent. The boolean passed to parse_exp/3 records that no fraction was
%% seen, so a ".0" can be inserted in front of an exponent.
parse_number1(Chars, Acc) ->
    case parse_int_part(Chars, Acc) of
        {IntAcc, []} ->
            finish_number([], IntAcc);
        {IntAcc, [$. | FracChars]} ->
            {FracAcc, AfterFrac} = parse_int_part(FracChars, [$. | IntAcc]),
            parse_exp(AfterFrac, FracAcc, false);
        {IntAcc, AfterInt} ->
            parse_exp(AfterInt, IntAcc, true)
    end.
%% Consume a (possibly empty) run of decimal digits, pushing each one onto
%% Acc. Returns {NewAcc, Rest}, where Rest starts at the first non-digit.
parse_int_part([Ch | Rest], Acc) when is_integer(Ch), Ch >= $0, Ch =< $9 ->
    parse_int_part(Rest, [Ch | Acc]);
parse_int_part([_ | _] = Chars, Acc) ->
    {Acc, Chars};
parse_int_part([], Acc) ->
    {Acc, []}.
%% Parse an optional exponent. An `e' or `E' marker hands over to
%% parse_exp1/3; anything else ends the number.
parse_exp([Marker | Tail], Acc, NeedFrac) when Marker =:= $e; Marker =:= $E ->
    parse_exp1(Tail, Acc, NeedFrac);
parse_exp(Chars, Acc, _NeedFrac) ->
    finish_number(Chars, Acc).
%% Parse what follows the exponent marker. The marker is always recorded as
%% a lowercase `e'; when NeedFrac is `true' (no fractional part was seen) a
%% ".0" is inserted in front of it, so "1e5" is emitted as "1.0e5".
parse_exp1(Chars, Acc, NeedFrac) ->
    WithMarker =
        case NeedFrac of
            true -> [$e, $0, $. | Acc];
            _ -> [$e | Acc]
        end,
    {ExpAcc, Rest} = parse_signed_int_part(Chars, WithMarker),
    finish_number(Rest, ExpAcc).
%% Like parse_int_part/2, but first accepts an optional leading `+' or `-',
%% which is kept in the accumulated text.
parse_signed_int_part([Sign | Tail], Acc) when Sign =:= $+; Sign =:= $- ->
    parse_int_part(Tail, [Sign | Acc]);
parse_signed_int_part(Chars, Acc) ->
    parse_int_part(Chars, Acc).
%% True when C is the character code of a decimal digit (`0'..`9').
is_digit(C) ->
    is_integer(C) andalso $0 =< C andalso C =< $9.
%% Drop leading whitespace, where "whitespace" means every element that
%% compares =< 32 (space and all control characters). Any other input is
%% returned unchanged.
skip_ws([Ch | Tail] = Chars) ->
    case Ch =< 32 of
        true -> skip_ws(Tail);
        false -> Chars
    end;
skip_ws(Other) ->
    Other.
%% Manual smoke test: parses three sample JSON texts, prints the results to
%% the `user' device and runs one XPath query against the last tree.
%% The `{ok, _}' matches assert that the state machines answer the final
%% `{endDocument}' event with `{ok, Result}'; anything else is a badmatch.
test() ->
    %% A flat object, through both state machines.
    Text1 = "{\"firstname\":\"Caoyuan\", \"iq\":\"150\"}",
    {ok, Xml1} = parse_to_xml(Text1),
    XmlText1 = lists:flatten(xmerl:export_simple([Xml1], xmerl_xml)),
    io:fwrite(user, "Parsed XML: ~n~p~n", [XmlText1]),
    {ok, Poet1} = parse_to_poet(Text1),
    io:fwrite(user, "Parsed POET: ~n~p~n", [Poet1]),
    %% A top-level array of objects.
    Text2 = "[{\"firstname\":\"Caoyuan\", \"iq\":\"150\"},
{\"firstname\":\"Haobo\", \"iq\":150}]",
    {ok, Xml2} = parse_to_xml(Text2),
    XmlText2 = lists:flatten(xmerl:export_simple([Xml2], xmerl_xml)),
    io:fwrite(user, "Parsed: ~n~p~n", [XmlText2]),
    %% Nested objects and arrays with string and number values.
    Text = "
{\"businesses\": [{\"address1\": \"650 Mission Street\",
\"address2\": \"\",
\"avg_rating\": 4.5,
\"categories\": [{\"category_filter\": \"localflavor\",
\"name\": \"Local Flavor\",
\"search_url\": \"http://lightpole.net/search\"}],
\"city\": \"San Francisco\",
\"distance\": 0.085253790020942688,
\"id\": \"4kMBvIEWPxWkWKFN__8SxQ\",
\"latitude\": 37.787185668945298,
\"longitude\": -122.40093994140599},
{\"address1\": \"25 Maiden Lane\",
\"address2\": \"\",
\"avg_rating\": 5.0,
\"categories\": [{\"category_filter\": \"localflavor\",
\"name\": \"Local Flavor\",
\"search_url\": \"http://lightpole.net/search\"}],
\"city\": \"San Francisco\",
\"distance\": 0.23186808824539185,
\"id\": \"O1zPF_b7RyEY_NNsizX7Yw\",
\"latitude\": 37.788387,
\"longitude\": -122.40401}]} ",
    {ok, Xml} = parse_to_xml(Text),
    XmlText = lists:flatten(xmerl:export_simple([Xml], xmerl_xml)),
    io:fwrite(user, "Parsed: ~n~p~n", [XmlText]),
    %% The synthetic root element is `json:root', so the path must use the
    %% `json' prefix to select anything.
    Latitude1 = xmerl_xpath:string("/json:root/businesses[1]/latitude/text()", Xml),
    io:format(user, "Latitude1: ~p~n", [Latitude1]).
The result will be something like:
<?xml version="1.0"?>
<json:root xmlns:json="http://www.lightpole.net/xmlns/1.0">
<businesses>
<address1>650 Mission Street</address1>
<address2></address2>
<avg_rating>4.5</avg_rating>
<categories>
<category_filter>localflavor</category_filter>
<name>Local Flavor</name>
<search_url>http://lightpole.net/search</search_url>
</categories>
<city>San Francisco</city>
<distance>0.085253790020942688</distance>
<id>4kMBvIEWPxWkWKFN__8SxQ</id>
<latitude>37.787185668945298</latitude>
<longitude>-122.40093994140599</longitude>
</businesses>
<businesses>
<address1>25 Maiden Lane</address1>
<address2></address2>
<avg_rating>5.0</avg_rating>
<categories>
<category_filter>localflavor</category_filter>
<name>Local Flavor</name>
<search_url>http://lightpole.net/search</search_url>
</categories>
<city>San Francisco</city>
<distance>0.23186808824539185</distance>
<id>O1zPF_b7RyEY_NNsizX7Yw</id>
<latitude>37.788387</latitude>
<longitude>-122.40401</longitude>
</businesses>
</json:root>
Now you can fetch an element with:
> [Latitude1] = xmerl_xpath:string("/json:root/businesses[1]/latitude/text()", Xml),
> Latitude1#xmlText.value.
"37.787185668945298"
Next time, I'll write a simple Erlang Data state machine, which will parse icalendar and json to simple Erlang Lists + Tuples.
The code of xml_sm.erl can be found in my previous blog.
本文介绍了一个使用Erlang编写的简单状态机,该状态机能够将JSON数据解析并转换为符合XMerl规范的XML树。通过示例展示了如何处理JSON数组及对象,并将它们转化为XML元素。
3065

被折叠的 条评论
为什么被折叠?



