Normalise JSON. Update todo
[PreCom.git] / JSON.sh
1 #!/usr/bin/env bash
2
# Print an error message on stderr and abort.
# Arguments: $* - words of the message; they are joined into a single line.
# Exits:     always terminates the current (sub)shell with status 1.
throw () {
  # printf instead of echo: a message that is exactly "-n" or "-e" would be
  # taken as an echo option and nothing would be printed.
  printf '%s\n' "$*" >&2
  exit 1
}
7
# Output switches. Each starts disabled (0); parse_options sets it to 1 when
# the flag noted beside it is given.
NORMALIZE_SOLIDUS=0  # -s
PRUNE=0              # -p (also set by -b)
LEAFONLY=0           # -l (also set by -b)
BRIEF=0              # -b
12
# Print the command-line help text on stdout (nine lines, the first and the
# last of them blank).
usage() {
  printf '%s\n' \
    "" \
    "Usage: JSON.sh [-b] [-l] [-p] [-s] [-h]" \
    "" \
    "-p - Prune empty. Exclude fields with empty values." \
    "-l - Leaf only. Only show leaf nodes, which stops data duplication." \
    "-b - Brief. Combines 'Leaf only' and 'Prune empty' options." \
    "-s - Remove escaping of the solidus symbol (straight slash)." \
    "-h - This help text." \
    ""
}
24
# Translate command-line flags into the global option switches.
# Globals:   BRIEF, LEAFONLY, PRUNE, NORMALIZE_SOLIDUS (written)
# Arguments: the command-line arguments to examine
# Exits:     0 after printing the help text for -h;
#            1 after reporting an unrecognised argument on stderr.
parse_options() {
  while [ "$#" -gt 0 ]; do
    case "$1" in
      -h)
        usage
        exit 0
        ;;
      -b)
        BRIEF=1
        LEAFONLY=1
        PRUNE=1
        ;;
      -l) LEAFONLY=1 ;;
      -p) PRUNE=1 ;;
      -s) NORMALIZE_SOLIDUS=1 ;;
      ?*)
        # Any other non-empty argument is a usage error. Diagnostics go to
        # stderr and the status is non-zero so that callers can detect it.
        # (An empty-string argument matches no pattern and is skipped.)
        echo "ERROR: Unknown option." >&2
        usage >&2
        exit 1
        ;;
    esac
    shift
  done
}
53
# Print every successive match of a regular expression on its own line,
# using gawk. tokenize selects this when its `grep -o` probe fails.
# Arguments: $1 - the pattern. It is handed to gawk as a command-line
#                 variable assignment, so gawk processes backslash escapes
#                 in it before it is used as a regular expression.
# Input:     text on stdin
# Outputs:   one matched token per line on stdout
awk_egrep () {
  local pattern_string=$1

  gawk '{
    # Loop on the remaining length rather than on the truth value of $0:
    # a record that looks like numeric zero (a line holding just "0") is
    # false in awk and would otherwise be dropped without emitting a token.
    while (length($0) > 0) {
      start = match($0, pattern)
      # Nothing matches any more: stop instead of spinning on the same text.
      if (start == 0) break
      # A zero-length match consumes nothing; step over one character so
      # the loop always makes progress.
      if (RLENGTH < 1) {
        $0 = substr($0, 2)
        continue
      }
      token = substr($0, start, RLENGTH)
      print token
      $0 = substr($0, start + RLENGTH)
    }
  }' pattern="$pattern_string"
}
65
# Split the JSON text on stdin into tokens, one per output line.
# A token is a double-quoted string, a number, one of null/false/true, or
# any other single character; tokens made only of whitespace are removed.
# Uses grep -E -o when available and awk_egrep otherwise.
tokenize () {
  local -a grep_cmd
  local ESCAPE
  local CHAR

  # grep -E is used instead of egrep, which is obsolescent and makes newer
  # GNU grep print a warning on stderr at every invocation.
  if echo "test string" | grep -E -ao --color=never "test" >/dev/null 2>&1
  then
    grep_cmd=(grep -E -ao --color=never)
  else
    grep_cmd=(grep -E -ao)
  fi

  if echo "test string" | grep -E -o "test" >/dev/null 2>&1
  then
    ESCAPE='(\\[^u[:cntrl:]]|\\u[0-9a-fA-F]{4})'
    CHAR='[^[:cntrl:]"\\]'
  else
    # No usable grep -o: fall back to awk_egrep. The pattern reaches awk
    # through a command-line variable assignment, which consumes one level
    # of backslash escaping, so every literal backslash is written as four
    # here -- including the one in front of the u of a \uXXXX escape.
    grep_cmd=(awk_egrep)
    ESCAPE='(\\\\[^u[:cntrl:]]|\\\\u[0-9a-fA-F]{4})'
    CHAR='[^[:cntrl:]"\\\\]'
  fi

  local STRING="\"$CHAR*($ESCAPE$CHAR*)*\""
  local NUMBER='-?(0|[1-9][0-9]*)([.][0-9]*)?([eE][+-]?[0-9]*)?'
  local KEYWORD='null|false|true'
  local SPACE='[[:space:]]+'

  # The trailing "." alternative turns every other character into a token
  # of its own; the second grep then discards whitespace-only tokens.
  "${grep_cmd[@]}" "$STRING|$NUMBER|$KEYWORD|$SPACE|." | grep -E -v "^$SPACE$"
}
95
# Parse the rest of a JSON array; the caller has already consumed the "[".
# Globals:   token (overwritten by each read), value (written), BRIEF (read)
# Arguments: $1 - path of this array, passed to parse_value together with
#                 the zero-based index of each element
# Input:     tokens, one per line, on stdin
# Result:    value is "[" + the element values joined by "," + "]" when
#            BRIEF is 0, and empty otherwise. Calls throw on a token that is
#            neither "," nor "]" after an element.
parse_array () {
  local position=0
  local collected=''

  read -r token
  if [ "$token" != ']' ]; then
    while true; do
      parse_value "$1" "$position"
      position=$((position+1))
      collected="${collected}${value}"

      # After an element only a separator or the closing bracket is legal.
      read -r token
      if [ "$token" = ']' ]; then
        break
      elif [ "$token" = ',' ]; then
        collected="${collected},"
      else
        throw "EXPECTED , or ] GOT ${token:-EOF}"
      fi
      read -r token
    done
  fi

  if [ "$BRIEF" -eq 0 ]; then
    value="[${collected}]"
  else
    value=
  fi
  :
}
121
# Parse the rest of a JSON object; the caller has already consumed the "{".
# Globals:   token (overwritten by each read), value (written), BRIEF (read)
# Arguments: $1 - path of this object, passed to parse_value together with
#                 each member's key token
# Input:     tokens, one per line, on stdin
# Result:    value is "{" + key:value pairs joined by "," + "}" when BRIEF
#            is 0, and empty otherwise. Calls throw when a key is not a
#            quoted string, when ":" is missing, or when a member is followed
#            by something other than "," or "}".
parse_object () {
  local name
  local members=''

  read -r token
  if [ "$token" != '}' ]; then
    while true; do
      # A member starts with a key token that begins and ends with a quote.
      [[ "$token" == '"'*'"' ]] || throw "EXPECTED string GOT ${token:-EOF}"
      name=$token

      read -r token
      [ "$token" = ':' ] || throw "EXPECTED : GOT ${token:-EOF}"

      read -r token
      parse_value "$1" "$name"
      members="${members}${name}:${value}"

      # After a member only a separator or the closing brace is legal.
      read -r token
      if [ "$token" = '}' ]; then
        break
      elif [ "$token" = ',' ]; then
        members="${members},"
      else
        throw "EXPECTED , or } GOT ${token:-EOF}"
      fi
      read -r token
    done
  fi

  if [ "$BRIEF" -eq 0 ]; then
    value="{${members}}"
  else
    value=
  fi
  :
}
156
# Parse the JSON value that starts at the current token and print it as a
# "path<TAB>value" line when the output switches allow it.
# Globals:   token (read), value (written),
#            LEAFONLY, PRUNE, NORMALIZE_SOLIDUS (read; each 0 or 1)
# Arguments: $1 - path of the parent (may be empty)
#            $2 - key or index of this value inside the parent
# Outputs:   at most one line on stdout; double quotes are removed from the
#            path, the value is printed as parsed.
parse_value () {
  local node_path="${1:+$1.}$2"
  local leaf=0 blank=0

  case "$token" in
    '{') parse_object "$node_path" ;;
    '[') parse_array "$node_path" ;;
    # At this point, the only valid single-character tokens are digits.
    ''|[!0-9]) throw "EXPECTED value GOT ${token:-EOF}" ;;
    *)
      value=$token
      # if asked, replace solidus ("\/") in json strings with normalized value: "/"
      if [ "$NORMALIZE_SOLIDUS" -eq 1 ]; then
        value=${value//\\\//\/}
      fi
      leaf=1
      if [ "$value" = '""' ]; then
        blank=1
      fi
      ;;
  esac

  # An empty value (what the container parsers leave behind in brief mode)
  # is never printed.
  [ -n "$value" ] || return 0

  # Gate 1: with LEAFONLY on, only leaves pass.
  # Gate 2: with PRUNE on, the empty string "" does not pass.
  if [ "$LEAFONLY" -eq 0 ] || [ "$leaf" -eq 1 ]; then
    if [ "$PRUNE" -eq 0 ] || [ "$blank" -eq 0 ]; then
      printf "%s\t%s\n" "${node_path//\"/}" "$value"
    fi
  fi
  :
}
180
# Parse one JSON document from the token stream on stdin.
# Reads the first token, lets parse_value consume the whole value, then
# requires that no further token follows; otherwise calls throw.
# Globals: token (overwritten by each read)
parse () {
  read -r token
  parse_value

  # Anything still readable after the top-level value is trailing garbage.
  read -r token
  if [ -n "$token" ]; then
    throw "EXPECTED EOF GOT $token"
  fi
}
190
# Entry point: run the option parser and the tokenize | parse pipeline when
# BASH_SOURCE is empty or equal to $0 (i.e. the file is being executed
# rather than sourced); otherwise only the function definitions take effect.
if [ -z "$BASH_SOURCE" ] || [ "$0" = "$BASH_SOURCE" ]; then
  parse_options "$@"
  tokenize | parse
fi
196
197 #The MIT License
198 #
199 #Copyright (c) 2011 Dominic Tarr
200 #
201 #Permission is hereby granted, free of charge,
202 #to any person obtaining a copy of this software and
203 #associated documentation files (the "Software"), to
204 #deal in the Software without restriction, including
205 #without limitation the rights to use, copy, modify,
206 #merge, publish, distribute, sublicense, and/or sell
207 #copies of the Software, and to permit persons to whom
208 #the Software is furnished to do so,
209 #subject to the following conditions:
210 #
211 #The above copyright notice and this permission notice
212 #shall be included in all copies or substantial portions of the Software.
213 #
214 #THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
215 #EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
216 #OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
217 #IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR
218 #ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
219 #TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
220 #SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.