1 ## compare_shells: dash bash mksh zsh
2 ## oils_failures_allowed: 1
3
4 # Test the length operator, which dash supports. Dash doesn't support most
5 # other ops.
6
7 #### String length
8 v=foo
9 echo ${#v}  # ${#v} is the length of $v; 'foo' gives 3
10 ## stdout: 3
11
12 #### Unicode string length (UTF-8)
13 v=$'_\u03bc_'
14 echo ${#v}  # expected 3; dash prints 9 and mksh prints 4 (both marked N-I)
15 ## stdout: 3
16 ## N-I dash stdout: 9
17 ## N-I mksh stdout: 4
18
19 #### Unicode string length (spec/testdata/utf8-chars.txt)
20 v=$(cat $REPO_ROOT/spec/testdata/utf8-chars.txt)
21 echo ${#v}  # expected 7; dash and mksh print 13 (both marked N-I)
22 ## stdout: 7
23 ## N-I dash stdout: 13
24 ## N-I mksh stdout: 13
25
26 #### String length with incomplete utf-8
27 for num_bytes in 0 1 2 3 4 5 6 7 8 9 10 11 12 13; do
28 s=$(head -c $num_bytes $REPO_ROOT/spec/testdata/utf8-chars.txt)  # keep only the first num_bytes bytes of the file
29 echo ${#s}
30 done 2> $TMP/err.txt  # stderr of the whole loop is collected in err.txt
31
32 grep 'warning:' $TMP/err.txt  # print the collected warnings so they are checked as part of stdout
33 true # exit 0 regardless of grep's status (grep fails when no warning was logged)
34
35 ## STDOUT:
36 0
37 1
38 2
39 -1
40 3
41 4
42 -1
43 -1
44 5
45 6
46 -1
47 -1
48 -1
49 7
50 [ stdin ]:3: warning: UTF-8 decode: Truncated bytes at offset 2 in string of 3 bytes
51 [ stdin ]:3: warning: UTF-8 decode: Truncated bytes at offset 5 in string of 6 bytes
52 [ stdin ]:3: warning: UTF-8 decode: Truncated bytes at offset 5 in string of 7 bytes
53 [ stdin ]:3: warning: UTF-8 decode: Truncated bytes at offset 9 in string of 10 bytes
54 [ stdin ]:3: warning: UTF-8 decode: Truncated bytes at offset 9 in string of 11 bytes
55 [ stdin ]:3: warning: UTF-8 decode: Truncated bytes at offset 9 in string of 12 bytes
56 ## END
57 # zsh behavior actually matches bash!
58 ## BUG bash/zsh stderr-json: ""
59 ## BUG bash/zsh STDOUT:
60 0
61 1
62 2
63 3
64 3
65 4
66 5
67 6
68 5
69 6
70 7
71 8
72 9
73 7
74 ## END
75 ## N-I dash/mksh stderr-json: ""
76 ## N-I dash/mksh STDOUT:
77 0
78 1
79 2
80 3
81 4
82 5
83 6
84 7
85 8
86 9
87 10
88 11
89 12
90 13
91 ## END
92
93 #### String length with invalid utf-8 continuation bytes
94 for num_bytes in 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14; do
95 s=$(head -c $num_bytes $REPO_ROOT/spec/testdata/utf8-chars.txt)$(echo -e "\xFF")  # file prefix plus the output of echo -e "\xFF" (a 0xFF byte where echo supports -e)
96 echo ${#s}
97 done 2> $TMP/err.txt  # stderr of the whole loop is collected in err.txt
98
99 grep 'warning:' $TMP/err.txt  # print the collected warnings so they are checked as part of stdout
100 true  # exit 0 regardless of grep's status (grep fails when no warning was logged)
101
102 ## STDOUT:
103 -1
104 -1
105 -1
106 -1
107 -1
108 -1
109 -1
110 -1
111 -1
112 -1
113 -1
114 -1
115 -1
116 -1
117 -1
118 [ stdin ]:3: warning: UTF-8 decode: Bad encoding at offset 0 in string of 1 bytes
119 [ stdin ]:3: warning: UTF-8 decode: Bad encoding at offset 1 in string of 2 bytes
120 [ stdin ]:3: warning: UTF-8 decode: Bad encoding at offset 2 in string of 3 bytes
121 [ stdin ]:3: warning: UTF-8 decode: Bad encoding at offset 2 in string of 4 bytes
122 [ stdin ]:3: warning: UTF-8 decode: Bad encoding at offset 4 in string of 5 bytes
123 [ stdin ]:3: warning: UTF-8 decode: Bad encoding at offset 5 in string of 6 bytes
124 [ stdin ]:3: warning: UTF-8 decode: Bad encoding at offset 5 in string of 7 bytes
125 [ stdin ]:3: warning: UTF-8 decode: Bad encoding at offset 5 in string of 8 bytes
126 [ stdin ]:3: warning: UTF-8 decode: Bad encoding at offset 8 in string of 9 bytes
127 [ stdin ]:3: warning: UTF-8 decode: Bad encoding at offset 9 in string of 10 bytes
128 [ stdin ]:3: warning: UTF-8 decode: Bad encoding at offset 9 in string of 11 bytes
129 [ stdin ]:3: warning: UTF-8 decode: Bad encoding at offset 9 in string of 12 bytes
130 [ stdin ]:3: warning: UTF-8 decode: Bad encoding at offset 9 in string of 13 bytes
131 [ stdin ]:3: warning: UTF-8 decode: Bad encoding at offset 13 in string of 14 bytes
132 [ stdin ]:3: warning: UTF-8 decode: Bad encoding at offset 13 in string of 14 bytes
133 ## END
134 ## BUG bash/zsh stderr-json: ""
135 ## BUG bash/zsh STDOUT:
136 1
137 2
138 3
139 4
140 4
141 5
142 6
143 7
144 6
145 7
146 8
147 9
148 10
149 8
150 8
151 ## N-I dash stderr-json: ""
152 ## N-I dash STDOUT:
153 7
154 8
155 9
156 10
157 11
158 12
159 13
160 14
161 15
162 16
163 17
164 18
165 19
166 20
167 20
168 ## END
169 ## N-I mksh stderr-json: ""
170 ## N-I mksh STDOUT:
171 1
172 2
173 3
174 4
175 5
176 6
177 7
178 8
179 9
180 10
181 11
182 12
183 13
184 14
185 14
186 ## END
187
188 #### Length of undefined variable
189 echo ${#undef}  # undef is never assigned; without nounset the expected length is 0
190 ## stdout: 0
191
192 #### Length of undefined variable with nounset
193 set -o nounset
194 echo ${#undef}  # expected to fail under nounset: status 1 (dash exits with 2)
195 ## status: 1
196 ## OK dash status: 2
197
198 #### Length operator can't be followed by test operator
199 echo ${#x-default}  # case 1: x is unset
200
201 x=''
202 echo ${#x-default}  # case 2: x is set but empty
203
204 x='foo'
205 echo ${#x-default}  # case 3: x is set to a non-empty value
206
207 ## status: 2
208 ## OK bash/mksh status: 1
209 ## stdout-json: ""
210 ## BUG zsh status: 0
211 ## BUG zsh STDOUT:
212 7
213 0
214 3
215 ## END
216 ## BUG dash status: 0
217 ## BUG dash STDOUT:
218 0
219 0
220 3
221 ## END
222
223 #### ${#s} respects LC_ALL - length in bytes or code points
224 case $SH in dash) exit ;; esac  # dash exits immediately; its expected stdout is empty
225
226 # This test case is affected by the harness environment, because spec-common.sh sets LC_ALL=C.UTF-8
227 #
228 # For some reason mksh behaves differently (see the BUG mksh expectation)
229 #
230 # See demo/04-unicode.sh
231
232 #echo $LC_ALL
233 unset LC_ALL
234
235 # note: this may depend on the CI machine config
236 LANG=en_US.UTF-8
237
238 #LC_ALL=en_US.UTF-8
239
240 for s in $'\u03bc' $'\U00010000'; do  # one character each: U+03BC, then U+10000
241 LC_ALL=  # empty LC_ALL: expected length is 1 for both characters
242 echo "len=${#s}"
243
244 LC_ALL=C  # C locale: expected lengths are 2 and 4
245 echo "len=${#s}"
246
247 echo
248 done
249
250 ## STDOUT:
251 len=1
252 len=2
253
254 len=1
255 len=4
256
257 ## END
258
259 ## N-I dash STDOUT:
260 ## END
261
262 ## BUG mksh STDOUT:
263 len=2
264 len=2
265
266 len=3
267 len=3
268
269 ## END