«Decode %hex encoded characters» by jamshark70

on 15 Oct'18 09:57 in utilitystringhex

A quick utility function for decoding UTF-8 characters in %hex codes (such as for web URLs). Save the code into a .sc file in your Extensions/ directory, recompile, and then:

"%E5%A4%A7%E5%AE%B6%E5%A5%BD".decodeHex; // -> 大家好

This probably works only for UTF-8. SC can store the individual bytes of Unicode characters in String objects, and the IDE will display UTF-8 byte sequences correctly, but there is no way to switch the IDE to other Unicode encodings (such as UTF-16).

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
+ String {
	// Decode percent-encoded ("%XX") bytes in the receiver and answer a new String.
	//
	// Every "%" that is immediately followed by two hexadecimal digits
	// (0-9, a-f, A-F) is replaced by the single Char whose byte value is that
	// hex number. All other characters are copied through unchanged. The
	// receiver is not modified.
	//
	// A "%" that is NOT followed by two hex digits (end of string, "%4",
	// "%zz", "100% sure", ...) is treated as a literal "%" and scanning
	// resumes at the very next character, so malformed input never throws.
	//
	// Decoded bytes are appended one by one; a multi-byte UTF-8 sequence such
	// as "%E5%A4%A7" therefore ends up as three consecutive bytes in the result.
	decodeHex {
		// Explicit whitelist: Char:digit also accepts g-z (base 36) and fails on
		// non-alphanumeric chars, so it cannot be used to validate hex digits.
		var hexDigits = "0123456789abcdefABCDEF";
		var size = this.size;
		var result = String.new(size);
		var i = 0, ch, hi, lo;

		while { i < size } {
			ch = this[i];
			// Only look ahead when two more characters actually exist.
			if((ch == $%) and: { (i + 2) < size }) {
				hi = this[i + 1];
				lo = this[i + 2];
			} {
				hi = nil;
				lo = nil;
			};
			if(hi.notNil and: { hexDigits.includes(hi) and: { hexDigits.includes(lo) } }) {
				// High nibble, low nibble -> one byte.
				result = result.add(((hi.digit << 4) + lo.digit).asAscii);
				i = i + 3;
			} {
				// Ordinary character, or a "%" that does not start a valid escape.
				result = result.add(ch);
				i = i + 1;
			};
		};
		^result
	}
}
raw 652 chars (focus & ctrl+a+c to copy)
reception
comments