Browse Source

Implement a utf8_decode function to produce wchar_t needed by curses

Luke Dashjr 12 years ago
parent
commit
81b70f1757
3 changed files with 107 additions and 0 deletions
  1. 1 0
      miner.c
  2. 100 0
      util.c
  3. 6 0
      util.h

+ 1 - 0
miner.c

@@ -10824,6 +10824,7 @@ int main(int argc, char *argv[])
 		test_cgpu_match();
 		test_cgpu_match();
 		test_intrange();
 		test_intrange();
 		test_decimal_width();
 		test_decimal_width();
+		utf8_test();
 	}
 	}
 
 
 #ifdef HAVE_CURSES
 #ifdef HAVE_CURSES

+ 100 - 0
util.c

@@ -1385,6 +1385,106 @@ double tdiff(struct timeval *end, struct timeval *start)
 	return end->tv_sec - start->tv_sec + (end->tv_usec - start->tv_usec) / 1000000.0;
 	return end->tv_sec - start->tv_sec + (end->tv_usec - start->tv_usec) / 1000000.0;
 }
 }
 
 
+
+int32_t utf8_decode(const void *b, int *out_len)
+{
+	int32_t w;
+	const unsigned char *s = b;
+	
+	if (!(s[0] & 0x80))
+	{
+		// ASCII
+		*out_len = 1;
+		return s[0];
+	}
+	
+#ifdef STRICT_UTF8
+	if (unlikely(!(s[0] & 0x40)))
+		goto invalid;
+#endif
+	
+	if (!(s[0] & 0x20))
+		*out_len = 2;
+	else
+	if (!(s[0] & 0x10))
+		*out_len = 3;
+	else
+	if (likely(!(s[0] & 8)))
+		*out_len = 4;
+	else
+		goto invalid;
+	
+	w = s[0] & ((2 << (6 - *out_len)) - 1);
+	for (int i = 1; i < *out_len; ++i)
+	{
+#ifdef STRICT_UTF8
+		if (unlikely((s[i] & 0xc0) != 0x80))
+			goto invalid;
+#endif
+		w = (w << 6) | (s[i] & 0x3f);
+	}
+	
+#if defined(STRICT_UTF8)
+	if (unlikely(w > 0x10FFFF))
+		goto invalid;
+	
+	// FIXME: UTF-8 requires smallest possible encoding; check it
+#endif
+	
+	return w;
+
+invalid:
+	*out_len = 1;
+	return REPLACEMENT_CHAR;
+}
+
+static
+void _utf8_test(const char *s, const wchar_t expected, int expectedlen)
+{
+	int len;
+	wchar_t r;
+	
+	r = utf8_decode(s, &len);
+	if (unlikely(r != expected || expectedlen != len))
+		applog(LOG_ERR, "UTF-8 test U+%06lX (len %d) failed: got U+%06lX (len %d)", (unsigned long)expected, expectedlen, (unsigned long)r, len);
+}
+#define _test_intrange(s, ...)  _test_intrange(s, (int[]){ __VA_ARGS__ })
+
+void utf8_test()
+{
+	_utf8_test("", 0, 1);
+	_utf8_test("\1", 1, 1);
+	_utf8_test("\x7f", 0x7f, 1);
+#if WCHAR_MAX >= 0x80
+	_utf8_test("\xc2\x80", 0x80, 2);
+#if WCHAR_MAX >= 0xff
+	_utf8_test("\xc3\xbf", 0xff, 2);
+#if WCHAR_MAX >= 0x7ff
+	_utf8_test("\xdf\xbf", 0x7ff, 2);
+#if WCHAR_MAX >= 0x800
+	_utf8_test("\xe0\xa0\x80", 0x800, 3);
+#if WCHAR_MAX >= 0xffff
+	_utf8_test("\xef\xbf\xbf", 0xffff, 3);
+#if WCHAR_MAX >= 0x10000
+	_utf8_test("\xf0\x90\x80\x80", 0x10000, 4);
+#if WCHAR_MAX >= 0x10ffff
+	_utf8_test("\xf4\x8f\xbf\xbf", 0x10ffff, 4);
+#endif
+#endif
+#endif
+#endif
+#endif
+#endif
+#endif
+#ifdef STRICT_UTF8
+	_utf8_test("\x80", REPLACEMENT_CHAR, 1);
+	_utf8_test("\xbf", REPLACEMENT_CHAR, 1);
+	_utf8_test("\xfe", REPLACEMENT_CHAR, 1);
+	_utf8_test("\xff", REPLACEMENT_CHAR, 1);
+#endif
+}
+
+
 bool extract_sockaddr(char *url, char **sockaddr_url, char **sockaddr_port)
 bool extract_sockaddr(char *url, char **sockaddr_url, char **sockaddr_port)
 {
 {
 	char *url_begin, *url_end, *ipv6_begin, *ipv6_end, *port_start = NULL;
 	char *url_begin, *url_end, *ipv6_begin, *ipv6_end, *port_start = NULL;

+ 6 - 0
util.h

@@ -440,6 +440,12 @@ struct timeval *select_timeout(struct timeval *tvp_timeout, struct timeval *tvp_
 #define _SNP(...)  _SNP2(snprintf, __VA_ARGS__)
 #define _SNP(...)  _SNP2(snprintf, __VA_ARGS__)
 
 
 
 
+#define REPLACEMENT_CHAR (0xFFFD)
+extern int32_t utf8_decode(const void *, int *out_len);
+extern void utf8_test();
+
+
+
 #define RUNONCE(rv)  do {  \
 #define RUNONCE(rv)  do {  \
 	static bool _runonce = false;  \
 	static bool _runonce = false;  \
 	if (_runonce)  \
 	if (_runonce)  \