[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Mldonkey-commits] Changes to mldonkey/src/utils/lib/charset.ml
From: |
mldonkey-commits |
Subject: |
[Mldonkey-commits] Changes to mldonkey/src/utils/lib/charset.ml |
Date: |
Mon, 30 May 2005 17:03:00 -0400 |
Index: mldonkey/src/utils/lib/charset.ml
diff -u mldonkey/src/utils/lib/charset.ml:1.2
mldonkey/src/utils/lib/charset.ml:1.3
--- mldonkey/src/utils/lib/charset.ml:1.2 Wed Apr 13 21:52:27 2005
+++ mldonkey/src/utils/lib/charset.ml Mon May 30 21:02:56 2005
@@ -29,6 +29,8 @@
(*
*)
(**********************************************************************************)
+type uchar = int
+
type charset =
| ANSI_X3_4_1968 | ANSI_X3_4_1986 | ASCII | CP367 | IBM367 | ISO_IR_6 |
ISO646_US | ISO_646_IRV_1991 | US | US_ASCII | CSASCII
| UTF_8
@@ -165,6 +167,159 @@
(**********************************************************************************)
(*
*)
+(* utf8_get
*)
+(*
*)
+(**********************************************************************************)
+
+(* taken from camomile *)
+(* $Id: charset.ml,v 1.3 2005/05/30 21:02:56 spiralvoice Exp $ *)
+(* Copyright 2002, 2003 Yamagata Yoriyuki. distributed with LGPL *)
+
+let utf8_look s i =
+ let n' =
+ let n = Char.code s.[i] in
+ if n < 0x80 then n else
+ if n <= 0xdf then
+ (n - 0xc0) lsl 6 lor (0x7f land (Char.code s.[i + 1]))
+ else if n <= 0xef then
+ let n' = n - 0xe0 in
+ let m0 = Char.code s.[i + 2] in
+ let m = Char.code (String.unsafe_get s (i + 1)) in
+ let n' = n' lsl 6 lor (0x7f land m) in
+ n' lsl 6 lor (0x7f land m0)
+ else if n <= 0xf7 then
+ let n' = n - 0xf0 in
+ let m0 = Char.code s.[i + 3] in
+ let m = Char.code (String.unsafe_get s (i + 1)) in
+ let n' = n' lsl 6 lor (0x7f land m) in
+ let m = Char.code (String.unsafe_get s (i + 2)) in
+ let n' = n' lsl 6 lor (0x7f land m) in
+ n' lsl 6 lor (0x7f land m0)
+ else if n <= 0xfb then
+ let n' = n - 0xf8 in
+ let m0 = Char.code s.[i + 4] in
+ let m = Char.code (String.unsafe_get s (i + 1)) in
+ let n' = n' lsl 6 lor (0x7f land m) in
+ let m = Char.code (String.unsafe_get s (i + 2)) in
+ let n' = n' lsl 6 lor (0x7f land m) in
+ let m = Char.code (String.unsafe_get s (i + 3)) in
+ let n' = n' lsl 6 lor (0x7f land m) in
+ n' lsl 6 lor (0x7f land m0)
+ else if n <= 0xfd then
+ let n' = n - 0xfc in
+ let m0 = Char.code s.[i + 5] in
+ let m = Char.code (String.unsafe_get s (i + 1)) in
+ let n' = n' lsl 6 lor (0x7f land m) in
+ let m = Char.code (String.unsafe_get s (i + 2)) in
+ let n' = n' lsl 6 lor (0x7f land m) in
+ let m = Char.code (String.unsafe_get s (i + 3)) in
+ let n' = n' lsl 6 lor (0x7f land m) in
+ let m = Char.code (String.unsafe_get s (i + 4)) in
+ let n' = n' lsl 6 lor (0x7f land m) in
+ n' lsl 6 lor (0x7f land m0)
+ else invalid_arg "utf8_look"
+ in
+ if n' lsr 31 = 0 then n' else
+ invalid_arg "utf8_look char_of_uint"
+
+let rec search_head s i =
+ if i >= String.length s then i else
+ let n = Char.code (String.unsafe_get s i) in
+ if n < 0x80 || n >= 0xc2 then i else
+ search_head s (i + 1)
+
+let utf8_next s i =
+ let n = Char.code s.[i] in
+ if n < 0x80 then i + 1 else
+ if n < 0xc0 then search_head s (i + 1) else
+ if n <= 0xdf then i + 2
+ else if n <= 0xef then i + 3
+ else if n <= 0xf7 then i + 4
+ else if n <= 0xfb then i + 5
+ else if n <= 0xfd then i + 6
+ else invalid_arg "utf8_next"
+
+let rec nth_aux s i n =
+ if n = 0 then i else
+ nth_aux s (utf8_next s i) (n - 1)
+
+let nth s n = nth_aux s 0 n
+
+let utf8_get s n = utf8_look s (nth s n)
+
+(**********************************************************************************)
+(*
*)
+(* utf8_length
*)
+(*
*)
+(**********************************************************************************)
+
+(* taken from camomile *)
+(* $Id: charset.ml,v 1.3 2005/05/30 21:02:56 spiralvoice Exp $ *)
+(* Copyright 2002, 2003 Yamagata Yoriyuki. distributed with LGPL *)
+
+let rec length_aux s c i =
+ if i >= String.length s then c else
+ let n = Char.code (String.unsafe_get s i) in
+ let k =
+ if n < 0x80 then 1 else
+ if n < 0xc0 then invalid_arg "UTF8.length" else
+ if n < 0xe0 then 2 else
+ if n < 0xf0 then 3 else
+ if n < 0xf8 then 4 else
+ if n < 0xfc then 5 else
+ if n < 0xfe then 6 else
+ invalid_arg "UTF8.length" in
+ length_aux s (c + 1) (i + k)
+
+let utf8_length s = length_aux s 0 0
+
+(**********************************************************************************)
+(*
*)
+(* add_uchar (internal)
*)
+(*
*)
+(**********************************************************************************)
+
+
+(* taken from camomile *)
+(* $Id: charset.ml,v 1.3 2005/05/30 21:02:56 spiralvoice Exp $ *)
+(* Copyright 2002, 2003 Yamagata Yoriyuki. distributed with LGPL *)
+
+external uint_code : uchar -> int = "%identity"
+
+let add_uchar buf u =
+ let masq = 0b111111 in
+ let k = uint_code u in
+ if k < 0 || k >= 0x4000000 then begin
+ Buffer.add_char buf (Char.chr (0xfc + (k lsr 30)));
+ Buffer.add_char buf (Char.unsafe_chr (0x80 lor ((k lsr 24) land masq)));
+ Buffer.add_char buf (Char.unsafe_chr (0x80 lor ((k lsr 18) land masq)));
+ Buffer.add_char buf (Char.unsafe_chr (0x80 lor ((k lsr 12) land masq)));
+ Buffer.add_char buf (Char.unsafe_chr (0x80 lor ((k lsr 6) land masq)));
+ Buffer.add_char buf (Char.unsafe_chr (0x80 lor (k land masq)));
+ end else if k <= 0x7f then
+ Buffer.add_char buf (Char.unsafe_chr k)
+ else if k <= 0x7ff then begin
+ Buffer.add_char buf (Char.unsafe_chr (0xc0 lor (k lsr 6)));
+ Buffer.add_char buf (Char.unsafe_chr (0x80 lor (k land masq)))
+ end else if k <= 0xffff then begin
+ Buffer.add_char buf (Char.unsafe_chr (0xe0 lor (k lsr 12)));
+ Buffer.add_char buf (Char.unsafe_chr (0x80 lor ((k lsr 6) land masq)));
+ Buffer.add_char buf (Char.unsafe_chr (0x80 lor (k land masq)));
+ end else if k <= 0x1fffff then begin
+ Buffer.add_char buf (Char.unsafe_chr (0xf0 + (k lsr 18)));
+ Buffer.add_char buf (Char.unsafe_chr (0x80 lor ((k lsr 12) land masq)));
+ Buffer.add_char buf (Char.unsafe_chr (0x80 lor ((k lsr 6) land masq)));
+ Buffer.add_char buf (Char.unsafe_chr (0x80 lor (k land masq)));
+ end else begin
+ Buffer.add_char buf (Char.unsafe_chr (0xf8 + (k lsr 24)));
+ Buffer.add_char buf (Char.unsafe_chr (0x80 lor ((k lsr 18) land masq)));
+ Buffer.add_char buf (Char.unsafe_chr (0x80 lor ((k lsr 12) land masq)));
+ Buffer.add_char buf (Char.unsafe_chr (0x80 lor ((k lsr 6) land masq)));
+ Buffer.add_char buf (Char.unsafe_chr (0x80 lor (k land masq)));
+ end
+
+(**********************************************************************************)
+(*
*)
(* charset_to_string
*)
(*
*)
(**********************************************************************************)
@@ -1493,6 +1648,13 @@
with _ -> ASCII
let locstr =
+ Printf.printf "Blocking system signals until core is started...\n";
+ (* block signals until core started correctly *)
+ (MlUnix.set_signal Sys.sigint
+ (Sys.Signal_handle (fun _ -> ())));
+ (MlUnix.set_signal Sys.sigterm
+ (Sys.Signal_handle (fun _ -> ())));
+
let s = charset_to_string locale in
Printf.printf "Current locale of the target machine is %s\n" s;
flush stdout;
@@ -1656,16 +1818,22 @@
(**********************************************************************************)
(*
*)
-(* slow_encode
*)
+(* slow_encode_from_utf8
*)
(*
*)
(**********************************************************************************)
-let slow_encode s to_codeset =
+let slow_encode_from_utf8 s to_codeset =
let us = ref "" in
- let slen = String.length s in
+ let slen = utf8_length s in
+ let buf = Buffer.create 10 in
for i = 0 to (slen - 1) do
try
- us := !us ^ (convert_string (String.sub s i 1) to_codeset locstr)
+ let uchar = utf8_get s i in
+ add_uchar buf uchar;
+ let s' = Buffer.contents buf in
+ Buffer.reset buf;
+ let s' = convert_string s' to_codeset "UTF-8" in
+ us := !us ^ s'
with _ ->
us := !us ^ char_const
done;
@@ -1673,6 +1841,27 @@
(**********************************************************************************)
(*
*)
+(* slow_encode
*)
+(*
*)
+(**********************************************************************************)
+
+let slow_encode s to_codeset =
+ if is_utf8 s
+ then slow_encode_from_utf8 s to_codeset
+ else begin
+ let us = ref "" in
+ let slen = String.length s in
+ for i = 0 to (slen - 1) do
+ try
+ us := !us ^ (convert_string (String.sub s i 1) to_codeset locstr)
+ with _ ->
+ us := !us ^ char_const
+ done;
+ !us
+ end
+
+(**********************************************************************************)
+(*
*)
(* fast_encode
*)
(*
*)
(**********************************************************************************)
@@ -1719,20 +1908,9 @@
try
convert_string s locstr "UTF-8"
with _ ->
- begin
- let us = ref "" in
- let slen = String.length s in
- for i = 0 to (slen - 1) do
- try
- us := !us ^ (convert_string (String.sub s i 1) locstr
"UTF-8")
- with _ ->
- us := !us ^ char_const
- done;
- !us
- end
+ slow_encode_from_utf8 s locstr
end
end
-
let _ =
set_default_charset_list default_language
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- [Mldonkey-commits] Changes to mldonkey/src/utils/lib/charset.ml,
mldonkey-commits <=