00001 /** 00002 * @copyright 00003 * ==================================================================== 00004 * Copyright (c) 2000-2004 CollabNet. All rights reserved. 00005 * 00006 * This software is licensed as described in the file COPYING, which 00007 * you should have received as part of this distribution. The terms 00008 * are also available at http://subversion.tigris.org/license-1.html. 00009 * If newer versions of this license are posted there, you may use a 00010 * newer version instead, at your option. 00011 * 00012 * This software consists of voluntary contributions made by many 00013 * individuals. For exact contribution history, see the revision 00014 * history and logs, available at http://subversion.tigris.org/. 00015 * ==================================================================== 00016 * @endcopyright 00017 * 00018 * @file svn_utf.h 00019 * @brief UTF-8 conversion routines 00020 */ 00021 00022 00023 00024 #ifndef SVN_UTF_H 00025 #define SVN_UTF_H 00026 00027 #include <apr_xlate.h> 00028 00029 #include "svn_error.h" 00030 #include "svn_string.h" 00031 00032 #ifdef __cplusplus 00033 extern "C" { 00034 #endif /* __cplusplus */ 00035 00036 00037 /** Set @a *dest to a utf8-encoded stringbuf from native stringbuf @a src; 00038 * allocate @a *dest in @a pool. 00039 */ 00040 svn_error_t *svn_utf_stringbuf_to_utf8 (svn_stringbuf_t **dest, 00041 const svn_stringbuf_t *src, 00042 apr_pool_t *pool); 00043 00044 00045 /** Set @a *dest to a utf8-encoded string from native string @a src; allocate 00046 * @a *dest in @a pool. 00047 */ 00048 svn_error_t *svn_utf_string_to_utf8 (const svn_string_t **dest, 00049 const svn_string_t *src, 00050 apr_pool_t *pool); 00051 00052 00053 /** Set @a *dest to a utf8-encoded C string from native C string @a src; 00054 * allocate @a *dest in @a pool. 00055 */ 00056 svn_error_t *svn_utf_cstring_to_utf8 (const char **dest, 00057 const char *src, 00058 apr_pool_t *pool); 00059 00060 00061 /** Set @a *dest to a utf8-encoded C string from @a frompage C string 00062 * @a src; allocate @a *dest in @a pool. Use @a convset_key as the 00063 * cache key for the charset converter; if it's NULL, don't cache the 00064 * converter. 00065 */ 00066 svn_error_t *svn_utf_cstring_to_utf8_ex (const char **dest, 00067 const char *src, 00068 const char *frompage, 00069 const char *convset_key, 00070 apr_pool_t *pool); 00071 00072 00073 /** Set @a *dest to a natively-encoded stringbuf from utf8 stringbuf @a src; 00074 * allocate @a *dest in @a pool. 00075 */ 00076 svn_error_t *svn_utf_stringbuf_from_utf8 (svn_stringbuf_t **dest, 00077 const svn_stringbuf_t *src, 00078 apr_pool_t *pool); 00079 00080 00081 /** Set @a *dest to a natively-encoded string from utf8 string @a src; 00082 * allocate @a *dest in @a pool. 00083 */ 00084 svn_error_t *svn_utf_string_from_utf8 (const svn_string_t **dest, 00085 const svn_string_t *src, 00086 apr_pool_t *pool); 00087 00088 00089 /** Set @a *dest to a natively-encoded C string from utf8 C string @a src; 00090 * allocate @a *dest in @a pool. 00091 */ 00092 svn_error_t *svn_utf_cstring_from_utf8 (const char **dest, 00093 const char *src, 00094 apr_pool_t *pool); 00095 00096 00097 /** Set @a *dest to a @a frompage encoded C string from utf8 C string 00098 * @a src; allocate @a *dest in @a pool. Use @a convset_key as the 00099 * cache key for the charset converter; if it's NULL, don't cache the 00100 * converter. 00101 */ 00102 svn_error_t *svn_utf_cstring_from_utf8_ex (const char **dest, 00103 const char *src, 00104 const char *topage, 00105 const char *convset_key, 00106 apr_pool_t *pool); 00107 00108 00109 /** Return a fuzzily native-encoded C string from utf8 C string @a src, 00110 * allocated in @a pool. A fuzzy recoding leaves all 7-bit ascii 00111 * characters the same, and substitutes "?\\XXX" for others, where XXX 00112 * is the unsigned decimal code for that character. 00113 * 00114 * This function cannot error; it is guaranteed to return something. 00115 * First it will recode as described above and then attempt to convert 00116 * the (new) 7-bit UTF-8 string to native encoding. If that fails, it 00117 * will return the raw fuzzily recoded string, which may or may not be 00118 * meaningful in the client's locale, but is (presumably) better than 00119 * nothing. 00120 * 00121 * ### Notes: 00122 * 00123 * Improvement is possible, even imminent. The original problem was 00124 * that if you converted a UTF-8 string (say, a log message) into a 00125 * locale that couldn't represent all the characters, you'd just get a 00126 * static placeholder saying "[unconvertible log message]". Then 00127 * Justin Erenkrantz pointed out how on platforms that didn't support 00128 * conversion at all, "svn log" would still fail completely when it 00129 * encountered unconvertible data. 00130 * 00131 * Now for both cases, the caller can at least fall back on this 00132 * function, which converts the message as best it can, substituting 00133 * ?\\XXX escape codes for the non-ascii characters. 00134 * 00135 * Ultimately, some callers may prefer the iconv "//TRANSLIT" option, 00136 * so when we can detect that at configure time, things will change. 00137 * Also, this should (?) be moved to apr/apu eventually. 00138 * 00139 * See http://subversion.tigris.org/issues/show_bug.cgi?id=807 for 00140 * details. 00141 */ 00142 const char *svn_utf_cstring_from_utf8_fuzzy (const char *src, 00143 apr_pool_t *pool); 00144 00145 00146 /** Set @a *dest to a natively-encoded C string from utf8 stringbuf @a src; 00147 * allocate @a *dest in @a pool. 00148 */ 00149 svn_error_t *svn_utf_cstring_from_utf8_stringbuf (const char **dest, 00150 const svn_stringbuf_t *src, 00151 apr_pool_t *pool); 00152 00153 00154 /** Set @a *dest to a natively-encoded C string from utf8 string @a src; 00155 * allocate @a *dest in @a pool. 00156 */ 00157 svn_error_t *svn_utf_cstring_from_utf8_string (const char **dest, 00158 const svn_string_t *src, 00159 apr_pool_t *pool); 00160 00161 #ifdef __cplusplus 00162 } 00163 #endif /* __cplusplus */ 00164 00165 #endif /* SVN_XML_H */