00001 /** 00002 * @copyright 00003 * ==================================================================== 00004 * Copyright (c) 2000-2004 CollabNet. All rights reserved. 00005 * 00006 * This software is licensed as described in the file COPYING, which 00007 * you should have received as part of this distribution. The terms 00008 * are also available at http://subversion.tigris.org/license-1.html. 00009 * If newer versions of this license are posted there, you may use a 00010 * newer version instead, at your option. 00011 * 00012 * This software consists of voluntary contributions made by many 00013 * individuals. For exact contribution history, see the revision 00014 * history and logs, available at http://subversion.tigris.org/. 00015 * ==================================================================== 00016 * @endcopyright 00017 * 00018 * @file svn_utf.h 00019 * @brief UTF-8 conversion routines 00020 */ 00021 00022 00023 00024 #ifndef SVN_UTF_H 00025 #define SVN_UTF_H 00026 00027 #include <apr_xlate.h> 00028 00029 #include "svn_error.h" 00030 #include "svn_string.h" 00031 00032 #ifdef __cplusplus 00033 extern "C" { 00034 #endif /* __cplusplus */ 00035 00036 00037 /** 00038 * @since New in 1.1. 00039 * 00040 * Initialize the UTF-8 encoding/decoding routines. 00041 * Allocate cached translation handles in a subpool of @a pool. 00042 * 00043 * @note It is optional to call this function, but if it is used, no other 00044 * svn function may be in use in other threads during the call of this 00045 * function or when @a pool is cleared or destroyed. 00046 * Initializing the UTF-8 routines will improve performance. 00047 */ 00048 void svn_utf_initialize (apr_pool_t *pool); 00049 00050 /** Set @a *dest to a utf8-encoded stringbuf from native stringbuf @a src; 00051 * allocate @a *dest in @a pool. 00052 */ 00053 svn_error_t *svn_utf_stringbuf_to_utf8 (svn_stringbuf_t **dest, 00054 const svn_stringbuf_t *src, 00055 apr_pool_t *pool); 00056 00057 00058 /** Set @a *dest to a utf8-encoded string from native string @a src; allocate 00059 * @a *dest in @a pool. 00060 */ 00061 svn_error_t *svn_utf_string_to_utf8 (const svn_string_t **dest, 00062 const svn_string_t *src, 00063 apr_pool_t *pool); 00064 00065 00066 /** Set @a *dest to a utf8-encoded C string from native C string @a src; 00067 * allocate @a *dest in @a pool. 00068 */ 00069 svn_error_t *svn_utf_cstring_to_utf8 (const char **dest, 00070 const char *src, 00071 apr_pool_t *pool); 00072 00073 00074 /** Set @a *dest to a utf8-encoded C string from @a frompage C string 00075 * @a src; allocate @a *dest in @a pool. Use @a convset_key as the 00076 * cache key for the charset converter; if it's NULL, don't cache the 00077 * converter. 00078 */ 00079 svn_error_t *svn_utf_cstring_to_utf8_ex (const char **dest, 00080 const char *src, 00081 const char *frompage, 00082 const char *convset_key, 00083 apr_pool_t *pool); 00084 00085 00086 /** Set @a *dest to a natively-encoded stringbuf from utf8 stringbuf @a src; 00087 * allocate @a *dest in @a pool. 00088 */ 00089 svn_error_t *svn_utf_stringbuf_from_utf8 (svn_stringbuf_t **dest, 00090 const svn_stringbuf_t *src, 00091 apr_pool_t *pool); 00092 00093 00094 /** Set @a *dest to a natively-encoded string from utf8 string @a src; 00095 * allocate @a *dest in @a pool. 00096 */ 00097 svn_error_t *svn_utf_string_from_utf8 (const svn_string_t **dest, 00098 const svn_string_t *src, 00099 apr_pool_t *pool); 00100 00101 00102 /** Set @a *dest to a natively-encoded C string from utf8 C string @a src; 00103 * allocate @a *dest in @a pool. 00104 */ 00105 svn_error_t *svn_utf_cstring_from_utf8 (const char **dest, 00106 const char *src, 00107 apr_pool_t *pool); 00108 00109 00110 /** Set @a *dest to a @a frompage encoded C string from utf8 C string 00111 * @a src; allocate @a *dest in @a pool. Use @a convset_key as the 00112 * cache key for the charset converter; if it's NULL, don't cache the 00113 * converter. 00114 */ 00115 svn_error_t *svn_utf_cstring_from_utf8_ex (const char **dest, 00116 const char *src, 00117 const char *topage, 00118 const char *convset_key, 00119 apr_pool_t *pool); 00120 00121 00122 /** Return a fuzzily native-encoded C string from utf8 C string @a src, 00123 * allocated in @a pool. A fuzzy recoding leaves all 7-bit ascii 00124 * characters the same, and substitutes "?\\XXX" for others, where XXX 00125 * is the unsigned decimal code for that character. 00126 * 00127 * This function cannot error; it is guaranteed to return something. 00128 * First it will recode as described above and then attempt to convert 00129 * the (new) 7-bit UTF-8 string to native encoding. If that fails, it 00130 * will return the raw fuzzily recoded string, which may or may not be 00131 * meaningful in the client's locale, but is (presumably) better than 00132 * nothing. 00133 * 00134 * ### Notes: 00135 * 00136 * Improvement is possible, even imminent. The original problem was 00137 * that if you converted a UTF-8 string (say, a log message) into a 00138 * locale that couldn't represent all the characters, you'd just get a 00139 * static placeholder saying "[unconvertible log message]". Then 00140 * Justin Erenkrantz pointed out how on platforms that didn't support 00141 * conversion at all, "svn log" would still fail completely when it 00142 * encountered unconvertible data. 00143 * 00144 * Now for both cases, the caller can at least fall back on this 00145 * function, which converts the message as best it can, substituting 00146 * ?\\XXX escape codes for the non-ascii characters. 00147 * 00148 * Ultimately, some callers may prefer the iconv "//TRANSLIT" option, 00149 * so when we can detect that at configure time, things will change. 00150 * Also, this should (?) be moved to apr/apu eventually. 00151 * 00152 * See http://subversion.tigris.org/issues/show_bug.cgi?id=807 for 00153 * details. 00154 */ 00155 const char *svn_utf_cstring_from_utf8_fuzzy (const char *src, 00156 apr_pool_t *pool); 00157 00158 00159 /** Set @a *dest to a natively-encoded C string from utf8 stringbuf @a src; 00160 * allocate @a *dest in @a pool. 00161 */ 00162 svn_error_t *svn_utf_cstring_from_utf8_stringbuf (const char **dest, 00163 const svn_stringbuf_t *src, 00164 apr_pool_t *pool); 00165 00166 00167 /** Set @a *dest to a natively-encoded C string from utf8 string @a src; 00168 * allocate @a *dest in @a pool. 00169 */ 00170 svn_error_t *svn_utf_cstring_from_utf8_string (const char **dest, 00171 const svn_string_t *src, 00172 apr_pool_t *pool); 00173 00174 #ifdef __cplusplus 00175 } 00176 #endif /* __cplusplus */ 00177 00178 #endif /* SVN_UTF_H */