Main Page   Modules   Data Structures   File List   Data Fields  

svn_utf.h

00001 /**
00002  * @copyright
00003  * ====================================================================
00004  * Copyright (c) 2000-2004 CollabNet.  All rights reserved.
00005  *
00006  * This software is licensed as described in the file COPYING, which
00007  * you should have received as part of this distribution.  The terms
00008  * are also available at http://subversion.tigris.org/license-1.html.
00009  * If newer versions of this license are posted there, you may use a
00010  * newer version instead, at your option.
00011  *
00012  * This software consists of voluntary contributions made by many
00013  * individuals.  For exact contribution history, see the revision
00014  * history and logs, available at http://subversion.tigris.org/.
00015  * ====================================================================
00016  * @endcopyright
00017  *
00018  * @file svn_utf.h
00019  * @brief UTF-8 conversion routines
00020  */
00021 
00022 
00023 
00024 #ifndef SVN_UTF_H
00025 #define SVN_UTF_H
00026 
00027 #include <apr_xlate.h>
00028 
00029 #include "svn_error.h"
00030 #include "svn_string.h"
00031 
00032 #ifdef __cplusplus
00033 extern "C" {
00034 #endif /* __cplusplus */
00035 
00036 
00037 /**
00038  * Initialize the UTF-8 encoding/decoding routines.
00039  * Allocate cached translation handles in a subpool of @a pool.
00040  *
00041  * @note It is optional to call this function, but if it is used, no other
00042  * svn function may be in use in other threads during the call of this
00043  * function or when @a pool is cleared or destroyed.
00044  * Initializing the UTF-8 routines will improve performance.
00045  *
00046  * @since New in 1.1.
00047  */
00048 void svn_utf_initialize (apr_pool_t *pool);
00049 
00050 /** Set @a *dest to a utf8-encoded stringbuf from native stringbuf @a src;
00051  * allocate @a *dest in @a pool.
00052  */
00053 svn_error_t *svn_utf_stringbuf_to_utf8 (svn_stringbuf_t **dest,
00054                                         const svn_stringbuf_t *src,
00055                                         apr_pool_t *pool);
00056 
00057 
00058 /** Set @a *dest to a utf8-encoded string from native string @a src; allocate
00059  * @a *dest in @a pool.
00060  */
00061 svn_error_t *svn_utf_string_to_utf8 (const svn_string_t **dest,
00062                                      const svn_string_t *src,
00063                                      apr_pool_t *pool);
00064 
00065 
00066 /** Set @a *dest to a utf8-encoded C string from native C string @a src;
00067  * allocate @a *dest in @a pool.
00068  */
00069 svn_error_t *svn_utf_cstring_to_utf8 (const char **dest,
00070                                       const char *src,
00071                                       apr_pool_t *pool);
00072 
00073 
00074 /** Set @a *dest to a utf8-encoded C string from C string @a src, which
00075  * is encoded in the @a frompage charset; allocate @a *dest in @a pool.
00076  * Use @a convset_key as the cache key for the charset converter; if
00077  * it's NULL, don't cache the converter.
00078  */
00079 svn_error_t *svn_utf_cstring_to_utf8_ex (const char **dest,
00080                                          const char *src,
00081                                          const char *frompage,
00082                                          const char *convset_key,
00083                                          apr_pool_t *pool);
00084 
00085 
00086 /** Set @a *dest to a natively-encoded stringbuf from utf8 stringbuf @a src;
00087  * allocate @a *dest in @a pool.
00088  */
00089 svn_error_t *svn_utf_stringbuf_from_utf8 (svn_stringbuf_t **dest,
00090                                           const svn_stringbuf_t *src,
00091                                           apr_pool_t *pool);
00092 
00093 
00094 /** Set @a *dest to a natively-encoded string from utf8 string @a src;
00095  * allocate @a *dest in @a pool.
00096  */
00097 svn_error_t *svn_utf_string_from_utf8 (const svn_string_t **dest,
00098                                        const svn_string_t *src,
00099                                        apr_pool_t *pool);
00100 
00101 
00102 /** Set @a *dest to a natively-encoded C string from utf8 C string @a src;
00103  * allocate @a *dest in @a pool.
00104  */
00105 svn_error_t *svn_utf_cstring_from_utf8 (const char **dest,
00106                                         const char *src,
00107                                         apr_pool_t *pool);
00108 
00109 
00110 /** Set @a *dest to a @a topage encoded C string from utf8 C string
00111  * @a src; allocate @a *dest in @a pool.  Use @a convset_key as the
00112  * cache key for the charset converter; if it's NULL, don't cache the
00113  * converter.
00114  */
00115 svn_error_t *svn_utf_cstring_from_utf8_ex (const char **dest,
00116                                            const char *src,
00117                                            const char *topage,
00118                                            const char *convset_key,
00119                                            apr_pool_t *pool);
00120 
00121 
00122 /** Return a fuzzily native-encoded C string from utf8 C string @a src,
00123  * allocated in @a pool.  A fuzzy recoding leaves all 7-bit ascii
00124  * characters the same, and substitutes "?\\XXX" for others, where XXX
00125  * is the unsigned decimal code for that character.
00126  *
00127  * This function cannot error; it is guaranteed to return something.
00128  * First it will recode as described above and then attempt to convert
00129  * the (new) 7-bit UTF-8 string to native encoding.  If that fails, it
00130  * will return the raw fuzzily recoded string, which may or may not be
00131  * meaningful in the client's locale, but is (presumably) better than
00132  * nothing.
00133  *
00134  * ### Notes:
00135  *
00136  * Improvement is possible, even imminent.  The original problem was
00137  * that if you converted a UTF-8 string (say, a log message) into a
00138  * locale that couldn't represent all the characters, you'd just get a
00139  * static placeholder saying "[unconvertible log message]".  Then
00140  * Justin Erenkrantz pointed out how on platforms that didn't support
00141  * conversion at all, "svn log" would still fail completely when it
00142  * encountered unconvertible data.
00143  *
00144  * Now for both cases, the caller can at least fall back on this
00145  * function, which converts the message as best it can, substituting
00146  * ?\\XXX escape codes for the non-ascii characters.
00147  *
00148  * Ultimately, some callers may prefer the iconv "//TRANSLIT" option,
00149  * so when we can detect that at configure time, things will change.
00150  * Also, this should (?) be moved to apr/apu eventually.
00151  *
00152  * See http://subversion.tigris.org/issues/show_bug.cgi?id=807 for
00153  * details.
00154  */
00155 const char *svn_utf_cstring_from_utf8_fuzzy (const char *src,
00156                                              apr_pool_t *pool);
00157 
00158 
00159 /** Set @a *dest to a natively-encoded C string from utf8 stringbuf @a src;
00160  * allocate @a *dest in @a pool.
00161  */
00162 svn_error_t *svn_utf_cstring_from_utf8_stringbuf (const char **dest,
00163                                                   const svn_stringbuf_t *src,
00164                                                   apr_pool_t *pool);
00165 
00166 
00167 /** Set @a *dest to a natively-encoded C string from utf8 string @a src;
00168  * allocate @a *dest in @a pool.
00169  */
00170 svn_error_t *svn_utf_cstring_from_utf8_string (const char **dest,
00171                                                const svn_string_t *src,
00172                                                apr_pool_t *pool);
00173 
00174 #ifdef __cplusplus
00175 }
00176 #endif /* __cplusplus */
00177 
00178 #endif /* SVN_UTF_H */

Generated on Wed Jun 7 11:02:15 2006 for Subversion by doxygen 1.2.14 written by Dimitri van Heesch, © 1997-2002