00001 /** 00002 * @copyright 00003 * ==================================================================== 00004 * Copyright (c) 2000-2004 CollabNet. All rights reserved. 00005 * 00006 * This software is licensed as described in the file COPYING, which 00007 * you should have received as part of this distribution. The terms 00008 * are also available at http://subversion.tigris.org/license-1.html. 00009 * If newer versions of this license are posted there, you may use a 00010 * newer version instead, at your option. 00011 * 00012 * This software consists of voluntary contributions made by many 00013 * individuals. For exact contribution history, see the revision 00014 * history and logs, available at http://subversion.tigris.org/. 00015 * ==================================================================== 00016 * @endcopyright 00017 * 00018 * @file svn_xml.h 00019 * @brief XML code shared by various Subversion libraries. 00020 */ 00021 00022 00023 00024 #ifndef SVN_XML_H 00025 #define SVN_XML_H 00026 00027 #include <apr.h> 00028 #include <apr_pools.h> 00029 #include <apr_hash.h> 00030 00031 #include "svn_error.h" 00032 #include "svn_string.h" 00033 00034 #ifdef __cplusplus 00035 extern "C" { 00036 #endif /* __cplusplus */ 00037 00038 /** The namespace all Subversion XML uses. */ 00039 #define SVN_XML_NAMESPACE "svn:" 00040 00041 /** Used as style argument to svn_xml_make_open_tag() and friends. */ 00042 enum svn_xml_open_tag_style { 00043 /** <tag ...> */ 00044 svn_xml_normal = 1, 00045 00046 /** <tag ...>, no cosmetic newline */ 00047 svn_xml_protect_pcdata, 00048 00049 /** <tag .../> */ 00050 svn_xml_self_closing 00051 }; 00052 00053 00054 00055 /** Determine if a string of character @a data of length @a len is a 00056 * safe bet for use with the svn_xml_escape_* functions found in this 00057 * header. 00058 * 00059 * Return @c TRUE if it is, @c FALSE otherwise. 00060 * 00061 * Essentially, this function exists to determine whether or not 00062 * simply running a string of bytes through the Subversion XML escape 00063 * routines will produce legitimate XML. It should only be necessary 00064 * for data which might contain bytes that cannot be safely encoded 00065 * into XML (certain control characters, for example). 00066 */ 00067 svn_boolean_t svn_xml_is_xml_safe (const char *data, 00068 apr_size_t len); 00069 00070 00071 /** Create or append in @a *outstr an xml-escaped version of @a string, 00072 * suitable for output as character data. 00073 * 00074 * If @a *outstr is @c NULL, store a new stringbuf, else append to the 00075 * existing stringbuf there. 00076 */ 00077 void svn_xml_escape_cdata_stringbuf (svn_stringbuf_t **outstr, 00078 const svn_stringbuf_t *string, 00079 apr_pool_t *pool); 00080 00081 /** Same as svn_xml_escape_cdata_stringbuf(), but @a string is an 00082 * @c svn_string_t. 00083 */ 00084 void svn_xml_escape_cdata_string (svn_stringbuf_t **outstr, 00085 const svn_string_t *string, 00086 apr_pool_t *pool); 00087 00088 /** Same as svn_xml_escape_cdata_stringbuf(), but @a string is a 00089 * null-terminated C string. 00090 */ 00091 void svn_xml_escape_cdata_cstring (svn_stringbuf_t **outstr, 00092 const char *string, 00093 apr_pool_t *pool); 00094 00095 00096 /** Create or append in @a *outstr an xml-escaped version of @a string, 00097 * suitable for output as an attribute value. 00098 * 00099 * If @a *outstr is @c NULL, store a new stringbuf, else append to the 00100 * existing stringbuf there. 00101 */ 00102 void svn_xml_escape_attr_stringbuf (svn_stringbuf_t **outstr, 00103 const svn_stringbuf_t *string, 00104 apr_pool_t *pool); 00105 00106 /** Same as svn_xml_escape_attr_stringbuf(), but @a string is an 00107 * @c svn_string_t. 00108 */ 00109 void svn_xml_escape_attr_string (svn_stringbuf_t **outstr, 00110 const svn_string_t *string, 00111 apr_pool_t *pool); 00112 00113 /** Same as svn_xml_escape_attr_stringbuf(), but @a string is a 00114 * null-terminated C string. 00115 */ 00116 void svn_xml_escape_attr_cstring (svn_stringbuf_t **outstr, 00117 const char *string, 00118 apr_pool_t *pool); 00119 00120 /** 00121 * Return UTF-8 string @a string if it contains no characters that are 00122 * unrepresentable in XML. Else, return a copy of @a string, 00123 * allocated in @a pool, with each unrepresentable character replaced 00124 * by "?\uuu", where "uuu" is the three-digit unsigned decimal value 00125 * of that character. 00126 * 00127 * Neither the input nor the output need be valid XML; however, the 00128 * output can always be safely XML-escaped. 00129 * 00130 * @note The current implementation treats all Unicode characters as 00131 * representable, except for most ASCII control characters (the 00132 * exceptions being CR, LF, and TAB, which are valid in XML). There 00133 * may be other UTF-8 characters that are invalid in XML; see 00134 * http://subversion.tigris.org/servlets/ReadMsg?list=dev&msgNo=90591 00135 * and its thread for details. 00136 * 00137 * @since New in 1.2. 00138 */ 00139 const char *svn_xml_fuzzy_escape (const char *string, 00140 apr_pool_t *pool); 00141 00142 00143 /*---------------------------------------------------------------*/ 00144 00145 /* Generalized Subversion XML Parsing */ 00146 00147 /** A generalized Subversion XML parser object */ 00148 typedef struct svn_xml_parser_t svn_xml_parser_t; 00149 00150 typedef void (*svn_xml_start_elem)(void *baton, 00151 const char *name, 00152 const char **atts); 00153 00154 typedef void (*svn_xml_end_elem)(void *baton, const char *name); 00155 00156 /* data is not NULL-terminated. */ 00157 typedef void (*svn_xml_char_data)(void *baton, 00158 const char *data, 00159 apr_size_t len); 00160 00161 00162 /** Create a general Subversion XML parser */ 00163 svn_xml_parser_t *svn_xml_make_parser (void *baton, 00164 svn_xml_start_elem start_handler, 00165 svn_xml_end_elem end_handler, 00166 svn_xml_char_data data_handler, 00167 apr_pool_t *pool); 00168 00169 00170 /** Free a general Subversion XML parser */ 00171 void svn_xml_free_parser (svn_xml_parser_t *svn_parser); 00172 00173 00174 /** Push @a len bytes of xml data in @a buf at @a svn_parser. 00175 * 00176 * If this is the final push, @a is_final must be set. 00177 * 00178 * An error will be returned if there was a syntax problem in the XML, 00179 * or if any of the callbacks set an error using 00180 * svn_xml_signal_bailout(). 00181 * 00182 * If an error is returned, the @c svn_xml_parser_t will have been freed 00183 * automatically, so the caller should not call svn_xml_free_parser(). 00184 */ 00185 svn_error_t *svn_xml_parse (svn_xml_parser_t *parser, 00186 const char *buf, 00187 apr_size_t len, 00188 svn_boolean_t is_final); 00189 00190 00191 00192 /** The way to officially bail out of xml parsing. 00193 * 00194 * Store @a error in @a svn_parser and set all expat callbacks to @c NULL. 00195 */ 00196 void svn_xml_signal_bailout (svn_error_t *error, 00197 svn_xml_parser_t *svn_parser); 00198 00199 00200 00201 00202 00203 /*** Helpers for dealing with the data Expat gives us. ***/ 00204 00205 /** Return the value associated with @a name in expat attribute array @a atts, 00206 * else return @c NULL. 00207 * 00208 * (There could never be a @c NULL attribute value in the XML, 00209 * although the empty string is possible.) 00210 * 00211 * @a atts is an array of c-strings: even-numbered indexes are names, 00212 * odd-numbers hold values. If all is right, it should end on an 00213 * even-numbered index pointing to @c NULL. 00214 */ 00215 const char *svn_xml_get_attr_value (const char *name, const char **atts); 00216 00217 00218 00219 /* Converting between Expat attribute lists and APR hash tables. */ 00220 00221 00222 /** Create an attribute hash from @c va_list @a ap. 00223 * 00224 * The contents of @a ap are alternating <tt>char *</tt> keys and 00225 * <tt>char *</tt> vals, terminated by a final @c NULL falling on an 00226 * odd index (zero-based). 00227 */ 00228 apr_hash_t *svn_xml_ap_to_hash (va_list ap, apr_pool_t *pool); 00229 00230 /** Create a hash that corresponds to Expat xml attribute list @a atts. 00231 * 00232 * The hash's keys and values are <tt>char *</tt>'s. 00233 * 00234 * @a atts may be null, in which case you just get an empty hash back 00235 * (this makes life more convenient for some callers). 00236 */ 00237 apr_hash_t *svn_xml_make_att_hash (const char **atts, apr_pool_t *pool); 00238 00239 00240 /** Like svn_xml_make_att_hash(), but takes a hash and preserves any 00241 * key/value pairs already in it. 00242 */ 00243 void svn_xml_hash_atts_preserving (const char **atts, 00244 apr_hash_t *ht, 00245 apr_pool_t *pool); 00246 00247 /** Like svn_xml_make_att_hash(), but takes a hash and overwrites 00248 * key/value pairs already in it that also appear in @a atts. 00249 */ 00250 void svn_xml_hash_atts_overlaying (const char **atts, 00251 apr_hash_t *ht, 00252 apr_pool_t *pool); 00253 00254 00255 00256 /* Printing XML */ 00257 00258 /** Create an XML header and return it in @a *str. 00259 * 00260 * Fully-formed XML documents should start out with a header, 00261 * something like 00262 * <?xml version="1.0" encoding="utf-8"?> 00263 * 00264 * This function returns such a header. @a *str must either be @c NULL, in 00265 * which case a new string is created, or it must point to an existing 00266 * string to be appended to. 00267 */ 00268 void svn_xml_make_header (svn_stringbuf_t **str, apr_pool_t *pool); 00269 00270 00271 /** Store a new xml tag @a tagname in @a *str. 00272 * 00273 * If @a str is @c NULL, allocate @a *str in @a pool; else append the new 00274 * tag to @a *str, allocating in @a str's pool 00275 * 00276 * Take the tag's attributes from varargs, a null-terminated list of 00277 * alternating <tt>char *</tt> key and <tt>char *</tt> val. Do xml-escaping 00278 * on each val. 00279 * 00280 * @a style is one of the enumerated styles in @c svn_xml_open_tag_style. 00281 */ 00282 void svn_xml_make_open_tag (svn_stringbuf_t **str, 00283 apr_pool_t *pool, 00284 enum svn_xml_open_tag_style style, 00285 const char *tagname, 00286 ...); 00287 00288 00289 /** Like svn_xml_make_open_tag(), but takes a @c va_list instead of being 00290 * variadic. 00291 */ 00292 void svn_xml_make_open_tag_v (svn_stringbuf_t **str, 00293 apr_pool_t *pool, 00294 enum svn_xml_open_tag_style style, 00295 const char *tagname, 00296 va_list ap); 00297 00298 00299 /** Like svn_xml_make_open_tag(), but takes a hash table of attributes 00300 * (<tt>char *</tt> keys mapping to <tt>char *</tt> values). 00301 * 00302 * You might ask, why not just provide svn_xml_make_tag_atts()? 00303 * 00304 * The reason is that a hash table is the most natural interface to an 00305 * attribute list; the fact that Expat uses <tt>char **</tt> atts instead is 00306 * certainly a defensible implementation decision, but since we'd have 00307 * to have special code to support such lists throughout Subversion 00308 * anyway, we might as well write that code for the natural interface 00309 * (hashes) and then convert in the few cases where conversion is 00310 * needed. Someday it might even be nice to change expat-lite to work 00311 * with apr hashes. 00312 * 00313 * See conversion functions svn_xml_make_att_hash() and 00314 * svn_xml_make_att_hash_overlaying(). Callers should use those to 00315 * convert Expat attr lists into hashes when necessary. 00316 */ 00317 void svn_xml_make_open_tag_hash (svn_stringbuf_t **str, 00318 apr_pool_t *pool, 00319 enum svn_xml_open_tag_style style, 00320 const char *tagname, 00321 apr_hash_t *attributes); 00322 00323 00324 /** Makes a close tag. */ 00325 void svn_xml_make_close_tag (svn_stringbuf_t **str, 00326 apr_pool_t *pool, 00327 const char *tagname); 00328 00329 00330 00331 #ifdef __cplusplus 00332 } 00333 #endif /* __cplusplus */ 00334 00335 #endif /* SVN_XML_H */