00001 /** 00002 * @copyright 00003 * ==================================================================== 00004 * Copyright (c) 2000-2004 CollabNet. All rights reserved. 00005 * 00006 * This software is licensed as described in the file COPYING, which 00007 * you should have received as part of this distribution. The terms 00008 * are also available at http://subversion.tigris.org/license-1.html. 00009 * If newer versions of this license are posted there, you may use a 00010 * newer version instead, at your option. 00011 * 00012 * This software consists of voluntary contributions made by many 00013 * individuals. For exact contribution history, see the revision 00014 * history and logs, available at http://subversion.tigris.org/. 00015 * ==================================================================== 00016 * @endcopyright 00017 * 00018 * @file svn_path.h 00019 * @brief A path manipulation library 00020 * 00021 * All incoming and outgoing paths are non-null and in UTF-8, unless 00022 * otherwise documented. 00023 * 00024 * No result path ever ends with a separator, no matter whether the 00025 * path is a file or directory, because we always canonicalize() it. 00026 * 00027 * All paths passed to the @c svn_path_xxx functions, with the exceptions of 00028 * the @c svn_path_canonicalize and @c svn_path_internal_style functions, must 00029 * be in canonical form. 00030 * 00031 * todo: this library really needs a test suite! 00032 */ 00033 00034 #ifndef SVN_PATH_H 00035 #define SVN_PATH_H 00036 00037 00038 #include <apr_pools.h> 00039 #include <apr_tables.h> 00040 00041 #include "svn_string.h" 00042 #include "svn_error.h" 00043 00044 00045 #ifdef __cplusplus 00046 extern "C" { 00047 #endif /* __cplusplus */ 00048 00049 00050 00051 /** Convert @a path from the local style to the canonical internal style. */ 00052 const char *svn_path_internal_style (const char *path, apr_pool_t *pool); 00053 00054 /** Convert @a path from the canonical internal style to the local style. */ 00055 const char *svn_path_local_style (const char *path, apr_pool_t *pool); 00056 00057 00058 /** Join a base path (@a base) with a component (@a component), allocated in 00059 * @a pool. 00060 * 00061 * If either @a base or @a component is the empty path, then the other 00062 * argument will be copied and returned. If both are the empty path the 00063 * empty path is returned. 00064 * 00065 * If the @a component is an absolute path, then it is copied and returned. 00066 * Exactly one slash character ('/') is used to joined the components, 00067 * accounting for any trailing slash in @a base. 00068 * 00069 * Note that the contents of @a base are not examined, so it is possible to 00070 * use this function for constructing URLs, or for relative URLs or 00071 * repository paths. 00072 * 00073 * This function is NOT appropriate for native (local) file paths. Only 00074 * for "internal" paths, since it uses '/' for the separator. Further, 00075 * an absolute path (for @a component) is based on a leading '/' character. 00076 * Thus, an "absolute URI" for the @a component won't be detected. An 00077 * absolute URI can only be used for the base. 00078 */ 00079 char *svn_path_join (const char *base, 00080 const char *component, 00081 apr_pool_t *pool); 00082 00083 /** Join multiple components onto a @a base path, allocated in @a pool. The 00084 * components are terminated by a @c NULL. 00085 * 00086 * If any component is the empty string, it will be ignored. 00087 * 00088 * If any component is an absolute path, then it resets the base and 00089 * further components will be appended to it. 00090 * 00091 * See @c svn_path_join() for further notes about joining paths. 00092 */ 00093 char *svn_path_join_many (apr_pool_t *pool, const char *base, ...); 00094 00095 00096 /** Get the basename of the specified @a path. The basename is defined as 00097 * the last component of the path (ignoring any trailing slashes). If 00098 * the @a path is root ("/"), then that is returned. Otherwise, the 00099 * returned value will have no slashes in it. 00100 * 00101 * Example: svn_path_basename("/foo/bar") -> "bar" 00102 * 00103 * The returned basename will be allocated in @a pool. 00104 * 00105 * Note: if an empty string is passed, then an empty string will be returned. 00106 */ 00107 char *svn_path_basename (const char *path, apr_pool_t *pool); 00108 00109 /** Get the dirname of the specified @a path, defined as the path with its 00110 * basename removed. 00111 * 00112 * Get the dirname of the specified @a path, defined as the path with its 00113 * basename removed. If @a path is root ("/"), it is returned unchanged. 00114 * 00115 * The returned dirname will be allocated in @a pool. 00116 */ 00117 char *svn_path_dirname (const char *path, apr_pool_t *pool); 00118 00119 /** Add a @a component (a null-terminated C-string) to @a path. @a component 00120 * is allowed to contain directory separators. 00121 * 00122 * If @a path is non-empty, append the appropriate directory separator 00123 * character, and then @a component. If @a path is empty, simply set it to 00124 * @a component; don't add any separator character. 00125 * 00126 * If the result ends in a separator character, then remove the separator. 00127 */ 00128 void svn_path_add_component (svn_stringbuf_t *path, 00129 const char *component); 00130 00131 /** Remove one component off the end of @a path. */ 00132 void svn_path_remove_component (svn_stringbuf_t *path); 00133 00134 00135 /** Divide @a path into @a *dirpath and @a *base_name, allocated in @a pool. 00136 * 00137 * If @a dirpath or @a base_name is null, then don't set that one. 00138 * 00139 * Either @a dirpath or @a base_name may be @a path's own address, but they 00140 * may not both be the same address, or the results are undefined. 00141 * 00142 * If @a path has two or more components, the separator between @a dirpath 00143 * and @a base_name is not included in either of the new names. 00144 * 00145 * examples: 00146 * - <pre>"/foo/bar/baz" ==> "/foo/bar" and "baz"</pre> 00147 * - <pre>"/bar" ==> "/" and "bar"</pre> 00148 * - <pre>"/" ==> "/" and "/"</pre> 00149 * - <pre>"bar" ==> "" and "bar"</pre> 00150 * - <pre>"" ==> "" and ""</pre> 00151 */ 00152 void svn_path_split (const char *path, 00153 const char **dirpath, 00154 const char **base_name, 00155 apr_pool_t *pool); 00156 00157 00158 /** Return non-zero iff @a path is empty ("") or represents the current 00159 * directory -- that is, if prepending it as a component to an existing 00160 * path would result in no meaningful change. 00161 */ 00162 int svn_path_is_empty (const char *path); 00163 00164 00165 /** Return a new path like @a path, but with any trailing separators that don't 00166 * affect @a path's meaning removed. Will convert a "." path to "". Allocate 00167 * the new path in @a pool if anything changed, else just return @a path. 00168 * 00169 * (At some future point, this may make other semantically inoperative 00170 * transformations.) 00171 */ 00172 const char *svn_path_canonicalize (const char *path, apr_pool_t *pool); 00173 00174 00175 /** Return an integer greater than, equal to, or less than 0, according 00176 * as @a path1 is greater than, equal to, or less than @a path2. 00177 */ 00178 int svn_path_compare_paths (const char *path1, const char *path2); 00179 00180 00181 /** Return the longest common path shared by both @a path1 and @a path2. If 00182 * there's no common ancestor, return the empty path. 00183 * 00184 * @a path1 and @a path2 may be URLs. In order for two URLs to have 00185 * a common ancestor, they must (a) have the same protocol (since two URLs 00186 * with the same path but different protocols may point at completely 00187 * different resources), and (b) share a common ancestor in their path 00188 * component, i.e. 'protocol://' is not a sufficient ancestor. 00189 */ 00190 char *svn_path_get_longest_ancestor (const char *path1, 00191 const char *path2, 00192 apr_pool_t *pool); 00193 00194 /** Convert @a relative path to an absolute path and return the results in 00195 * @a *pabsolute, allocated in @a pool. 00196 * 00197 * @a relative may be a URL, in which case no attempt is made to convert it, 00198 * and a copy of the URL is returned. 00199 */ 00200 svn_error_t * 00201 svn_path_get_absolute (const char **pabsolute, 00202 const char *relative, 00203 apr_pool_t *pool); 00204 00205 /** Return the path part of @a path in @a *pdirectory, and the file part in 00206 * @a *pfile. If @a path is a directory, set @a *pdirectory to @a path, and 00207 * @a *pfile to the empty string. If @a path does not exist it is treated 00208 * as if it is a file, since directories do not normally vanish. 00209 */ 00210 svn_error_t * 00211 svn_path_split_if_file(const char *path, 00212 const char **pdirectory, 00213 const char **pfile, 00214 apr_pool_t *pool); 00215 00216 /** Find the common prefix of the paths in @a targets (an array of @a 00217 * const char *'s), and remove redundant paths if @a 00218 * remove_redundancies is true. 00219 * 00220 * - Set @a *pcommon to the absolute path of the path or URL common to 00221 * all of the targets. If the targets have no common prefix, or 00222 * are a mix of URLs and local paths, set @a *pcommon to the 00223 * empty string. 00224 * 00225 * - If @a pcondensed_targets is non-null, set @a *pcondensed_targets 00226 * to an array of targets relative to @a *pcommon, and if 00227 * @a remove_redundancies is true, omit any paths/URLs that are 00228 * descendants of another path/URL in @a targets. If *pcommon 00229 * is empty, @a *pcondensed_targets will contain full URLs and/or 00230 * absolute paths; redundancies can still be removed (from both URLs 00231 * and paths). If @a pcondensed_targets is null, leave it alone. 00232 * 00233 * Else if there is exactly one target, then 00234 * 00235 * - Set @a *pcommon to that target, and 00236 * 00237 * - If @a pcondensed_targets is non-null, set @a *pcondensed_targets 00238 * to an array containing zero elements. Else if 00239 * @a pcondensed_targets is null, leave it alone. 00240 * 00241 * If there are no items in @a targets, set @a *pcommon and (if 00242 * applicable) @a *pcondensed_targets to @c NULL. 00243 * 00244 * NOTE: There is no guarantee that @a *pcommon is within a working 00245 * copy. */ 00246 svn_error_t * 00247 svn_path_condense_targets (const char **pcommon, 00248 apr_array_header_t **pcondensed_targets, 00249 const apr_array_header_t *targets, 00250 svn_boolean_t remove_redundancies, 00251 apr_pool_t *pool); 00252 00253 00254 /** Copy a list of @a targets, one at a time, into @a pcondensed_targets, 00255 * omitting any targets that are found earlier in the list, or whose 00256 * ancestor is found earlier in the list. Ordering of targets in the 00257 * original list is preserved in the condensed list of targets. Use 00258 * @a pool for any allocations. 00259 * 00260 * How does this differ in functionality from @c svn_path_condense_targets? 00261 * 00262 * Here's the short version: 00263 * 00264 * 1. Disclaimer: if you wish to debate the following, talk to Karl. :-) 00265 * Order matters for updates because a multi-arg update is not 00266 * atomic, and CVS users are used to, when doing 'cvs up targetA 00267 * targetB' seeing targetA get updated, then targetB. I think the 00268 * idea is that if you're in a time-sensitive or flaky-network 00269 * situation, a user can say, "I really *need* to update 00270 * wc/A/D/G/tau, but I might as well update my whole working copy if 00271 * I can." So that user will do 'svn up wc/A/D/G/tau wc', and if 00272 * something dies in the middles of the 'wc' update, at least the 00273 * user has 'tau' up-to-date. 00274 * 00275 * 2. Also, we have this notion of an anchor and a target for updates 00276 * (the anchor is where the update editor is rooted, the target is 00277 * the actual thing we want to update). I needed a function that 00278 * would NOT screw with my input paths so that I could tell the 00279 * difference between someone being in A/D and saying 'svn up G' and 00280 * being in A/D/G and saying 'svn up .' -- believe it or not, these 00281 * two things don't mean the same thing. @c svn_path_condense_targets 00282 * plays with absolute paths (which is fine, so does 00283 * @c svn_path_remove_redundancies), but the difference is that it 00284 * actually tweaks those targets to be relative to the "grandfather 00285 * path" common to all the targets. Updates don't require a 00286 * "grandfather path" at all, and even if it did, the whole 00287 * conversion to an absolute path drops the crucial difference 00288 * between saying "i'm in foo, update bar" and "i'm in foo/bar, 00289 * update '.'" 00290 */ 00291 svn_error_t * 00292 svn_path_remove_redundancies (apr_array_header_t **pcondensed_targets, 00293 const apr_array_header_t *targets, 00294 apr_pool_t *pool); 00295 00296 00297 /** Decompose @a path into an array of <tt>const char *</tt> components, 00298 * allocated in @a pool. If @a path is absolute, the first component will 00299 * be a lone dir separator (the root directory). 00300 */ 00301 apr_array_header_t *svn_path_decompose (const char *path, 00302 apr_pool_t *pool); 00303 00304 00305 /** Test that @a name is a single path component, that is: 00306 * - not @c NULL or empty. 00307 * - not a `/'-separated directory path 00308 * - not empty or `..' 00309 */ 00310 svn_boolean_t svn_path_is_single_path_component (const char *name); 00311 00312 00313 /** Test if @a path2 is a child of @a path1. 00314 * If not, return @c NULL. 00315 * If so, return a copy of the remainder path, allocated in @a pool. 00316 * (The remainder is the component which, added to @a path1, yields 00317 * @a path2. The remainder does not begin with a dir separator.) 00318 * 00319 * Both paths must be in canonical form, and must either be absolute, 00320 * or contain no ".." components. 00321 * 00322 * ### todo: the ".." restriction is unfortunate, and would ideally 00323 * be lifted by making the implementation smarter. But this is not 00324 * trivial: if the path is "../foo", how do you know whether or not 00325 * the current directory is named "foo" in its parent? 00326 */ 00327 const char *svn_path_is_child (const char *path1, 00328 const char *path2, 00329 apr_pool_t *pool); 00330 00331 00332 /** URI/URL stuff 00333 * 00334 * @defgroup svn_path_uri_stuff URI/URL stuff 00335 * @{ 00336 */ 00337 00338 /** Return @c TRUE iff @a path looks like a valid URL, @c FALSE otherwise. */ 00339 svn_boolean_t svn_path_is_url (const char *path); 00340 00341 /** Return @c TRUE iff @a path is URI-safe, @c FALSE otherwise. */ 00342 svn_boolean_t svn_path_is_uri_safe (const char *path); 00343 00344 /** Return a URI-encoded copy of @a path, allocated in @a pool. */ 00345 const char *svn_path_uri_encode (const char *path, apr_pool_t *pool); 00346 00347 /** Return a URI-decoded copy of @a path, allocated in @a pool. */ 00348 const char *svn_path_uri_decode (const char *path, apr_pool_t *pool); 00349 00350 /** Extend @a url by a single @a component, URI-encoding that @a component 00351 * before adding it to the @a url. Return the new @a url, allocated in 00352 * @a pool. Notes: if @a component is @c NULL, just return a copy or @a url 00353 * allocated in @a pool; if @a component is already URI-encoded, calling 00354 * code should just use <tt>svn_path_join (url, component, pool)</tt>. @a url 00355 * does not need to be a canonical path, it may have trailing '/'. 00356 */ 00357 const char *svn_path_url_add_component (const char *url, 00358 const char *component, 00359 apr_pool_t *pool); 00360 00361 /** @} */ 00362 00363 /** Charset conversion stuff 00364 * 00365 * @defgroup svn_path_charset_stuff Charset conversion stuff 00366 * @{ 00367 */ 00368 00369 /** Convert @a path_utf8 from UTF-8 to the internal encoding used by APR. */ 00370 svn_error_t *svn_path_cstring_from_utf8 (const char **path_apr, 00371 const char *path_utf8, 00372 apr_pool_t *pool); 00373 00374 /** Convert @a path_apr from the internal encoding used by APR to UTF-8. */ 00375 svn_error_t *svn_path_cstring_to_utf8 (const char **path_utf8, 00376 const char *path_apr, 00377 apr_pool_t *pool); 00378 00379 00380 /** @} */ 00381 00382 #ifdef __cplusplus 00383 } 00384 #endif /* __cplusplus */ 00385 00386 00387 #endif /* SVN_PATH_H */