From 13a3f5bc3e2d25fc76850f86070dc34efe60d77a Mon Sep 17 00:00:00 2001 From: James Moger <james.moger@gitblit.com> Date: Fri, 07 Sep 2012 22:06:15 -0400 Subject: [PATCH] Draft project pages, project metadata, and RSS feeds --- src/com/gitblit/utils/StringUtils.java | 78 ++++++++++++++++++++++++++++++++++++++- 1 files changed, 76 insertions(+), 2 deletions(-) diff --git a/src/com/gitblit/utils/StringUtils.java b/src/com/gitblit/utils/StringUtils.java index 2c35724..08fd497 100644 --- a/src/com/gitblit/utils/StringUtils.java +++ b/src/com/gitblit/utils/StringUtils.java @@ -16,13 +16,25 @@ package com.gitblit.utils; import java.io.UnsupportedEncodingException; +import java.nio.ByteBuffer; +import java.nio.CharBuffer; +import java.nio.charset.CharacterCodingException; +import java.nio.charset.Charset; +import java.nio.charset.CharsetDecoder; +import java.nio.charset.IllegalCharsetNameException; +import java.nio.charset.UnsupportedCharsetException; import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collection; import java.util.Collections; import java.util.Comparator; +import java.util.LinkedHashSet; import java.util.List; +import java.util.Set; +import java.util.regex.Matcher; +import java.util.regex.Pattern; import java.util.regex.PatternSyntaxException; /** @@ -66,7 +78,7 @@ * @return plain text escaped for html */ public static String escapeForHtml(String inStr, boolean changeSpace) { - StringBuffer retStr = new StringBuffer(); + StringBuilder retStr = new StringBuilder(); int i = 0; while (i < inStr.length()) { if (inStr.charAt(i) == '&') { @@ -107,7 +119,7 @@ * @return properly escaped url */ public static String encodeURL(String inStr) { - StringBuffer retStr = new StringBuffer(); + StringBuilder retStr = new StringBuilder(); int i = 0; while (i < inStr.length()) { if (inStr.charAt(i) == '/') { @@ -550,4 +562,66 @@ // remember to append any characters to the right of a match return sb.toString(); } + + /** + * Decodes a string by trying several charsets until one does not throw a + * coding exception. Last resort is to interpret as UTF-8 with illegal + * character substitution. + * + * @param content + * @param charsets optional + * @return a string + */ + public static String decodeString(byte [] content, String... charsets) { + Set<String> sets = new LinkedHashSet<String>(); + if (!ArrayUtils.isEmpty(charsets)) { + sets.addAll(Arrays.asList(charsets)); + } + String value = null; + sets.addAll(Arrays.asList("UTF-8", "ISO-8859-1", Charset.defaultCharset().name())); + for (String charset : sets) { + try { + Charset cs = Charset.forName(charset); + CharsetDecoder decoder = cs.newDecoder(); + CharBuffer buffer = decoder.decode(ByteBuffer.wrap(content)); + value = buffer.toString(); + break; + } catch (CharacterCodingException e) { + // ignore and advance to the next charset + } catch (IllegalCharsetNameException e) { + // ignore illegal charset names + } catch (UnsupportedCharsetException e) { + // ignore unsupported charsets + } + } + if (value.startsWith("\uFEFF")) { + // strip UTF-8 BOM + return value.substring(1); + } + return value; + } + + /** + * Attempt to extract a repository name from a given url using regular + * expressions. If no match is made, then return whatever trails after + * the final / character. + * + * @param regexUrls + * @return a repository path + */ + public static String extractRepositoryPath(String url, String... urlpatterns) { + for (String urlPattern : urlpatterns) { + Pattern p = Pattern.compile(urlPattern); + Matcher m = p.matcher(url); + while (m.find()) { + String repositoryPath = m.group(1); + return repositoryPath; + } + } + // last resort + if (url.lastIndexOf('/') > -1) { + return url.substring(url.lastIndexOf('/') + 1); + } + return url; + } } \ No newline at end of file -- Gitblit v1.9.1