From 0adceb4b64dfe0dd509da33c6d733a47fbf803a2 Mon Sep 17 00:00:00 2001 From: James Moger <james.moger@gitblit.com> Date: Wed, 01 Aug 2012 21:21:32 -0400 Subject: [PATCH] Regex exclusions for repository search (issue 103) --- src/com/gitblit/utils/StringUtils.java | 157 +++++++++++++++++++++++++++++++++++++++++++-------- 1 files changed, 131 insertions(+), 26 deletions(-) diff --git a/src/com/gitblit/utils/StringUtils.java b/src/com/gitblit/utils/StringUtils.java index a51c157..baed5f0 100644 --- a/src/com/gitblit/utils/StringUtils.java +++ b/src/com/gitblit/utils/StringUtils.java @@ -16,12 +16,23 @@ package com.gitblit.utils; import java.io.UnsupportedEncodingException; +import java.nio.ByteBuffer; +import java.nio.CharBuffer; +import java.nio.charset.CharacterCodingException; +import java.nio.charset.Charset; +import java.nio.charset.CharsetDecoder; +import java.nio.charset.IllegalCharsetNameException; +import java.nio.charset.UnsupportedCharsetException; import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; import java.util.Collections; import java.util.Comparator; +import java.util.LinkedHashSet; import java.util.List; +import java.util.Set; import java.util.regex.PatternSyntaxException; /** @@ -127,7 +138,7 @@ * @param values * @return flattened list */ - public static String flattenStrings(List<String> values) { + public static String flattenStrings(Collection<String> values) { return flattenStrings(values, " "); } @@ -139,7 +150,7 @@ * @param separator * @return flattened list */ - public static String flattenStrings(List<String> values, String separator) { + public static String flattenStrings(Collection<String> values, String separator) { StringBuilder sb = new StringBuilder(); for (String value : values) { sb.append(value).append(separator); @@ -165,16 +176,6 @@ return value; } return value.substring(0, max - 3) + "..."; - } - - /** - * Returns 
a trimmed shortlog message. - * - * @param string - * @return trimmed shortlog message - */ - public static String trimShortLog(String string) { - return trimString(string, 60); } /** @@ -336,20 +337,24 @@ * @return list of strings */ public static List<String> getStringsFromValue(String value, String separator) { - List<String> strings = new ArrayList<String>(); - try { - String[] chunks = value.split(separator); - for (String chunk : chunks) { - chunk = chunk.trim(); - if (chunk.length() > 0) { - strings.add(chunk); - } - } - } catch (PatternSyntaxException e) { - throw new RuntimeException(e); - } - return strings; - } + List<String> strings = new ArrayList<String>(); + try { + String[] chunks = value.split(separator + "(?=([^\"]*\"[^\"]*\")*[^\"]*$)"); + for (String chunk : chunks) { + chunk = chunk.trim(); + if (chunk.length() > 0) { + if (chunk.charAt(0) == '"' && chunk.charAt(chunk.length() - 1) == '"') { + // strip double quotes + chunk = chunk.substring(1, chunk.length() - 1).trim(); + } + strings.add(chunk); + } + } + } catch (PatternSyntaxException e) { + throw new RuntimeException(e); + } + return strings; + } /** * Validates that a name is composed of letters, digits, or limited other @@ -487,4 +492,104 @@ String bs = Integer.toHexString((int) (b * 256)); return "#" + rs + gs + bs; } + + /** + * Strips a trailing ".git" from the value. + * + * @param value + * @return a stripped value or the original value if .git is not found + */ + public static String stripDotGit(String value) { + if (value.toLowerCase().endsWith(".git")) { + return value.substring(0, value.length() - 4); + } + return value; + } + + /** + * Count the number of lines in a string. + * + * @param value + * @return the line count + */ + public static int countLines(String value) { + if (isEmpty(value)) { + return 0; + } + return value.split("\n").length; + } + + /** + * Returns the file extension of a path. 
+ * + * @param path + * @return a blank string or a file extension + */ + public static String getFileExtension(String path) { + int lastDot = path.lastIndexOf('.'); + if (lastDot > -1) { + return path.substring(lastDot + 1); + } + return ""; + } + + /** + * Replace all occurrences of a substring within a string with + * another string. + * + * From Spring StringUtils. + * + * @param inString String to examine + * @param oldPattern String to replace + * @param newPattern String to insert + * @return a String with the replacements + */ + public static String replace(String inString, String oldPattern, String newPattern) { + StringBuilder sb = new StringBuilder(); + int pos = 0; // our position in the old string + int index = inString.indexOf(oldPattern); + // the index of an occurrence we've found, or -1 + int patLen = oldPattern.length(); + while (index >= 0) { + sb.append(inString.substring(pos, index)); + sb.append(newPattern); + pos = index + patLen; + index = inString.indexOf(oldPattern, pos); + } + sb.append(inString.substring(pos)); + // remember to append any characters to the right of a match + return sb.toString(); + } + + /** + * Decodes a string by trying several charsets until one does not throw a + * coding exception. Last resort is to interpret as UTF-8 with illegal + * character substitution. + * + * @param content + * @param charsets optional + * @return a string + */ + public static String decodeString(byte [] content, String... 
charsets) { + Set<String> sets = new LinkedHashSet<String>(); + if (!ArrayUtils.isEmpty(charsets)) { + sets.addAll(Arrays.asList(charsets)); + } + sets.addAll(Arrays.asList("UTF-8", "ISO-8859-1", Charset.defaultCharset().name())); + for (String charset : sets) { + try { + Charset cs = Charset.forName(charset); + CharsetDecoder decoder = cs.newDecoder(); + CharBuffer buffer = decoder.decode(ByteBuffer.wrap(content)); + return buffer.toString(); + } catch (CharacterCodingException e) { + // ignore and advance to the next charset + } catch (IllegalCharsetNameException e) { + // ignore illegal charset names + } catch (UnsupportedCharsetException e) { + // ignore unsupported charsets + } + } + return new String(content, Charset.forName("UTF-8")); + } } \ No newline at end of file -- Gitblit v1.9.1