Try multiple encodings when working with string blobs (issue-97)
| | |
| | | # SINCE 0.5.0
|
| | | web.repositoriesMessage = gitblit
|
| | |
|
| | | # Ordered list of charsets/encodings to use when trying to display a blob.
|
| | | # UTF-8, ISO-8859-1, and the server's default charset are always appended to
|
| | | # this list, so they are tried even if it is empty. If all encodings fail to cleanly
|
| | | # decode the blob content, UTF-8 will be used with the standard malformed
|
| | | # input/unmappable character replacement strings.
|
| | | # |
| | | # SPACE-DELIMITED
|
| | | # SINCE 1.0.0
|
| | | web.blobEncodings = UTF-8 ISO-8859-1
|
| | |
|
| | | # Manually set the default timezone to be used by Gitblit for display in the
|
| | | # web ui. This value is independent of the JVM timezone. Specifying a blank
|
| | | # value will default to the JVM timezone.
|
| | |
| | | # e.g.
|
| | | # web.otherUrls = ssh://localhost/git/{0} git://localhost/git/{0}
|
| | | #
|
| | | # SPACE-DELIMITED
|
| | | # SINCE 0.5.0
|
| | | web.otherUrls =
|
| | |
|
| | |
| | |
|
| | | #### additions
|
| | |
|
| | | - Added a setting to control the charsets used for blob string decoding. The default encodings are UTF-8, ISO-8859-1, and the server's default charset. (issue 97) |
| | | **New:** *web.blobEncodings = UTF-8 ISO-8859-1* |
| | | - Exposed JGit's internal configuration settings in gitblit.properties/web.xml (issue 93)
|
| | | **New:** *git.packedGitWindowSize = 8k*
|
| | | **New:** *git.packedGitLimit = 10m*
|
| | |
| | | return self().timezone;
|
| | | }
|
| | |
|
| | | /**
|
| | | * Returns the user-defined blob encodings.
|
| | | * |
| | | * @return an array of encodings, may be empty
|
| | | */
|
| | | public static String [] getEncodings() {
|
| | | return getStrings(Keys.web.blobEncodings).toArray(new String[0]);
|
| | | }
|
| | | |
| | |
|
| | | /**
|
| | | * Returns the boolean value for the specified key. If the key does not
|
| | |
| | | String branch, RevCommit commit) {
|
| | | IndexResult result = new IndexResult();
|
| | | try {
|
| | | String [] encodings = storedSettings.getStrings(Keys.web.blobEncodings).toArray(new String[0]);
|
| | | List<PathChangeModel> changedPaths = JGitUtils.getFilesInCommit(repository, commit);
|
| | | String revDate = DateTools.timeToString(commit.getCommitTime() * 1000L,
|
| | | Resolution.MINUTE);
|
| | |
| | | if (StringUtils.isEmpty(ext) || !excludedExtensions.contains(ext)) {
|
| | | // read the blob content
|
| | | String str = JGitUtils.getStringContent(repository, commit.getTree(),
|
| | | path.path);
|
| | | path.path, encodings);
|
| | | doc.add(new Field(FIELD_CONTENT, str, Store.YES, Index.ANALYZED));
|
| | | writer.addDocument(doc);
|
| | | }
|
| | |
| | | }
|
| | | response.setDateHeader("Last-Modified", JGitUtils.getCommitDate(commit).getTime());
|
| | |
|
| | | String [] encodings = GitBlit.getEncodings();
|
| | |
|
| | | RevTree tree = commit.getTree();
|
| | | byte[] content = null;
|
| | | if (StringUtils.isEmpty(resource)) {
|
| | | // find resource
|
| | | String[] files = { "index.html", "index.htm", "index.mkd" };
|
| | | for (String file : files) {
|
| | | content = JGitUtils.getStringContent(r, tree, file)
|
| | | content = JGitUtils.getStringContent(r, tree, file, encodings)
|
| | | .getBytes(Constants.ENCODING);
|
| | | if (content != null) {
|
| | | resource = file;
|
| | |
| | | contentType = "text/plain";
|
| | | }
|
| | | if (contentType.startsWith("text")) {
|
| | | content = JGitUtils.getStringContent(r, tree, resource).getBytes(
|
| | | content = JGitUtils.getStringContent(r, tree, resource, encodings).getBytes(
|
| | | Constants.ENCODING);
|
| | | } else {
|
| | | content = JGitUtils.getByteContent(r, tree, resource);
|
| | |
| | |
|
| | | // no content, try custom 404 page
|
| | | if (ArrayUtils.isEmpty(content)) {
|
| | | String custom404 = JGitUtils.getStringContent(r, tree, "404.html");
|
| | | String custom404 = JGitUtils.getStringContent(r, tree, "404.html", encodings);
|
| | | if (!StringUtils.isEmpty(custom404)) {
|
| | | content = custom404.getBytes(Constants.ENCODING);
|
| | | }
|
| | |
| | | import java.io.IOException;
|
| | | import java.io.InputStream;
|
| | | import java.io.OutputStream;
|
| | | import java.nio.charset.Charset;
|
| | | import java.text.MessageFormat;
|
| | | import java.util.ArrayList;
|
| | | import java.util.Arrays;
|
| | |
| | | * @param tree
|
| | | * if null, the RevTree from HEAD is assumed.
|
| | | * @param blobPath
|
| | | * @param charsets optional
|
| | | * @return UTF-8 string content
|
| | | */
|
| | | public static String getStringContent(Repository repository, RevTree tree, String blobPath) {
|
| | | public static String getStringContent(Repository repository, RevTree tree, String blobPath, String... charsets) {
|
| | | byte[] content = getByteContent(repository, tree, blobPath);
|
| | | if (content == null) {
|
| | | return null;
|
| | | }
|
| | | return new String(content, Charset.forName(Constants.CHARACTER_ENCODING));
|
| | | return StringUtils.decodeString(content, charsets);
|
| | | }
|
| | |
|
| | | /**
|
| | |
| | | *
|
| | | * @param repository
|
| | | * @param objectId
|
| | | * @param charsets optional
|
| | | * @return UTF-8 string content
|
| | | */
|
| | | public static String getStringContent(Repository repository, String objectId) {
|
| | | public static String getStringContent(Repository repository, String objectId, String... charsets) {
|
| | | byte[] content = getByteContent(repository, objectId);
|
| | | if (content == null) {
|
| | | return null;
|
| | | }
|
| | | return new String(content, Charset.forName(Constants.CHARACTER_ENCODING));
|
| | | return StringUtils.decodeString(content, charsets);
|
| | | }
|
| | |
|
| | | /**
|
| | |
| | | package com.gitblit.utils;
|
| | |
|
| | | import java.io.UnsupportedEncodingException;
|
| | | import java.nio.ByteBuffer;
|
| | | import java.nio.CharBuffer;
|
| | | import java.nio.charset.CharacterCodingException;
|
| | | import java.nio.charset.Charset;
|
| | | import java.nio.charset.CharsetDecoder;
|
| | | import java.nio.charset.IllegalCharsetNameException;
|
| | | import java.nio.charset.UnsupportedCharsetException;
|
| | | import java.security.MessageDigest;
|
| | | import java.security.NoSuchAlgorithmException;
|
| | | import java.util.ArrayList;
|
| | | import java.util.Arrays;
|
| | | import java.util.Collection;
|
| | | import java.util.Collections;
|
| | | import java.util.Comparator;
|
| | | import java.util.LinkedHashSet;
|
| | | import java.util.List;
|
| | | import java.util.Set;
|
| | | import java.util.regex.PatternSyntaxException;
|
| | |
|
| | | /**
|
| | |
| | | // remember to append any characters to the right of a match
|
| | | return sb.toString();
|
| | | }
|
| | | |
| | | /**
|
| | | * Decodes a string by trying several charsets until one does not throw a
|
| | | * coding exception. Last resort is to interpret as UTF-8 with illegal
|
| | | * character substitution.
|
| | | * |
| | | * @param content
|
| | | * @param charsets optional
|
| | | * @return a string
|
| | | */
|
| | | public static String decodeString(byte [] content, String... charsets) {
|
| | | Set<String> sets = new LinkedHashSet<String>();
|
| | | if (!ArrayUtils.isEmpty(charsets)) {
|
| | | sets.addAll(Arrays.asList(charsets));
|
| | | }
|
| | | sets.addAll(Arrays.asList("UTF-8", "ISO-8859-1", Charset.defaultCharset().name()));
|
| | | for (String charset : sets) {
|
| | | try {
|
| | | Charset cs = Charset.forName(charset);
|
| | | CharsetDecoder decoder = cs.newDecoder();
|
| | | CharBuffer buffer = decoder.decode(ByteBuffer.wrap(content));
|
| | | return buffer.toString();
|
| | | } catch (CharacterCodingException e) {
|
| | | // ignore and advance to the next charset
|
| | | } catch (IllegalCharsetNameException e) {
|
| | | // ignore illegal charset names
|
| | | } catch (UnsupportedCharsetException e) {
|
| | | // ignore unsupported charsets
|
| | | }
|
| | | }
|
| | | return new String(content, Charset.forName("UTF-8"));
|
| | | }
|
| | | } |
| | |
| | |
|
| | | Repository r = getRepository();
|
| | | final String blobPath = WicketUtils.getPath(params);
|
| | | String [] encodings = GitBlit.getEncodings();
|
| | |
|
| | | if (StringUtils.isEmpty(blobPath)) {
|
| | | // blob by objectid
|
| | |
| | | add(new BookmarkablePageLink<Void>("headLink", BlobPage.class).setEnabled(false));
|
| | | add(new CommitHeaderPanel("commitHeader", objectId));
|
| | | add(new PathBreadcrumbsPanel("breadcrumbs", repositoryName, blobPath, objectId));
|
| | | Component c = new Label("blobText", JGitUtils.getStringContent(r, objectId));
|
| | | Component c = new Label("blobText", JGitUtils.getStringContent(r, objectId, encodings));
|
| | | WicketUtils.setCssClass(c, "plainprint");
|
| | | add(c);
|
| | | } else {
|
| | |
| | | case 1:
|
| | | // PrettyPrint blob text
|
| | | c = new Label("blobText", JGitUtils.getStringContent(r, commit.getTree(),
|
| | | blobPath));
|
| | | blobPath, encodings));
|
| | | WicketUtils.setCssClass(c, "prettyprint linenums");
|
| | | break;
|
| | | case 2:
|
| | |
| | | default:
|
| | | // plain text
|
| | | c = new Label("blobText", JGitUtils.getStringContent(r, commit.getTree(),
|
| | | blobPath));
|
| | | blobPath, encodings));
|
| | | WicketUtils.setCssClass(c, "plainprint");
|
| | | }
|
| | | add(c);
|
| | | } else {
|
| | | // plain text
|
| | | Label blobLabel = new Label("blobText", JGitUtils.getStringContent(r,
|
| | | commit.getTree(), blobPath));
|
| | | commit.getTree(), blobPath, encodings));
|
| | | WicketUtils.setCssClass(blobLabel, "plainprint");
|
| | | add(blobLabel);
|
| | | }
|
| | |
| | | import org.eclipse.jgit.lib.Repository;
|
| | | import org.eclipse.jgit.revwalk.RevCommit;
|
| | |
|
| | | import com.gitblit.GitBlit;
|
| | | import com.gitblit.utils.JGitUtils;
|
| | | import com.gitblit.utils.MarkdownUtils;
|
| | | import com.gitblit.wicket.WicketUtils;
|
| | |
| | |
|
| | | Repository r = getRepository();
|
| | | RevCommit commit = JGitUtils.getCommit(r, objectId);
|
| | | String [] encodings = GitBlit.getEncodings();
|
| | |
|
| | | // markdown page links
|
| | | add(new BookmarkablePageLink<Void>("blameLink", BlamePage.class,
|
| | |
| | | WicketUtils.newPathParameter(repositoryName, Constants.HEAD, markdownPath)));
|
| | |
|
| | | // Read raw markdown content and transform it to html
|
| | | String markdownText = JGitUtils.getStringContent(r, commit.getTree(), markdownPath);
|
| | | String markdownText = JGitUtils.getStringContent(r, commit.getTree(), markdownPath, encodings);
|
| | | String htmlText;
|
| | | try {
|
| | | htmlText = MarkdownUtils.transformMarkdown(markdownText);
|
| | |
| | | final String repositoryName = WicketUtils.getRepositoryName(params);
|
| | | final String objectId = WicketUtils.getObject(params);
|
| | | final String blobPath = WicketUtils.getPath(params);
|
| | | String [] encodings = GitBlit.getEncodings();
|
| | |
|
| | | Repository r = GitBlit.self().getRepository(repositoryName);
|
| | | if (r == null) {
|
| | |
| | |
|
| | | if (StringUtils.isEmpty(blobPath)) {
|
| | | // objectid referenced raw view
|
| | | Label blobLabel = new Label("rawText", JGitUtils.getStringContent(r, objectId));
|
| | | Label blobLabel = new Label("rawText", JGitUtils.getStringContent(r, objectId, encodings));
|
| | | WicketUtils.setCssClass(blobLabel, "plainprint");
|
| | | add(blobLabel);
|
| | | } else {
|
| | |
| | | default:
|
| | | // plain text
|
| | | c = new Label("rawText", JGitUtils.getStringContent(r, commit.getTree(),
|
| | | blobPath));
|
| | | blobPath, encodings));
|
| | | WicketUtils.setCssClass(c, "plainprint");
|
| | | }
|
| | | add(c);
|
| | | } else {
|
| | | // plain text
|
| | | Label blobLabel = new Label("rawText", JGitUtils.getStringContent(r,
|
| | | commit.getTree(), blobPath));
|
| | | commit.getTree(), blobPath, encodings));
|
| | | WicketUtils.setCssClass(blobLabel, "plainprint");
|
| | | add(blobLabel);
|
| | | }
|
| | |
| | | }
|
| | | }
|
| | | if (!StringUtils.isEmpty(readme)) {
|
| | | String markdownText = JGitUtils.getStringContent(r, head.getTree(), readme);
|
| | | String [] encodings = GitBlit.getEncodings();
|
| | | String markdownText = JGitUtils.getStringContent(r, head.getTree(), readme, encodings);
|
| | | htmlText = MarkdownUtils.transformMarkdown(markdownText);
|
| | | }
|
| | | } catch (ParseException p) {
|
| | |
| | | import org.eclipse.jgit.lib.Repository;
|
| | | import org.eclipse.jgit.lib.RepositoryCache.FileKey;
|
| | | import org.eclipse.jgit.revwalk.RevCommit;
|
| | | import org.eclipse.jgit.revwalk.RevTree;
|
| | | import org.eclipse.jgit.util.FS;
|
| | | import org.eclipse.jgit.util.FileUtils;
|
| | | import org.junit.Test;
|
| | |
| | | @Test
|
| | | public void testStringContent() throws Exception {
|
| | | Repository repository = GitBlitSuite.getHelloworldRepository();
|
| | | String contentA = JGitUtils.getStringContent(repository, null, "java.java");
|
| | | String contentA = JGitUtils.getStringContent(repository, (RevTree) null, "java.java");
|
| | | RevCommit commit = JGitUtils.getCommit(repository, Constants.HEAD);
|
| | | String contentB = JGitUtils.getStringContent(repository, commit.getTree(), "java.java");
|
| | | String contentC = JGitUtils.getStringContent(repository, commit.getTree(), "missing.txt");
|