本地化字符串处理与HTML字符串截断-CSDN博客

本文链接：https://blog.csdn.net/magic_dreamer/article/details/83525472

本文介绍了一个用于处理本地化字符串的方法，能够根据不同地区设置返回相应的字符串，并实现了一个用于截断HTML字符串的实用工具，确保字符串长度适中且保留HTML标签的完整性。

摘要生成于 C知道，由 DeepSeek-R1 满血版支持，前往体验 >

/**
 * Returns the localized form of a string without naming a locale.
 *
 * <p>
 * Delegates to {@link #localize(String, Locale)} with a <code>null</code>
 * locale.
 *
 * @param str the string to localize, may be <code>null</code>
 * @return whatever the two-argument overload returns for a null locale
 */
public static String localize(String str) {
    // No explicit locale: the two-argument overload receives null.
    final Locale unspecifiedLocale = null;

    return localize(str, unspecifiedLocale);
}

/**
 * Returns the form of a string localized for the specified locale.
 *
 * @param str    the string to localize, may be <code>null</code>
 * @param locale the locale handed to the tokenizer, may be <code>null</code>
 * @return the parsed string, or <code>null</code> when <code>str</code> is
 *         <code>null</code>
 */
public static String localize(String str, Locale locale) {
    // Null in, null out; everything else is handled by the tokenizer.
    return (str == null) ? null : new LocalizeTokenizer(str, locale).parse();
}

/**
* Scans a string for <code>{...}</code> blocks and replaces each block with
* one of the localized text segments found inside it.
*
* <p>
* Inside a block, <code>[localeName]</code> markers split the content into
* segments. Each segment is stored under the name of the marker that precedes
* it; the segment before the first marker is stored under
* <code>ObjectUtil.toString(null)</code>.
*
* <p>
* The two find methods communicate through the index fields below, so the
* order in which they are called and in which the fields are updated matters.
*/
private static class LocalizeTokenizer {
// Delimiters of a block and of a locale marker inside a block.
private static final char BLOCK_START = '{';
private static final char BLOCK_END = '}';
private static final char LOCALE_START = '[';
private static final char LOCALE_END = ']';
// The string being parsed and the requested locale (may be null).
private String str;
private Locale locale;
// Positions of the '{' and '}' of the block most recently located by findBlock.
private int blockStartIndex;
private int blockEndIndex;
// Positions maintained by findLocalizedBlock for the most recent segment.
private int localeStartIndex;
private int localeEndIndex;
// Locale parsed from the marker that terminated the most recent segment, or null.
private Locale currentLocale;

public LocalizeTokenizer(String str, Locale locale) {
this.str = str;
this.locale = locale;
}

/**
* Builds the result by copying the text outside blocks verbatim and
* substituting each block with the text chosen by findBlock.
*
* <p>
* NOTE(review): the loop ends at the first <code>null</code> from
* findBlock, so a block that yields no text also stops the processing of
* every later block; the remainder is appended unchanged. Confirm this is
* intended.
*
* @return the parsed string
*/
public String parse() {
StringBuffer result = new StringBuffer(str.length());
String block;
int index = 0;

while ((block = findBlock(index)) != null) {
// When findBlock returns non-null, blockStartIndex and blockEndIndex have both been set.
result.append(str.substring(index, blockStartIndex));
result.append(block);

index = blockEndIndex + 1;
}

// Append whatever follows the last substituted block.
result.append(str.substring(index));

return result.toString();
}

// Finds the block between "{" and "}" at or after index and returns the text
// selected for it, or null when no block is found or no segment is selected.
private String findBlock(int index) {
// Maps a locale name (as a String) to the text segment stored for it.
Map blocks = new HashMap(4);

blockEndIndex = index;

for (;;) {
blockStartIndex = str.indexOf(BLOCK_START, index);

// Proceed only if a '{' exists and the previous pass did not fail to find a '}'.
if ((blockStartIndex != -1) && (blockEndIndex != -1)) {
blockEndIndex = str.indexOf(BLOCK_END, blockStartIndex + 1);
index = blockStartIndex + 1; // the next iteration starts after blockStartIndex

if (blockEndIndex != -1) {
// Scan the localized segments of the block, within the range
// (blockStartIndex, blockEndIndex).
String localizedBlock;
Locale lastLocale = null;
int localizedIndex = blockStartIndex + 1;

while ((localizedBlock = findLocalizedBlock(localizedIndex)) != null) {
// When findLocalizedBlock returns non-null, currentLocale,
// localeStartIndex and localeEndIndex have all been set.
// The segment is keyed by the marker that preceded it (lastLocale),
// not by the marker that ended it (currentLocale).
blocks.put(ObjectUtil.toString(lastLocale),
localizedBlock);
lastLocale = currentLocale;
localizedIndex = localeEndIndex + 1;
}

break;
}
} else {
break;
}
}

// Choose the segment for the locale.
if (!blocks.isEmpty()) {
if (blocks.size() == 1) {
String key = (String) blocks.keySet().iterator().next();

// A single segment with an empty key is returned with its braces restored.
if (StringUtil.isEmpty(key)) {
return "" + BLOCK_START + blocks.get(key) + BLOCK_END;
}
return (String) blocks.get(key);
}
// Use the requested locale, or the context locale when none was given.
List locales = LocaleUtil.calculateBundleNames("",
(locale == null) ? LocaleUtil.getContext().getLocale()
: locale);

// Walk the candidate names from last to first and return the first match.
// NOTE(review): presumably the last entries are the most specific names - confirm
// against LocaleUtil.calculateBundleNames.
for (int i = locales.size() - 1; i >= 0; i--) {
String localizedBlock = (String) blocks.get(locales.get(i));

if (localizedBlock != null) {
return localizedBlock;
}
}
}

return null;
}

// Finds the next localized segment between blockStartIndex and blockEndIndex,
// starting at localizedIndex. Returns null once localizedIndex has reached
// blockEndIndex.
private String findLocalizedBlock(int localizedIndex) {
int startIndex = localizedIndex;
boolean eof = false;

if (startIndex >= blockEndIndex) {
return null;
}

currentLocale = null;
localeEndIndex = localizedIndex;

for (;;) {
localeStartIndex = str.indexOf(LOCALE_START, localizedIndex);

// Continue only while both the '[' just found and the last ']' position
// lie inside the current block.
if ((localeStartIndex >= blockStartIndex)
&& (localeStartIndex < blockEndIndex)
&& (localeEndIndex >= blockStartIndex)
&& (localeEndIndex < blockEndIndex)) {
localeEndIndex = str.indexOf(LOCALE_END,
localeStartIndex + 1);
localizedIndex = localeStartIndex + 1; // the next iteration starts after localeStartIndex

if ((localeEndIndex >= blockStartIndex)
&& (localeEndIndex < blockEndIndex)) {
String localeName = str.substring(localeStartIndex + 1,
localeEndIndex);

if (StringUtil.isNotEmpty(localeName)) {
currentLocale = LocaleUtil.parseLocale(localeName);

// A marker naming a supported locale ends the segment; the text
// before the marker is returned.
if (LocaleUtil.isLocaleSupported(currentLocale)) {
return str.substring(startIndex,
localeStartIndex);
}
// Unsupported locale: the marker is skipped and scanning continues.
currentLocale = null;
}
}
} else {
eof = true;
break;
}
}

// No further marker in the block: the rest of the block is the last segment.
if (eof) {
localeStartIndex = startIndex;
localeEndIndex = blockEndIndex;
return str.substring(startIndex, blockEndIndex);
}
return null;
}
}

/**
 * Trims the text of an HTML string to at most <code>maxLength</code>
 * double-width characters.
 *
 * <p>
 * The string may contain HTML tags. Tags are copied verbatim and do not count
 * towards the length. When the text no longer fits, an ellipsis
 * <code>...</code> is appended at the cut, and all following text is dropped.
 *
 * <p>
 * Characters below 256 count as half the length of other characters. A named
 * entity recognized by <code>Entities.HTML40</code> counts as one character,
 * weighted by its entity value in the same way.
 *
 * <p>
 * For the sake of simplicity and performance no HTML parser is used; tags are
 * handled naively. Every tag that does not start with <code>&lt;/</code>
 * counts as an opening tag, including tags that are never closed such as
 * <code>&lt;br&gt;</code>. After the cut, scanning continues while opening
 * tags remain unmatched so that later tags are still copied.
 *
 * <p>
 * The character or entity that would exceed the limit is itself dropped, so
 * the kept text never exceeds the limit and the ellipsis only appears when
 * something was actually removed.
 *
 * @param string    the string to be trimmed, may be <code>null</code>
 * @param maxLength max length of the text, in double-width characters
 *
 * @return the trimmed string; <code>""</code> when <code>string</code> is
 *         <code>null</code>; the string itself when its total length
 *         (tags included) is not greater than <code>maxLength</code>
 */
public static String trimHTMLString(String string, int maxLength) {
    if (string == null) {
        return "";
    }

    if (string.length() <= maxLength) {
        return string;
    }

    StringBuffer result = new StringBuffer();

    // Weights are counted in half-width units: 1 for a character below 256,
    // 2 for any other character.
    int maxWeight = 2 * maxLength;
    int weight = 0;

    int totalLength = string.length();
    int index = 0;
    int leftTagCount = 0;

    // weight > maxWeight means the text has been cut; from then on the loop
    // only keeps running while opening tags are still unmatched.
    while ((index < totalLength)
            && ((weight <= maxWeight) || (leftTagCount > 0))) {
        char c = string.charAt(index);

        index++;

        if (c == '<') {
            // Copy the whole tag verbatim, up to and including '>'.
            result.append(c);

            if (index < totalLength) {
                c = string.charAt(index);
                index++;
                result.append(c);

                if (c == '/') {
                    if (leftTagCount > 0) {
                        leftTagCount--;
                    }
                } else {
                    leftTagCount++;
                }
            }

            while ((c != '>') && (index < totalLength)) {
                c = string.charAt(index);
                index++;
                result.append(c);
            }

            continue;
        }

        if (weight > maxWeight) {
            // The text has already been cut: drop text, keep scanning for tags.
            continue;
        }

        // By default the token is the single character c.
        int tokenStart = index - 1;
        int tokenEnd = index;
        int tokenWeight = (c < 256) ? 1 : 2;

        if (c == '&') {
            // Peek to see whether this starts a named HTML entity.
            int peekIndex = string.indexOf(';', index);

            if (peekIndex > 0) {
                // NOTE(review): assumes getEntityValue returns a non-positive
                // value for names it does not know - confirm against Entities.
                int entityValue = Entities.HTML40
                        .getEntityValue(string.substring(index, peekIndex));

                if (entityValue > 0) {
                    // The whole entity counts as one character.
                    tokenEnd = peekIndex + 1;
                    tokenWeight = (entityValue < 256) ? 1 : 2;
                }
            }
        }

        if (weight + tokenWeight > maxWeight) {
            // The token does not fit: drop it, mark the cut with an ellipsis
            // and push the weight over the limit to stop accepting text.
            result.append("...");
            weight = maxWeight + 1;
        } else {
            result.append(string.substring(tokenStart, tokenEnd));
            weight += tokenWeight;
        }

        index = tokenEnd;
    }

    return result.toString();
}

/**
 * Converts a string to an <code>int</code>, falling back to a default.
 *
 * @param s   the text to parse with <code>Integer.parseInt</code>
 * @param def the value returned when <code>s</code> cannot be parsed
 * @return the parsed value, or <code>def</code> when parsing throws
 *         <code>NumberFormatException</code> (which includes a
 *         <code>null</code> input)
 */
public static int getInt(String s, int def) {
    try {
        return Integer.parseInt(s);
    } catch (NumberFormatException ignored) {
        // Deliberately ignored: unparsable input yields the default.
        return def;
    }
}

/**
 * Converts a string to an <code>int</code>, using <code>0</code> as the
 * fallback value.
 *
 * @param s the text to convert
 * @return the result of the two-argument overload called with a default of
 *         <code>0</code>
 */
public static int getInt(String s) {
    final int fallback = 0;

    return getInt(s, fallback);
}

HTML过滤和补齐（四）