Go example 18: Strings and Runes_Go 语言中的 strings and runes-CSDN博客

Go 的字符串是只读的字节切片。Go 语言和标准库对字符串做了特殊处理，把它视为 UTF-8 编码文本的容器。在其他语言中，字符串由“字符”（characters）组成；而在 Go 中，字符的概念称为 rune——它是一个表示 Unicode 码点的整数。

package main

import (
	"fmt"
	"unicode/utf8"
)

// main inspects the Thai greeting "สวัสดี" at two levels: the raw UTF-8
// bytes stored in the string and the runes (Unicode code points) that
// those bytes encode.
func main() {
	// s is a string constant holding the word "hello" in Thai.
	const s = "สวัสดี"

	// len on a string reports its length in bytes, not in characters.
	// Output:
	//   Len: 18
	fmt.Println("Len:", len(s))

	// Dump every raw byte of the string in hexadecimal.
	// Output:
	//   e0 b8 aa e0 b8 a7 e0 b8 b1 e0 b8 aa e0 b8 94 e0 b8 b5
	for _, b := range []byte(s) {
		fmt.Printf("%x ", b)
	}
	fmt.Println()

	// RuneCountInString decodes the UTF-8 sequence and counts runes. Each
	// rune here occupies several bytes, so the count is smaller than len(s).
	// Output:
	//   Rune count: 6
	fmt.Println("Rune count:", utf8.RuneCountInString(s))

	// Ranging over a string decodes one rune per iteration and yields the
	// byte offset at which that rune starts. %#U prints the code point
	// together with its character literal.
	// Output:
	//   U+0E2A 'ส' starts at 0
	//   U+0E27 'ว' starts at 3
	//   U+0E31 'ั' starts at 6
	//   U+0E2A 'ส' starts at 9
	//   U+0E14 'ด' starts at 12
	//   U+0E35 'ี' starts at 15
	for offset, r := range s {
		fmt.Printf("%#U starts at %d\n", r, offset)
	}

	// The same walk done explicitly: decode the rune at the current offset,
	// report it, hand it to examineRune, then advance by the rune's width.
	// Output (after a blank line and the heading):
	//   U+0E2A 'ส' starts at 0
	//   found so sua
	//   U+0E27 'ว' starts at 3
	//   U+0E31 'ั' starts at 6
	//   U+0E2A 'ส' starts at 9
	//   found so sua
	//   U+0E14 'ด' starts at 12
	//   U+0E35 'ี' starts at 15
	fmt.Println("\nUsing DecodeRuneInString")
	for offset := 0; offset < len(s); {
		r, size := utf8.DecodeRuneInString(s[offset:])
		fmt.Printf("%#U starts at %d\n", r, offset)

		// A rune value can be passed to a function like any other value.
		examineRune(r)

		offset += size
	}
}

// examineRune prints a short message when r is one of two known runes and
// prints nothing otherwise. Values written in single quotes are rune
// literals, so a rune value can be compared against them directly.
func examineRune(r rune) {
	switch r {
	case 't':
		fmt.Println("found tee")
	case 'ส':
		fmt.Println("found so sua")
	}
}