【Golang】strings库

幺零九零零

已于 2024-11-22 00:44:41 修改

阅读量991

点赞数 42

分类专栏： Go 文章标签： golang 开发语言后端

于 2024-11-21 23:45:40 首次发布

本文链接：https://blog.youkuaiyun.com/lcadna/article/details/143957740

版权

Go 专栏收录该内容

46 篇文章

订阅专栏

func Count(s, substr string) int

源码及解析

// Count counts the number of non-overlapping instances of substr in s.
// If substr is the empty string it is considered to occur between every
// character of s as well as at the start and at the end, so the result is the
// number of characters in s plus one. Strings are UTF-8 encoded and a
// character may take 1 to 4 bytes, which is why the character count comes from
// utf8.RuneCountInString(s) rather than len(s).
func Count(s, substr string) int {
	switch len(substr) {
	case 0:
		// Special case: empty substr.
		return utf8.RuneCountInString(s) + 1
	case 1:
		// A one-byte substr is handed to the byte-counting routine.
		return bytealg.CountString(s, substr[0])
	}
	total := 0
	// After each hit, continue searching behind the matched text so that
	// matches never overlap.
	for pos := Index(s, substr); pos != -1; pos = Index(s, substr) {
		total++
		s = s[pos+len(substr):]
	}
	return total
}

单元测试【count_test.go】：测试文件名必须以 _test.go 结尾；测试函数必须以 Test 开头，形如 TestXxx（Xxx 不能以小写字母开头，函数名只写 Test 也可以），且参数为 *testing.T。

// TestCount exercises strings.Count on the cases its implementation treats
// differently: an empty substring, a single-byte substring, a substring that
// does not occur, and a longer substring.
func TestCount(t *testing.T) {
	cases := []struct {
		name   string
		s      string
		substr string
		want   int
	}{
		// An empty substr yields the number of runes in s plus one.
		{"both empty", "", "", 1},
		{"empty substr", "111c", "", 5},
		{"single-byte substr", "hello", "l", 2},
		{"missing substr", "hello", "z", 0},
		{"longer substr", "hello world", "world", 1},
	}
	for _, tc := range cases {
		got := strings.Count(tc.s, tc.substr)
		// Logged for every case; shown when the test is run with -v.
		t.Logf("%s: Count(%q, %q) = %d", tc.name, tc.s, tc.substr, got)
		if got != tc.want {
			// Report both values so a failure is self-explanatory.
			t.Errorf("%s: Count(%q, %q) = %d, want %d", tc.name, tc.s, tc.substr, got, tc.want)
		}
	}
}

前后缀

// HasPrefix reports whether the string s begins with prefix.
func HasPrefix(s, prefix string) bool {
	n := len(prefix)
	if len(s) < n {
		// s is too short to contain prefix at all.
		return false
	}
	return s[:n] == prefix
}

// HasSuffix reports whether the string s ends with suffix.
func HasSuffix(s, suffix string) bool {
	start := len(s) - len(suffix)
	if start < 0 {
		// s is too short to contain suffix at all.
		return false
	}
	return s[start:] == suffix
}

// maxInt is the largest value an int can hold: all bits of a uint set, shifted
// right by one to clear the sign position. That is 2^31-1 when int is 32 bits
// wide and 2^63-1 when it is 64 bits wide.
const maxInt int = int(^uint(0) >> 1) // 9223372036854775807 when int is 64 bits wide

maxInt是Go语言中int类型能表示的最大值，它的值在32位系统上是2^31 - 1，在64位系统上是2^63 - 1。

// Repeat returns a new string consisting of count copies of the string s.
//
// It panics if count is negative or if the result of (len(s) * count)
// overflows.
func Repeat(s string, count int) string {
	switch count {
	case 0:
		return ""
	case 1:
		return s
	}

	// Since we cannot return an error on overflow,
	// we should panic if the repeat will generate an overflow.
	// See golang.org/issue/16237.
	if count < 0 {
		panic("strings: negative Repeat count")
	}
	// count is at least 2 here (0, 1 and negative values were handled above),
	// so the division is safe. Requiring len(s) < maxInt/count guarantees that
	// the product computed below stays under maxInt.
	if len(s) >= maxInt/count {
		panic("strings: Repeat output length overflow")
	}
	n := len(s) * count
	// NOTE(review): the excerpt ends here; the remainder of the function body
	// and its closing brace are not shown.

func Trim(s, cutset string) string

// Trim returns a slice of the string s with all leading and trailing Unicode
// code points contained in cutset removed.
func Trim(s, cutset string) string {
	switch {
	case s == "" || cutset == "":
		// Nothing to trim, or nothing to trim with.
		return s
	case len(cutset) == 1 && cutset[0] < utf8.RuneSelf:
		// A single ASCII byte: use the byte-wise helpers.
		c := cutset[0]
		return trimLeftByte(trimRightByte(s, c), c)
	}
	set, allASCII := makeASCIISet(cutset)
	if !allASCII {
		// cutset holds non-ASCII bytes: compare rune by rune.
		return trimLeftUnicode(trimRightUnicode(s, cutset), cutset)
	}
	return trimLeftASCII(trimRightASCII(s, &set), &set)
}

// TrimLeft returns a slice of the string s with all leading Unicode code
// points contained in cutset removed.
//
// To remove a prefix, use TrimPrefix instead.
func TrimLeft(s, cutset string) string {
	if len(s) == 0 || len(cutset) == 0 {
		return s
	}
	// A single ASCII byte: use the byte-wise helper.
	if first := cutset[0]; len(cutset) == 1 && first < utf8.RuneSelf {
		return trimLeftByte(s, first)
	}
	set, allASCII := makeASCIISet(cutset)
	if allASCII {
		return trimLeftASCII(s, &set)
	}
	// cutset holds non-ASCII bytes: compare rune by rune.
	return trimLeftUnicode(s, cutset)
}


// trimLeftByte returns s without its leading run of bytes equal to c.
func trimLeftByte(s string, c byte) string {
	skip := 0
	for skip < len(s) && s[skip] == c {
		skip++
	}
	return s[skip:]
}

// asciiSet is a bitmap in which each bit records whether one byte value is a
// member of the set. Every uint32 element holds 32 bits, and makeASCIISet
// stores byte c as bit c%32 of element c/32: element 0 covers codes 0-31,
// element 1 covers codes 32-63, and so on.
//
// For example, 'a' has code 97. 97 divided by 32 is 3 with remainder 1, so
// 'a' is recorded by setting bit 1 of element 3 (the fourth element).
//
// The 128 ASCII codes need only the first four elements (4 * 32 = 128 bits);
// makeASCIISet rejects every byte >= utf8.RuneSelf, so it never sets a bit in
// elements 4-7. With eight elements, c/32 is a valid index for any byte value,
// since a byte divided by 32 is at most 7.
type asciiSet [8]uint32

// makeASCIISet builds the set of the bytes in chars and reports whether every
// byte in chars is ASCII. On the first non-ASCII byte it stops and returns the
// set filled so far together with ok == false.
func makeASCIISet(chars string) (as asciiSet, ok bool) {
	for _, c := range []byte(chars) {
		if c >= utf8.RuneSelf {
			return as, false
		}
		// Set the bit for c in the ASCII bitmap: word c/32, bit c%32.
		word, bit := c/32, c%32
		as[word] |= 1 << bit
	}
	return as, true
}


// trimLeftASCII returns s without its leading run of bytes for which
// as.contains reports true.
func trimLeftASCII(s string, as *asciiSet) string {
	skip := 0
	for skip < len(s) && as.contains(s[skip]) {
		skip++
	}
	return s[skip:]
}

// trimLeftUnicode returns s without its leading run of code points that
// ContainsRune finds in cutset.
func trimLeftUnicode(s, cutset string) string {
	for s != "" {
		var (
			r    rune
			size int
		)
		if b := s[0]; b < utf8.RuneSelf {
			// ASCII byte: it is a complete one-byte code point.
			r, size = rune(b), 1
		} else {
			r, size = utf8.DecodeRuneInString(s)
		}
		if !ContainsRune(cutset, r) {
			return s
		}
		s = s[size:]
	}
	return s
}

持续更新...