package files import ( "os" "unicode/utf8" ) func isBinary(content []byte) bool { maybeStr := string(content) runeCnt := utf8.RuneCount(content) runeIndex := 0 gotRuneErrCnt := 0 firstRuneErrIndex := -1 const ( // 8 and below are control chars (e.g. backspace, null, eof, etc) maxControlCharsCode = 8 // 0xFFFD(65533) is the "error" Rune or "Unicode replacement character" // see https://golang.org/pkg/unicode/utf8/#pkg-constants unicodeReplacementChar = 0xFFFD ) for _, b := range maybeStr { if b <= maxControlCharsCode { return true } if b == unicodeReplacementChar { // if it is not the last (utf8.UTFMax - x) rune if runeCnt > utf8.UTFMax && runeIndex < runeCnt-utf8.UTFMax { return true } // else it is the last (utf8.UTFMax - x) rune // there maybe Vxxx, VVxx, VVVx, thus, we may got max 3 0xFFFD rune (assume V is the byte we got) // for Chinese, it can only be Vxx, VVx, we may got max 2 0xFFFD rune gotRuneErrCnt++ // mark the first time if firstRuneErrIndex == -1 { firstRuneErrIndex = runeIndex } } runeIndex++ } // if last (utf8.UTFMax - x ) rune has the "error" Rune, but not all if firstRuneErrIndex != -1 && gotRuneErrCnt != runeCnt-firstRuneErrIndex { return true } return false } func IsNamedPipe(mode os.FileMode) bool { return mode&os.ModeNamedPipe != 0 } func IsSymlink(mode os.FileMode) bool { return mode&os.ModeSymlink != 0 }