# PowerShell script: check whether code/text files are encoded as UTF-8.
# Usage: .\check_encoding.ps1 [file path or directory path]
# Output encoding: UTF-8
#
# Given a file, reports it only when it is NOT valid UTF-8.
# Given a directory, recursively checks every file whose extension is in
# $codeFileExtensions and prints a summary.

param(
    [Parameter(Mandatory=$false)]
    [string]$Path = "."
)

# Switch the console and output encoding to UTF-8 so that non-ASCII text
# (for example Chinese file names) is not garbled.
if ([Console]::OutputEncoding.CodePage -ne 65001) {
    chcp 65001 | Out-Null
}
[Console]::OutputEncoding = [System.Text.Encoding]::UTF8
$PSDefaultParameterValues['*:Encoding'] = 'utf8'
$OutputEncoding = [System.Text.Encoding]::UTF8

# Extensions (lower-case, with leading dot) of the files that are checked.
$codeFileExtensions = @(
    # Source code files
    '.cs', '.js', '.ts', '.jsx', '.tsx', '.java', '.py', '.cpp', '.c', '.h', '.hpp',
    '.go', '.rs', '.rb', '.php', '.swift', '.kt', '.scala', '.vb', '.fs', '.dart',
    # Configuration / markup files
    '.json', '.xml', '.html', '.htm', '.css', '.scss', '.sass', '.less',
    '.yaml', '.yml', '.toml', '.ini', '.config', '.properties', '.conf',
    # Script files
    '.ps1', '.psm1', '.psd1', '.sh', '.bash', '.zsh', '.fish', '.bat', '.cmd',
    # Data / markup files
    '.md', '.txt', '.log', '.csv', '.sql', '.r', '.m', '.mm',
    # Other text formats
    '.vue', '.svelte', '.dts', '.map'
)

# Returns $true when the extension of $FilePath is listed in $codeFileExtensions.
function Test-IsCodeFile {
    param([string]$FilePath)

    # Invariant lower-casing: culture-sensitive ToLower() can map 'I' to a
    # dotless 'i' (e.g. Turkish), which would make '.INI' miss '.ini'.
    $extension = [System.IO.Path]::GetExtension($FilePath).ToLowerInvariant()
    return $codeFileExtensions -contains $extension
}

# Checks whether the file at $FilePath is valid UTF-8.
#
# Returns a hashtable with:
#   IsUtf8   - $true when every byte (after an optional BOM) is valid UTF-8
#   HasBom   - $true when the file starts with the UTF-8 BOM (EF BB BF)
#   Encoding - "UTF-8 with BOM", "UTF-8 without BOM", "Unknown/Not UTF-8",
#              or "Error: <message>" when the file could not be read
function Test-IsUtf8 {
    param([string]$FilePath)

    $hasBom = $false
    try {
        # .NET file APIs resolve relative paths against the process working
        # directory, which can differ from the PowerShell location, so turn
        # the path into an absolute provider path first.
        $fullPath = $ExecutionContext.SessionState.Path.GetUnresolvedProviderPathFromPSPath($FilePath)
        $bytes = [System.IO.File]::ReadAllBytes($fullPath)

        $hasBom = ($bytes.Length -ge 3) -and ($bytes[0] -eq 0xEF) -and
                  ($bytes[1] -eq 0xBB) -and ($bytes[2] -eq 0xBF)
        $offset = if ($hasBom) { 3 } else { 0 }

        # A strict decoder (throwOnInvalidBytes = $true) raises
        # DecoderFallbackException on any malformed sequence instead of
        # silently substituting U+FFFD. The content after a BOM is validated
        # as well: a BOM alone does not prove the rest of the file is UTF-8.
        $strictUtf8 = New-Object System.Text.UTF8Encoding($false, $true)
        [void]$strictUtf8.GetCharCount($bytes, $offset, $bytes.Length - $offset)

        $label = if ($hasBom) { "UTF-8 with BOM" } else { "UTF-8 without BOM" }
        return @{ IsUtf8 = $true; HasBom = $hasBom; Encoding = $label }
    }
    catch [System.Text.DecoderFallbackException] {
        return @{ IsUtf8 = $false; HasBom = $hasBom; Encoding = "Unknown/Not UTF-8" }
    }
    catch {
        return @{ IsUtf8 = $false; HasBom = $false; Encoding = "Error: $($_.Exception.Message)" }
    }
}

# Main logic
if (Test-Path $Path -PathType Leaf) {
    # Single file
    if (-not (Test-IsCodeFile -FilePath $Path)) {
        Write-Host "Skipping non-code file: $Path" -ForegroundColor Gray
        exit 0
    }

    $result = Test-IsUtf8 -FilePath $Path
    if (-not $result.IsUtf8) {
        Write-Host "[X] Not UTF-8: $Path" -ForegroundColor Red
        Write-Host " Encoding: $($result.Encoding)" -ForegroundColor Yellow
    }
}
elseif (Test-Path $Path -PathType Container) {
    # Directory
    Write-Host "Checking directory: $Path" -ForegroundColor Cyan
    Write-Host ""

    # @(...) guarantees an array, so .Count is reliable for 0 or 1 results.
    $allFiles = @(Get-ChildItem -Path $Path -File -Recurse)
    $files = @($allFiles | Where-Object { Test-IsCodeFile -FilePath $_.FullName })

    if ($files.Count -eq 0) {
        Write-Host "No code files found in the directory." -ForegroundColor Yellow
        exit 0
    }

    Write-Host "Found $($files.Count) code file(s) (filtered from $($allFiles.Count) total files)" -ForegroundColor Gray
    Write-Host ""

    $utf8Count = 0
    # A List avoids re-allocating the whole array on every append.
    $nonUtf8Files = New-Object 'System.Collections.Generic.List[string]'

    foreach ($file in $files) {
        $result = Test-IsUtf8 -FilePath $file.FullName
        if ($result.IsUtf8) {
            $utf8Count++
        }
        else {
            $nonUtf8Files.Add($file.FullName)
            Write-Host "[X] Not UTF-8: $($file.FullName)" -ForegroundColor Red
            Write-Host " Encoding: $($result.Encoding)" -ForegroundColor Yellow
            Write-Host ""
        }
    }
    $nonUtf8Count = $nonUtf8Files.Count

    Write-Host "Summary:" -ForegroundColor Cyan
    Write-Host " Total code files: $($files.Count)"
    Write-Host " UTF-8 files: $utf8Count" -ForegroundColor Green
    if ($nonUtf8Count -gt 0) {
        Write-Host " Non UTF-8 files: $nonUtf8Count" -ForegroundColor Red
    }
    else {
        Write-Host " Non UTF-8 files: 0" -ForegroundColor Green
        Write-Host ""
        Write-Host "All code files are UTF-8 encoded!" -ForegroundColor Green
    }
}
else {
    Write-Host "Error: Path does not exist: $Path" -ForegroundColor Red
    exit 1
}