JiShe.IOT.Admin/check_encoding.ps1

153 lines
5.7 KiB
PowerShell
Raw Normal View History

# PowerShell script: check whether files are encoded as UTF-8.
# Usage: .\check_encoding.ps1 [path to a file or directory]
# Output encoding: UTF-8
param(
    [Parameter(Mandatory = $false)]
    [string]$Path = "."
)

# Switch the console code page to UTF-8 (65001) when it is not already active,
# so that non-ASCII text (e.g. Chinese file names) is not garbled on output.
if (65001 -ne [Console]::OutputEncoding.CodePage) {
    $null = chcp 65001
}

# Use UTF-8 for console output, for text piped to native commands, and as the
# default value of every cmdlet parameter named -Encoding.
[Console]::OutputEncoding = [Text.Encoding]::UTF8
$OutputEncoding = [Text.Encoding]::UTF8
$PSDefaultParameterValues['*:Encoding'] = 'utf8'
# Extensions (lower-case, including the leading dot) of the files that are
# treated as code/text files and therefore checked. Each extension is listed
# exactly once.
$codeFileExtensions = @(
    # Source code files
    '.cs', '.js', '.ts', '.jsx', '.tsx', '.java', '.py', '.cpp', '.c', '.h', '.hpp',
    '.go', '.rs', '.rb', '.php', '.swift', '.kt', '.scala', '.vb', '.fs', '.dart',
    # Configuration / markup files
    '.json', '.xml', '.html', '.htm', '.css', '.scss', '.sass', '.less',
    '.yaml', '.yml', '.toml', '.ini', '.config', '.properties', '.conf',
    # Script files
    '.ps1', '.psm1', '.psd1', '.sh', '.bash', '.zsh', '.fish', '.bat', '.cmd',
    # Data / documentation files
    '.md', '.txt', '.log', '.csv', '.sql', '.r', '.m', '.mm',
    # Other text formats
    '.vue', '.svelte', '.dts', '.map'
)
function Test-IsCodeFile {
    # Returns $true when the extension of $FilePath is listed in
    # $codeFileExtensions (read from the enclosing scope), otherwise $false.
    # Paths without an extension yield an empty extension and return $false
    # unless the list contains an empty string.
    param([string]$FilePath)

    # Lower-case with the invariant culture: a culture-sensitive ToLower()
    # maps 'I' to dotless 'ı' under e.g. tr-TR, so '.INI' would stop matching.
    $extension = [System.IO.Path]::GetExtension($FilePath).ToLowerInvariant()
    return $codeFileExtensions -contains $extension
}
function Test-IsUtf8 {
    # Determines whether the file at $FilePath contains only valid UTF-8.
    #
    # Parameters:
    #   FilePath - path of the file to inspect; may be relative to the current
    #              PowerShell location.
    #
    # Returns a hashtable with three keys:
    #   IsUtf8   - $true when every byte sequence in the file is valid UTF-8.
    #   HasBom   - $true when the file starts with the UTF-8 BOM (EF BB BF).
    #   Encoding - "UTF-8 with BOM", "UTF-8 without BOM", "Unknown/Not UTF-8",
    #              or "Error: <message>" when the file could not be read.
    #
    # The function never throws; read failures are reported through Encoding.
    param([string]$FilePath)

    try {
        # .NET file APIs resolve relative paths against the process working
        # directory, which can differ from PowerShell's current location, so
        # the path is turned into an absolute provider path first.
        $resolvedPath = (Resolve-Path -LiteralPath $FilePath -ErrorAction Stop).ProviderPath
        $bytes = [System.IO.File]::ReadAllBytes($resolvedPath)
    } catch {
        return @{ IsUtf8 = $false; HasBom = $false; Encoding = "Error: $($_.Exception.Message)" }
    }

    $hasBom = $bytes.Length -ge 3 -and
        $bytes[0] -eq 0xEF -and $bytes[1] -eq 0xBB -and $bytes[2] -eq 0xBF

    # A decoder created with throwOnInvalidBytes = $true raises an exception on
    # the first malformed sequence instead of silently substituting U+FFFD.
    # The BOM itself is valid UTF-8, so the whole buffer can be decoded as is.
    $strictUtf8 = New-Object System.Text.UTF8Encoding -ArgumentList $false, $true
    try {
        [void]$strictUtf8.GetString($bytes)
        $isValid = $true
    } catch {
        $isValid = $false
    }

    if (-not $isValid) {
        # A BOM followed by malformed bytes is still not a valid UTF-8 file.
        return @{ IsUtf8 = $false; HasBom = $hasBom; Encoding = "Unknown/Not UTF-8" }
    }
    if ($hasBom) {
        return @{ IsUtf8 = $true; HasBom = $true; Encoding = "UTF-8 with BOM" }
    }
    return @{ IsUtf8 = $true; HasBom = $false; Encoding = "UTF-8 without BOM" }
}
# Main logic: $Path may name a single file, a directory, or nothing at all.
#   - File:      checked only when its extension is a known code extension;
#                a message is printed only when the file is not UTF-8.
#   - Directory: every code file below it (recursively) is checked and a
#                summary is printed.
#   - Missing:   an error is printed and the script exits with code 1.
if (Test-Path $Path -PathType Leaf) {
    # Single file
    if (-not (Test-IsCodeFile -FilePath $Path)) {
        Write-Host "Skipping non-code file: $Path" -ForegroundColor Gray
        exit 0
    }

    $result = Test-IsUtf8 -FilePath $Path
    if (-not $result.IsUtf8) {
        Write-Host "[X] Not UTF-8: $Path" -ForegroundColor Red
        Write-Host " Encoding: $($result.Encoding)" -ForegroundColor Yellow
    }
} elseif (Test-Path $Path -PathType Container) {
    # Directory
    Write-Host "Checking directory: $Path" -ForegroundColor Cyan
    Write-Host ""

    # @(...) forces an array even when the pipeline yields zero or one item,
    # so .Count below is always a reliable element count.
    $allFiles = @(Get-ChildItem -Path $Path -File -Recurse)
    $files = @($allFiles | Where-Object { Test-IsCodeFile -FilePath $_.FullName })

    if ($files.Count -eq 0) {
        Write-Host "No code files found in the directory." -ForegroundColor Yellow
        exit 0
    }

    Write-Host "Found $($files.Count) code file(s) (filtered from $($allFiles.Count) total files)" -ForegroundColor Gray
    Write-Host ""

    $utf8Count = 0
    $nonUtf8Count = 0
    foreach ($file in $files) {
        $result = Test-IsUtf8 -FilePath $file.FullName
        if ($result.IsUtf8) {
            $utf8Count++
            continue
        }

        # Report each offending file as soon as it is found.
        $nonUtf8Count++
        Write-Host "[X] Not UTF-8: $($file.FullName)" -ForegroundColor Red
        Write-Host " Encoding: $($result.Encoding)" -ForegroundColor Yellow
        Write-Host ""
    }

    Write-Host "Summary:" -ForegroundColor Cyan
    Write-Host " Total code files: $($files.Count)"
    Write-Host " UTF-8 files: $utf8Count" -ForegroundColor Green
    if ($nonUtf8Count -gt 0) {
        Write-Host " Non UTF-8 files: $nonUtf8Count" -ForegroundColor Red
    } else {
        Write-Host " Non UTF-8 files: 0" -ForegroundColor Green
        Write-Host ""
        Write-Host "All code files are UTF-8 encoded!" -ForegroundColor Green
    }
} else {
    Write-Host "Error: Path does not exist: $Path" -ForegroundColor Red
    exit 1
}