JiShe.IOT.Admin/check_encoding.ps1

153 lines
5.7 KiB
PowerShell
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# PowerShell script: check whether files are encoded as UTF-8.
# Usage: .\check_encoding.ps1 [file path or directory path]
# Output encoding: UTF-8
param(
    [Parameter(Mandatory = $false)]
    [string] $Path = "."
)

# Switch the console and pipeline output to UTF-8 so non-ASCII text
# (e.g. Chinese) is not garbled when printed.
$utf8CodePage = 65001
if ([Console]::OutputEncoding.CodePage -ne $utf8CodePage) {
    # chcp prints a status line; discard it.
    $null = chcp $utf8CodePage
}
$utf8Encoding = [System.Text.Encoding]::UTF8
[Console]::OutputEncoding = $utf8Encoding
$OutputEncoding = $utf8Encoding
# Default the -Encoding parameter of every cmdlet that has one to UTF-8.
$PSDefaultParameterValues['*:Encoding'] = 'utf8'
# File extensions (lower-case, including the leading dot) that are treated as
# code/text files and therefore checked for UTF-8 encoding.
# Each extension is listed exactly once.
$codeFileExtensions = @(
    # Source code
    '.cs', '.js', '.ts', '.jsx', '.tsx', '.java', '.py', '.cpp', '.c', '.h', '.hpp',
    '.go', '.rs', '.rb', '.php', '.swift', '.kt', '.scala', '.vb', '.fs', '.dart',
    # Configuration / markup
    '.json', '.xml', '.html', '.htm', '.css', '.scss', '.sass', '.less',
    '.yaml', '.yml', '.toml', '.ini', '.config', '.properties', '.conf',
    # Scripts
    '.ps1', '.psm1', '.psd1', '.sh', '.bash', '.zsh', '.fish', '.bat', '.cmd',
    # Data / documentation
    '.md', '.txt', '.log', '.csv', '.sql', '.r', '.m', '.mm',
    # Other text formats
    '.vue', '.svelte', '.dts', '.map'
)
# Reports whether a path has one of the extensions listed in $codeFileExtensions.
# Parameters:
#   FilePath - file path or file name; only its extension is examined.
# Returns: $true when the extension is in $codeFileExtensions, otherwise $false
#          (a path without an extension yields $false).
function Test-IsCodeFile {
    param([string]$FilePath)
    # Lower-case with the invariant culture: the culture-sensitive ToLower() maps
    # 'I' to a dotless 'ı' under Turkish/Azeri cultures, so '.INI' would no longer
    # match '.ini'.
    $extension = [System.IO.Path]::GetExtension($FilePath).ToLowerInvariant()
    return $codeFileExtensions -contains $extension
}
# Determines whether the bytes of a file form valid UTF-8.
# Parameters:
#   FilePath - path of the file to inspect; a relative path is resolved against
#              the current PowerShell location.
# Returns: a hashtable with the keys
#   IsUtf8   - $true when the whole file (after any BOM) decodes as strict UTF-8
#   HasBom   - $true when the file starts with the UTF-8 BOM (EF BB BF)
#   Encoding - "UTF-8 with BOM", "UTF-8 without BOM", "Unknown/Not UTF-8",
#              or "Error: <message>" when the file could not be read
function Test-IsUtf8 {
    param([string]$FilePath)

    try {
        # .NET file APIs resolve relative paths against the process working
        # directory, which Set-Location does not change, so resolve the path
        # through PowerShell first.
        $resolvedPath = Convert-Path -LiteralPath $FilePath -ErrorAction Stop
        # Read the file once; every later check works on this buffer.
        $bytes = [System.IO.File]::ReadAllBytes($resolvedPath)
    } catch {
        return @{ IsUtf8 = $false; HasBom = $false; Encoding = "Error: $($_.Exception.Message)" }
    }

    # UTF-8 BOM is EF BB BF; skip it when validating the content.
    $hasBom = $bytes.Length -ge 3 -and $bytes[0] -eq 0xEF -and $bytes[1] -eq 0xBB -and $bytes[2] -eq 0xBF
    $offset = if ($hasBom) { 3 } else { 0 }

    # throwOnInvalidBytes = $true makes the decoder raise an exception on any
    # malformed sequence instead of silently substituting U+FFFD.
    $strictUtf8 = New-Object System.Text.UTF8Encoding -ArgumentList $false, $true
    try {
        # GetCharCount validates the bytes without allocating the decoded string.
        [void]$strictUtf8.GetCharCount($bytes, $offset, $bytes.Length - $offset)
    } catch {
        return @{ IsUtf8 = $false; HasBom = $hasBom; Encoding = "Unknown/Not UTF-8" }
    }

    if ($hasBom) {
        return @{ IsUtf8 = $true; HasBom = $true; Encoding = "UTF-8 with BOM" }
    }
    return @{ IsUtf8 = $true; HasBom = $false; Encoding = "UTF-8 without BOM" }
}
# Main logic: $Path may name a single file or a directory.
# - File: non-code files are skipped; a code file is reported only when it is
#   not UTF-8.
# - Directory: every code file below it (recursively) is checked, each
#   non-UTF-8 file is listed, and a summary is printed.
# - Anything else: an error is printed and the script exits with code 1.
if (Test-Path $Path -PathType Leaf) {
    # Single file
    if (-not (Test-IsCodeFile -FilePath $Path)) {
        Write-Host "Skipping non-code file: $Path" -ForegroundColor Gray
        exit 0
    }
    $result = Test-IsUtf8 -FilePath $Path
    if (-not $result.IsUtf8) {
        Write-Host "[X] Not UTF-8: $Path" -ForegroundColor Red
        Write-Host " Encoding: $($result.Encoding)" -ForegroundColor Yellow
    }
} elseif (Test-Path $Path -PathType Container) {
    # Directory
    Write-Host "Checking directory: $Path" -ForegroundColor Cyan
    Write-Host ""
    # @(...) guarantees an array, so .Count is reliable for zero or one result
    # (also under Set-StrictMode and on old PowerShell versions).
    $allFiles = @(Get-ChildItem -Path $Path -File -Recurse)
    $files = @($allFiles | Where-Object { Test-IsCodeFile -FilePath $_.FullName })
    if ($files.Count -eq 0) {
        Write-Host "No code files found in the directory." -ForegroundColor Yellow
        exit 0
    }
    Write-Host "Found $($files.Count) code file(s) (filtered from $($allFiles.Count) total files)" -ForegroundColor Gray
    Write-Host ""
    $utf8Count = 0
    $nonUtf8Count = 0
    foreach ($file in $files) {
        $result = Test-IsUtf8 -FilePath $file.FullName
        if ($result.IsUtf8) {
            $utf8Count++
        } else {
            $nonUtf8Count++
            Write-Host "[X] Not UTF-8: $($file.FullName)" -ForegroundColor Red
            Write-Host " Encoding: $($result.Encoding)" -ForegroundColor Yellow
            Write-Host ""
        }
    }
    Write-Host "Summary:" -ForegroundColor Cyan
    Write-Host " Total code files: $($files.Count)"
    Write-Host " UTF-8 files: $utf8Count" -ForegroundColor Green
    if ($nonUtf8Count -gt 0) {
        Write-Host " Non UTF-8 files: $nonUtf8Count" -ForegroundColor Red
    } else {
        Write-Host " Non UTF-8 files: 0" -ForegroundColor Green
        Write-Host ""
        Write-Host "All code files are UTF-8 encoded!" -ForegroundColor Green
    }
} else {
    Write-Host "Error: Path does not exist: $Path" -ForegroundColor Red
    exit 1
}