[Visual Basic] 纯文本查看 复制代码
' --- 文本清理脚本 ---
Set fso = CreateObject("Scripting.FileSystemObject")
Set shell = CreateObject("WScript.Shell")
Dim txtCount, docxCount, xlsxCount
txtCount = 0
docxCount = 0
xlsxCount = 0
If WScript.Arguments.Count = 0 Then
MsgBox "请将要处理的 txt/docx/xlsx 文件拖到这个脚本上。", vbExclamation, "未提供文件"
WScript.Quit
End If
For i = 0 To WScript.Arguments.Count - 1
filePath = WScript.Arguments(i)
If fso.FileExists(filePath) Then
ext = LCase(fso.GetExtensionName(filePath))
Select Case ext
Case "txt"
CleanTxt filePath
txtCount = txtCount + 1
Case "docx"
CleanDocx filePath
docxCount = docxCount + 1
Case "xlsx"
CleanXlsx filePath
xlsxCount = xlsxCount + 1
End Select
End If
Next
MsgBox "文本清理完成:" & vbCrLf & _
"TXT 文件:" & txtCount & vbCrLf & _
"DOCX 文件:" & docxCount & vbCrLf & _
"XLSX 文件:" & xlsxCount, vbInformation, "完成"
Sub CleanTxt(filePath)
Dim inputStream, outputStream, cleanedText, line, lines, newFilePath
' === 1. 以 UTF-8 读取文本 ===
Set inputStream = CreateObject("ADODB.Stream")
inputStream.Type = 2 ' 文本模式
inputStream.Charset = "utf-8"
inputStream.Open
inputStream.LoadFromFile filePath
lines = Split(inputStream.ReadText, vbCrLf)
inputStream.Close
' === 2. 清理文本内容:去空行、前后空格、制表符 ===
cleanedText = ""
For Each line In lines
line = Trim(Replace(Replace(line, vbTab, ""), Chr(9), ""))
If line <> "" Then cleanedText = cleanedText & line & vbCrLf
Next
' === 3. 构造新文件名:原名_cleaned.txt ===
newFilePath = Left(filePath, Len(filePath) - Len(fso.GetExtensionName(filePath)) - 1) & "_cleaned.txt"
' === 4. 以 UTF-8 写入新的文本文件 ===
Set outputStream = CreateObject("ADODB.Stream")
outputStream.Type = 2 ' 文本模式
outputStream.Charset = "utf-8"
outputStream.Open
outputStream.WriteText cleanedText
outputStream.SaveToFile newFilePath, 2 ' 2 = 覆盖模式(如果文件存在)
outputStream.Close
End Sub
Sub CleanDocx(filePath)
On Error Resume Next
Dim wordApp, doc, para, paras, i, text, ext, newFilePath, saveFormat
Set wordApp = CreateObject("Word.Application")
wordApp.Visible = False
Set doc = wordApp.Documents.Open(filePath, False, False)
' 倒序清理段落内容
Set paras = doc.Paragraphs
For i = paras.Count To 1 Step -1
Set para = paras(i)
text = para.Range.Text
text = Replace(Replace(text, vbTab, ""), Chr(9), "") ' 去掉制表符
text = Trim(text) ' 清除前后空白字符
' 去掉段尾空格但保留段落标记
If Right(text, 1) = vbCr Or Right(text, 1) = vbLf Then
text = RTrim(Left(text, Len(text) - 1)) & vbCr
Else
text = RTrim(text)
End If
If Len(Replace(text, vbCr, "")) = 0 Then
para.Range.Delete
Else
para.Range.Text = text
End If
Next
' 构造 _cleaned 文件名
ext = LCase(fso.GetExtensionName(filePath))
newFilePath = Left(filePath, Len(filePath) - Len(ext) - 1) & "_cleaned." & ext
' 设置保存格式
If ext = "docx" Then
saveFormat = 16 ' wdFormatDocumentDefault
ElseIf ext = "doc" Then
saveFormat = 0 ' wdFormatDocument
Else
saveFormat = 16
End If
doc.SaveAs2 newFilePath, saveFormat
doc.Close False
wordApp.Quit
Set doc = Nothing
Set wordApp = Nothing
On Error GoTo 0
End Sub
Sub CleanXlsx(filePath)
On Error Resume Next
Dim xlApp, wb, ws, r, c, isEmpty, lastRow, lastCol, newFilePath, ext, baseName, saveFormat
Set xlApp = CreateObject("Excel.Application")
xlApp.Visible = False
Set wb = xlApp.Workbooks.Open(filePath)
For Each ws In wb.Worksheets
lastRow = ws.UsedRange.Rows.Count
lastCol = ws.UsedRange.Columns.Count
' 清理单元格内容(去除首尾空格、制表符、全角空格)
For r = 1 To lastRow
For c = 1 To lastCol
If Not IsEmpty(ws.Cells(r, c).Value) Then
Select Case VarType(ws.Cells(r, c).Value)
Case vbString
val = ws.Cells(r, c).Value
val = Trim(Replace(val, vbTab, ""))
val = Replace(val, Chr(9), "")
val = Replace(val, ChrW(&H3000), "")
ws.Cells(r, c).Value = val
Case vbDouble, vbInteger, vbLong
' 保留原值
Case vbError
' 忽略错误单元格
Case Else
' 其他类型不处理
End Select
End If
Next
Next
' 删除空行(从下往上)
For r = lastRow To 1 Step -1
isEmpty = True
For c = 1 To lastCol
If Trim(ws.Cells(r, c).Value) <> "" Then
isEmpty = False
Exit For
End If
Next
If isEmpty Then ws.Rows(r).Delete
Next
' 删除空列(从右往左)
For c = lastCol To 1 Step -1
isEmpty = True
For r = 1 To ws.UsedRange.Rows.Count
If Trim(ws.Cells(r, c).Value) <> "" Then
isEmpty = False
Exit For
End If
Next
If isEmpty Then ws.Columns(c).Delete
Next
Next
' 构造新文件名
ext = LCase(fso.GetExtensionName(filePath))
baseName = Left(filePath, Len(filePath) - Len(ext) - 1)
newFilePath = baseName & "_cleaned." & ext
' 设置保存格式(56 = .xls, 51 = .xlsx)
If ext = "xls" Then
saveFormat = 56 ' Excel 97-2003 Workbook
Else
saveFormat = 51 ' OpenXML Workbook (.xlsx)
End If
wb.SaveAs newFilePath, saveFormat
wb.Close False
xlApp.Quit
Set wb = Nothing
Set xlApp = Nothing
On Error GoTo 0
End Sub