-
Notifications
You must be signed in to change notification settings - Fork 6
/
Convert-WordDocument.ps1
223 lines (169 loc) · 7.05 KB
/
Convert-WordDocument.ps1
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
<#
.Synopsis
PowerShell script to convert Word documents
.Description
This script converts Word compatible documents to a selected format utilizing the Word SaveAs function. Each file is converted by a single dedicated Word COM instance.
The script converts either all documents in a single folder that match an include filter, or a single file.
Currently supported target document types:
- Default --> Word 2016
- PDF
- XPS
- HTML
Author: Thomas Stensitzki
Version 1.1 2019-11-26
.NOTES
Requirements
- Word 2016+ installed locally
Revision History
--------------------------------------------------------------------------------
1.0 Initial release
1.1 Updated Word cleanup code
.LINK
http://scripts.granikos.eu
.PARAMETER SourcePath
Source path to a folder containing the documents to convert or full path to a single document
.PARAMETER IncludeFilter
File extension filter when converting all files in a single folder. Default: *.doc
.PARAMETER TargetFormat
Word Save AS target format. Currently supported: Default, PDF, XPS, HTML
.PARAMETER DeleteExistingFiles
Switch to delete an existing target file
.EXAMPLE
Convert all .doc files in E:\temp to Default
.\Convert-WordDocument.ps1 -SourcePath E:\Temp -IncludeFilter *.doc
.EXAMPLE
Convert all .doc files in E:\temp to XPS
.\Convert-WordDocument.ps1 -SourcePath E:\Temp -IncludeFilter *.doc -TargetFormat XPS
.EXAMPLE
Convert a single document to Word default format
.\Convert-WordDocument.ps1 -SourcePath E:\Temp\MyDocument.doc
#>
[CmdletBinding()]
param(
    # Folder holding the documents to convert, or the full path of one document
    [string]
    $SourcePath = '',

    # Name filter applied to the files found when SourcePath is a folder
    [string]
    $IncludeFilter = '*.doc',

    # Only some of the supported file formats are currently tested
    [ValidateSet('Default','PDF','XPS','HTML')]
    [string]
    $TargetFormat = 'Default',

    # Intended to request deletion of an already existing target file
    [switch]
    $DeleteExistingFiles
)
# Exit codes returned by this script
$ERR_OK                = 0       # Success
$ERR_COMOBJECT         = 1001    # The Word COM object could not be created
$ERR_SOURCEPATHMISSING = 1002    # No source path was provided
$ERR_WORDSAVEAS        = 1003    # Opening or saving a document failed

# Word target document types (WdSaveFormat), keyed by format name
# Source: https://msdn.microsoft.com/en-us/vba/word-vba/articles/wdsaveformat-enumeration-word
$wdFormat = @{
    'Document'                    = 0    # Microsoft Office Word 97 - 2003 binary file format
    'Template'                    = 1    # Word 97 - 2003 template format
    'Text'                        = 2    # Microsoft Windows text format
    'TextLineBreaks'              = 3    # Microsoft Windows text format with line breaks preserved
    'DOSText'                     = 4    # Microsoft DOS text format
    'DOSTextLineBreaks'           = 5    # Microsoft DOS text with line breaks preserved
    'RTF'                         = 6    # Rich text format (RTF)
    'EncodedText'                 = 7    # Encoded text format
    'HTML'                        = 8    # Standard HTML format
    'WebArchive'                  = 9    # Web archive format
    'FilteredHtml'                = 10   # Filtered HTML format
    'XML'                         = 11   # Extensible Markup Language (XML) format
    'XMLDocument'                 = 12   # XML document format
    'XMLDocumentMacroEnabled'     = 13   # XML document format with macros enabled
    'XMLTemplate'                 = 14   # XML template format
    'XMLTemplateMacroEnabled'     = 15   # XML template format with macros enabled
    'Default'                     = 16   # Word default document file format. For Word, this is the DOCX format
    'PDF'                         = 17   # PDF format
    'XPS'                         = 18   # XPS format
    'FlatXML'                     = 19   # Open XML file format saved as a single XML file
    'FlatXMLMacroEnabled'         = 20   # Open XML file format with macros enabled saved as a single XML file
    'FlatXMLTemplate'             = 21   # Open XML template format saved as a XML single file
    'FlatXMLTemplateMacroEnabled' = 22   # Open XML template format with macros enabled saved as a single XML file
    'OpenDocument'                = 23   # OpenDocument Text format
    'StrictOpenXMLFormat'         = 24   # Strict Open XML document format
}

# File extension written for each target format that the script can produce
$FileExtension = @{
    'Document' = '.doc'
    'Template' = '.dot'
    'RTF'      = '.rtf'
    'HTML'     = '.html'
    'Default'  = '.docx'
    'PDF'      = '.pdf'
    'XPS'      = '.xps'
}
function Invoke-Word {
    <#
    .SYNOPSIS
    Converts a single document using a dedicated Word COM instance.
    .DESCRIPTION
    Starts a new Word.Application COM object, opens the source document, saves it
    next to the source file under the same base name with the target extension,
    and finally closes the document and quits Word.
    Does nothing when FileSourcePath is empty.
    .PARAMETER FileSourcePath
    Full path of the document to convert.
    .PARAMETER SourceFileExtension
    Extension of the source file. Kept for backward compatibility with existing
    callers; the target path is derived from FileSourcePath itself.
    .PARAMETER TargetFileExtension
    Extension (including the leading dot) of the file to create.
    .PARAMETER WdSaveFormat
    Numeric WdSaveFormat value handed to Word's SaveAs. Default: 16 (docx).
    .PARAMETER DeleteFile
    Remove an already existing target file before saving.
    .NOTES
    Terminates the script with $ERR_COMOBJECT when Word cannot be started and with
    $ERR_WORDSAVEAS when opening or saving the document fails.
    #>
    [CmdletBinding()]
    Param(
        [string]$FileSourcePath = '',
        [string]$SourceFileExtension = '',
        [string]$TargetFileExtension = '',
        [int]$WdSaveFormat = 16, # Default docx
        [switch]$DeleteFile
    )

    # Nothing to convert
    if($FileSourcePath -eq '') {
        return
    }

    Write-Output ('Working on {0}' -f $FileSourcePath)

    # Start with clean references so the cleanup code can tell what was created
    $WordApplication = $null
    $WordDocument = $null

    # try to create a new instance of the COM object
    try {
        $WordApplication = New-Object -ComObject Word.Application
    }
    catch {
        Write-Error -Message 'Word COM object could not be loaded'
        Exit $ERR_COMOBJECT
    }

    # try to open the document and save it in the new format
    try {
        $WordDocument = $WordApplication.Documents.Open($FileSourcePath)

        # Swap only the trailing extension of the file name. A plain string replace
        # on the whole path would also rewrite matching text in folder names and
        # fails for files without an extension.
        $TargetFolder = [System.IO.Path]::GetDirectoryName($FileSourcePath)
        $TargetName = [System.IO.Path]::GetFileNameWithoutExtension($FileSourcePath) + $TargetFileExtension
        $NewFilePath = [System.IO.Path]::Combine($TargetFolder, $TargetName)

        # Delete an existing target file on request, but never the source document itself.
        # LiteralPath keeps characters such as [ ] in file names from acting as wildcards.
        if($DeleteFile -and ($NewFilePath -ne $FileSourcePath) -and (Test-Path -LiteralPath $NewFilePath)) {
            $null = Remove-Item -LiteralPath $NewFilePath -Force -Confirm:$false
        }

        # Now let's save the document
        $WordDocument.SaveAs([ref] $NewFilePath, [ref]$WdSaveFormat)
    }
    catch {
        Write-Error -Message "Error saving document $($FileSourcePath):`nException: $($_.Exception.Message)"
        Exit $ERR_WORDSAVEAS
    }
    finally {
        # Clean up. The document reference is still $null when Open() failed.
        if($null -ne $WordDocument) {
            try {
                $WordDocument.Close()
            }
            catch {
                Write-Verbose -Message ('Closing the document failed: {0}' -f $_.Exception.Message)
            }
            [Runtime.Interopservices.Marshal]::ReleaseComObject($WordDocument) | Out-Null
        }

        try {
            $WordApplication.Quit()
        }
        catch {
            Write-Verbose -Message ('Quitting Word failed: {0}' -f $_.Exception.Message)
        }
        [Runtime.Interopservices.Marshal]::ReleaseComObject($WordApplication) | Out-Null

        if(Test-Path variable:global:WordApplication) {
            Remove-Variable -Name WordApplication -Scope Global 4>$Null
        }

        # Collect the released COM wrappers
        [GC]::Collect()
        [GC]::WaitForPendingFinalizers()
    }
}
# Script entry point: validate the source path, then convert either every matching
# file below a folder or the file(s) the path refers to.
if($SourcePath -eq '') {
    Write-Warning -Message 'No document source path has been provided'
    exit $ERR_SOURCEPATHMISSING
}

# Fail early with a clear message instead of continuing with a missing path
if(-not (Test-Path -Path $SourcePath)) {
    Write-Warning -Message ('The document source path {0} does not exist' -f $SourcePath)
    exit $ERR_SOURCEPATHMISSING
}

# Arguments shared by every Invoke-Word call; DeleteFile forwards the
# DeleteExistingFiles switch so that it actually takes effect.
$ConversionParameters = @{
    TargetFileExtension = $FileExtension.Item($TargetFormat)
    WdSaveFormat        = $wdFormat.Item($TargetFormat)
    DeleteFile          = [bool]$DeleteExistingFiles
}

if(Test-Path -Path $SourcePath -PathType Container) {
    # Source is a folder: collect matching files recursively and skip any
    # directory whose name happens to match the include filter
    $SourceFiles = @(Get-ChildItem -Path $SourcePath -Include $IncludeFilter -Recurse | Where-Object { -not $_.PSIsContainer })
    Write-Verbose -Message ('{0} files found in {1}' -f ($SourceFiles | Measure-Object).Count, $SourcePath)
}
else {
    # Source is a file path; wrapped in an array so a wildcard path that
    # resolves to several files is converted file by file
    $SourceFiles = @(Get-Item -Path $SourcePath | Where-Object { -not $_.PSIsContainer })
}

# Let's work on all files
foreach($File in $SourceFiles) {
    Invoke-Word -FileSourcePath $File.FullName -SourceFileExtension $File.Extension @ConversionParameters
}