Fumei is just reacting to what a big topic this is -- you're talking about document conversion, which is an entirely different topic really than document cleanup.
Your approach is to take the converted document (however you've received it) and "cleaning up" some stuff so that it is okay to use for the end-user. That clean up involves getting rid of some paragraph formatting, "bad" styles, but still trying to preserve some document formatting.
For document conversion, I actually advocate the reverse -- using a "Paste Special Unformatted Text" approach into a "clean new document" shell, and then making sure to restore the stuff that you want to keep (which, in this case, is *some* font formatting)
I have some massively complicated code which addresses this in a much more robust fashion. But for a couple of reasons (not the least of which is what I'm guessing is your level of expertise in VBA programming), I'm giving you a more simple approach to this. I've included the storing and restoring of the following font attributes: Bold, Italic, *single* underline, superscript and subscript (beware of these two, as often scanned documents will show text as superscript and subscript when it's really just the scan being slightly out of alignment.
This may help you on your way... although it is still pretty complex code for a beginner.
' Main routine -- works on the active document, and leaves you with a new document in which
' the main font formatting is restored.
' NOTE: if not used in Word 2010, you may need to do multiple "undo" actions to restore your
' document to the way it was -- so TEST a lot
Sub ConversionWithFontFormatRestoration()
Dim oOrigDoc As Document
Dim oNewDoc As Document
Dim rngOrigSelection As Range
Set oOrigDoc = ActiveDocument
Set oNewDoc = Documents.Add(Visible:=False)
'mark the formatting
FontFormat_MarkAll oOrigDoc
'note, this can pull over section breaks, etc
oOrigDoc.Content.Copy
'if we were able to use the undo record, undo the marks (comment out to see the process)
#If VBA7 Then
'store the selection
Set rngOrigSelection = Selection.Range.Duplicate
'undo (which selects the whole document_
oOrigDoc.Undo
'restore the selection
rngOrigSelection.Select
#End If
oNewDoc.Content.PasteSpecial dataType:=wdPasteText
'replace the formatting
FontFormat_ReplaceAll oNewDoc
'make it visible
oNewDoc.ActiveWindow.Visible = True
End Sub
' Mark all font formats desired
Sub FontFormat_MarkAll(oDoc As Document)
Dim oFont As Font
'make use of the undo record, if it's available
#If VBA7 Then
Application.UndoRecord.StartCustomRecord "Mark Font Formatting"
#End If
'setting to new clears out all other settings, so you're only searching for a single item
Set oFont = New Font
oFont.Bold = True
FontFormat_Mark oFont, "Bold", oDoc.Content
Set oFont = New Font
oFont.Italic = True
FontFormat_Mark oFont, "Italic", oDoc.Content
Set oFont = New Font
oFont.Underline = wdUnderlineSingle
FontFormat_Mark oFont, "UnderlineSingle", oDoc.Content
Set oFont = New Font
oFont.Superscript = True
FontFormat_Mark oFont, "Superscript", oDoc.Content
Set oFont = New Font
oFont.Subscript = True
FontFormat_Mark oFont, "Subscript", oDoc.Content
'if we made use of it with the conditional compile, end it
'(this gives one undo, rather than a bunch)
#If VBA7 Then
Application.UndoRecord.EndCustomRecord
#End If
End Sub
' mark an individual font format
Sub FontFormat_Mark(oFont As Font, sIdentifier As String, rngSearch As Range)
With rngSearch.Find
'just setting the font object doesn't work - so you'd have to do this for each item you care about
With .Font
.Bold = oFont.Bold
.Italic = oFont.Italic
.Underline = oFont.Underline
.Superscript = oFont.Superscript
.Subscript = oFont.Subscript
End With
.Replacement.text = "[StartMy" & sIdentifier & "]^&[EndMy" & sIdentifier & "]"
.Execute Replace:=wdReplaceAll
End With
End Sub
' Replace all font formats
Sub FontFormat_ReplaceAll(oDoc As Document)
Dim oFont As Font
'make use of the undo record, if it's available
#If VBA7 Then
Application.UndoRecord.StartCustomRecord "Replace Font Formatting"
#End If
'setting to new clears out all other settings, so you're only searching for a single item
Set oFont = New Font
oFont.Bold = True
FontFormat_Replace oFont, "Bold", oDoc.Content
Set oFont = New Font
oFont.Italic = True
FontFormat_Replace oFont, "Italic", oDoc.Content
Set oFont = New Font
oFont.Underline = wdUnderlineSingle
FontFormat_Replace oFont, "UnderlineSingle", oDoc.Content
Set oFont = New Font
oFont.Superscript = True
FontFormat_Replace oFont, "Superscript", oDoc.Content
Set oFont = New Font
oFont.Subscript = True
FontFormat_Replace oFont, "Subscript", oDoc.Content
'if we made use of it with the conditional compile, end it
'(this gives one undo, rather than a bunch)
#If VBA7 Then
Application.UndoRecord.EndCustomRecord
#End If
End Sub
' Replace an indivdual font format
Sub FontFormat_Replace(oFont As Font, sIdentifier As String, rngSearch As Range)
With rngSearch.Duplicate.Find
'have to use wild card searches
.MatchWildcards = True
'use the slashes to identify the brackets as real brackets
'not wildcard search special characters
.text = "\[StartMy" & sIdentifier & "\]*\[EndMy" & sIdentifier & "\]"
'set up the formatting replacements
With .Replacement.Font
.Bold = oFont.Bold
.Italic = oFont.Italic
.Underline = oFont.Underline
.Superscript = oFont.Superscript
.Subscript = oFont.Subscript
End With
'replace the formatting
.Execute Replace:=wdReplaceAll
'remove the start and end markers by
'resetting the formatting
.ClearFormatting
.Replacement.ClearFormatting
'turning off wildcard searches
.MatchWildcards = False
'and removing the specific text
.text = "[StartMy" & sIdentifier & "]"
.Replacement.text = ""
.Execute Replace:=wdReplaceAll
'and the end codes
.text = "[EndMy" & sIdentifier & "]"
.Execute Replace:=wdReplaceAll
End With
End Sub