PDA

View Full Version : [SOLVED:] Search documents in directory for text string, copy results to new document.



lkpederson
01-04-2014, 04:12 PM
Folks,
I am writing (cobbling together would be more accurate) a macro which searches documents in a selected directory for a text string. Once found, that information is to be copied into a new document. There can be multiple occurrences of the string or none at all in these documents. The macro continues until all documents have been searched.

My issue is that the results from additional document searches overwrite the information stored in the new document. I've tried multiple methods but am not getting there. Also suggestions on streamlining the code is welcome.

WARNING
I am not a programmer and the code is ugly at best. I'm sure I've broken more than one programming rule. Oh and I've commented like crazy.

Thanks again for time and effort.

Version is 2010

' Current issues with proggie:
' 1. copies over text in RSN doc instead of appending
' 2. saves RSN.doc file in Word default directory.
' Don't want it in working directory since are processing
' those files. Let user pick where to save?
' 3. need to limit files selected to .doc files instead
' of all files in directory.
' __________________________________________________________________

Function GetFolder() As String
Dim oFolder As Object
GetFolder = ""
Set oFolder = CreateObject("Shell.Application").BrowseForFolder(0, "Choose a folder", 0)
If (Not oFolder Is Nothing) Then GetFolder = oFolder.Items.Item.Path
Set oFolder = Nothing
End Function

Public Sub main()

' these are all variant variables and must be converted to strings to use
Dim FolderPath, objFSO, TxtFilePath

' added in from ExtractRSN macro
' Dim nDoc As Word.Document
Dim cDoc As Word.Document, nDoc As Word.Document

' set string variable for later use as filename
Dim cDocFN As String

' Set cDoc = ActiveDocument
Set nDoc = Documents.Add

' commented this out during testing so I don't have to keep selecting _
the directory RETURN THIS BACK WHEN COMPLETE

' FolderPath = GetFolder
FolderPath = "J:\test"

If InStr(FolderPath, "EMPTY") = 0 Then
Set objFSO = CreateObject("Scripting.Filesystemobject")
Dim Folder, DocFiles, DocFile

' added from other macro
Dim nRng As Word.Range
Dim cRng As Word.Range
Dim nRng2 As Word.Range
Set Folder = objFSO.GetFolder(FolderPath)
Set DocFiles = Folder.Files
' .FileType = msoFileTypeWordDocuments ' trying to only look _
at Word documents but this statement doesn't work. See notes _
at beginning of macro for "things to fix".
For Each DocFile In DocFiles 'loop the files in the folder

' this statement is a holdover from previous macro which verified that
' a file was of a particular file type. NEED TO ADD THIS LATER
' check AddHeaderAllFiles macro
' TxtFilePath = DocFile.Path

' Sets the variable nRng equal to the new document added above.
' Allows copying from one file to another
Set nRng = nDoc.Content
' Set nRng = nDoc.Paragraphs

' This section checks to see if nDoc is empty. Had trouble
' passing variables so stuck this text in here. Should be
' its own function. Maybe later... if this works

Dim astory As Range
Dim IsDocEmpty As Boolean

' Initialize function to True.
IsDocEmpty = True
nDoc.Activate
For Each astory In nDoc.StoryRanges

' Check for text. If the length of the
' current story is greater than one, then
' there is either text or more than one
' empty line.
If Len(astory.Text) > 1 Then
IsDocEmpty = False
End If

' Check for Objects.
' Note: If there are no objects within
' the current story range, an error occurs.
On Error Resume Next
If astory.ShapeRange.Count > 0 Then
If Err = 0 Then
IsDocEmpty = False
Else
On Error GoTo 0
End If
End If

Next

' This bit of code moves the cursor to the end of the document
' in the file "nDoc". This allows information to be appended
' to the file nDoc.

' Dim objSelection
If IsDocEmpty = False Then
nDoc.Activate
Selection.EndOf Unit:=wdStory
' nDoc.Bookmarks("\EndofDoc").Select

' third attempt
' nRng2 = nRng
' nRng2.Collapse (Microsoft.Office.Interop.Word.WdCollapseDirection.wdCollapseEnd)

' another attempt to move cursor to end
' nDoc.Bookmarks("\EndOfDoc").Select

' and yet another attempt to move cursor to end
' Const wdStory = 6
' Const wdMove = 0
' Set objSelection = nRgn.Selection
' Set objWord = CreateObject("Word.Application")
' objWord.Visible = True
' Set objSelection = objWord.Selection
' objSelection.EndKey wdStory, wdMove
End If

' Convert variant string to string (1st line) which is actually
' a file name then open it as a file (2nd line)
cDocFN = CStr(DocFile)
Documents.Open cDocFN
cDocFN = ActiveDocument ' Setting the open document as the active one. (redundant)?

' Sets the variable cRng to include the entire document's contents
' so it can be searched
Set cRng = ActiveDocument.Content

' removes any formatting from the "Find" command
cRng.Find.ClearFormatting

' loop to search for text starting with "RSN" and ending in a period.
With cRng.Find
.Forward = True
.Text = "RSN"
.Wrap = wdFindStop
.Execute
Do While .Found
cRng.Collapse Word.WdCollapseDirection.wdCollapseEnd
cRng.Expand Unit:=wdParagraph ' use this line copy to a hard return
' cRng.MoveEndUntil Cset:=".", Count:=Word.wdForward ' use this to search _
for a specfic character

nRng = cRng
nRng.InsertParagraphAfter
nRng.Collapse Word.WdCollapseDirection.wdCollapseEnd
cRng.Collapse Word.WdCollapseDirection.wdCollapseEnd
.Execute
Loop
' Set nRng = Nothing
End With

' closing the open document before opening another one.
Documents(cDocFN).Close SaveChanges:=wdDoNotSaveChanges

Next

End If

Documents(nDoc).SaveAs2 ("RSN.docx")

End Sub

Function IsDocEmpty() As Boolean
Dim astory As Range

' Initialize function to True.
IsDocEmpty = True
nDoc.Activate
For Each astory In nDoc.StoryRanges

' Check for text. If the length of the
' current story is greater than one, then
' there is either text or more than one
' empty line.
If Len(astory.Text) > 1 Then

' if there is text, more to end

IsDocEmpty = False
End If

' Check for Objects.
' Note: If there are no objects within
' the current story range, an error occurs.
On Error Resume Next
If astory.ShapeRange.Count > 0 Then
If Err = 0 Then
IsDocEmpty = False
Else
On Error GoTo 0
End If
End If

' If something was found, then
' return to the calling routine with
' a value of False.
If IsDocEmpty = False Then Exit Function

Next

End Function

fumei
01-05-2014, 12:00 AM
Please use the code tags when posting code.

Option Explicit

Function GetFolder() As String
Dim oFolder As Object
GetFolder = ""
Set oFolder = CreateObject("Shell.Application").BrowseForFolder(0, "Choose a folder", 0)
If (Not oFolder Is Nothing) Then
GetFolder = oFolder.Items.Item.path
End If
Set oFolder = Nothing
End Function

Sub AppendStuff()
Dim SourceFolder As String
Dim pathToPutNewDoc As String
Dim files
Dim AppendedToDoc As Document
Dim r As Range

' create document for the chunks
Set AppendedToDoc = Documents.Add
' folder with source docs
SourceFolder = "J:\test\"
' folder to put new doc
pathToPutNewDoc = GetFolder & "\"
' use DIR function
files = Dir(SourceFolder & "*.doc")
Do While files <> ""
Documents.Open SourceFolder & files
Set r = ActiveDocument.Range
With r.Find
.Forward = True
.Text = "RSN"
.Wrap = wdFindStop
.Execute
Do While .Found
r.Collapse 0
cRng.Expand Unit:=wdParagraph
' append to result doc
AppendedToDoc.Range.InsertAfter r.Text & vbCrLf
r.Collapse 0
Loop
End With
'close active doc
ActiveDocument.Close
' set up next file
files = Dir()
Loop
AppendedToDoc.SaveAs FileName:=pathToPutNewDoc & "AppendedResults.doc"
End Sub

Using DIR allows you to just process .doc files in the folder.
User selects where to put results doc.
Source folder is set...you could use GetFolder to choose if you want.
It is a best practice to put all your variable declarations at the beginning of a procedure. Do not scatter them through the code.

fumei
01-05-2014, 12:06 AM
I am a bit unclear as to what you are copying exactly. It seems to be the paragraph, but you also have commented code that indicates to the end of the document.

lkpederson
01-05-2014, 05:06 AM
deleted msg as later msg supersedes this.

lkpederson
01-05-2014, 07:03 AM
Fumei,
I tried the code you posted, corrected a variable:

cRng.Expand Unit:=wdParagraph

should be, I believe :

r.Expand Unit:=wdParagraph

However when I tried it, the loop continues and find all sorts of text which isn't in the search parameters. I've included one of the documents I'm searching on. I tried up upload the results but got a fail. Here's what it looks like:

Search results file (see below). The first two lines are correct, the rest is not. The macro keeps looping at the end of the file. No idea why as the search criteria doesn't include these characters. Ideas?

RSN 01 35 10-1, Complete LHM and MSDS.

RSN 01 35 10-2, Updated LHM and MSDS.

Comply with paragraph (e) of clause at FAR 52.223-3, Hazardous Material Identification and Material Safety Data - Alternate 1.

[In addition to requirements in Table 01330A, submit copies of updated LHM and MSDS to__________ at least 15 days before delivering hazardous materials to job site.]

DELIVERY

Do not deliver hazardous materials to jobsite which are not included on the original or previously updated LHM and MSDS before receipt of updated LHM and MSDS by [________________].

PRODUCTS
Not Used
EXECUTION
Not Used
END OF SECTION
END OF SECTION
END OF SECTION

Thanks again.

lkpederson
01-05-2014, 02:58 PM
Ok
I finally have it sorted out, thanks to fumei's nudge in the right directory. It's not pretty but it works:

The current code asks the user which directory to search on and where to save the resultant file. I am sure there are more elegant way to get the job done but it works. Again, thank fumei for your thoughts on my problem. It allowed me to get back on track and solve the issue.


Option Explicit

Function GetFolder() As String
Dim oFolder As Object
GetFolder = ""
Set oFolder = CreateObject("Shell.Application").BrowseForFolder(0, "Choose a folder", 0)
If (Not oFolder Is Nothing) Then GetFolder = oFolder.Items.Item.Path
Set oFolder = Nothing
End Function

Sub main()

Dim files, DocFile ' these are all variant variables and must be converted to strings to use
Dim cDoc As Word.Document, nDoc As Word.Document
Dim cDocFN As String, SourceFolder As String, FileFolder As String, pathToPutSaveDoc As String
Dim nRng As Word.Range, cRng As Word.Range

Set nDoc = Documents.Add

SourceFolder = GetFolder & "\"
files = Dir(SourceFolder & "*.doc")

Do While files <> ""
Documents.Open SourceFolder & files
' Sets the variable nRng equal to the new document added above.
' Allows copying from one file to another
Set nRng = nDoc.Content

' Convert variant string to string (1st line) which is actually
' a file name then open it as a file (2nd line)
cDocFN = CStr(DocFile)
Documents.Open cDocFN
' cDocFN = ActiveDocument ' Setting the open document as the active one. (redundant)?

' Sets the variable cRng to include the entire document's contents
' so it can be searched
Set cRng = ActiveDocument.Content

' removes any formatting from the "Find" command
cRng.Find.ClearFormatting

' loop to search for text starting with "RSN" and ending in a period.
With cRng.Find
.Forward = True
.Text = "RSN"
.MatchCase = True
.Wrap = wdFindStop
.Execute
Do While .Found
cRng.Collapse Word.WdCollapseDirection.wdCollapseEnd
cRng.Expand Unit:=wdParagraph ' use this line copy to a hard return
' cRng.MoveEndUntil Cset:=".", Count:=Word.wdForward ' use this to search _
for a specfic character
nDoc.Range.InsertAfter cRng.Text '& vbCrLf
nRng.Collapse Word.WdCollapseDirection.wdCollapseEnd
cRng.Collapse Word.WdCollapseDirection.wdCollapseEnd
.Execute
Loop

End With
' closing the open document before opening another one.
ActiveDocument.Close SaveChanges:=wdDoNotSaveChanges

files = Dir()
Loop

pathToPutSaveDoc = GetFolder & "\"
nDoc.SaveAs2 filename:=pathToPutSaveDoc & "RSN.docx"


End Sub