PDA

View Full Version : Words Statistic Makro



markos97
10-28-2010, 07:21 AM
Hello!

I want to create a word statistic. Someone on a polish forum help
me with this, but partially. In this moment "my" code output the following
results: words, frequency of occurence, and page number.


Option Explicit
Option Compare Text
Sub Test()

Dim lngRes As Long
lngRes = WordCountAndPages(ThisDocument)
Application.ScreenRefresh
MsgBox "There were " & CStr(lngRes) & " different words ", vbOKOnly, "Finished"
End Sub
Function WordCountAndPages(SourceDoc As Word.Document, _
Optional ByVal sExcludes = "[the][a][of][is][to][for][by][be][and][are]", _
Optional ByVal lmaxwords As Long = 50000) As Long

' "[ale][ani][aby][do][od][czy][za][ze][przed][po]" 'excludes for example for polish
On Error GoTo WordCountAndPages_Error

Dim NewDoc As Word.Document
Dim TmpRange As Word.Range
Dim aWord As Object
'----------------------------------------
Dim tmpName As String
Dim strSingleWord As String

Dim lngCurrentPage As Long
Dim lngPageCount As Long
Dim lngWordNum As Long 'Number of unique words
Dim lngttlwds As Long 'Total words in the document
Dim j As Long
Dim k As Long
Dim w As Long
Dim bTmpFound As Boolean 'Temporary flag

ReDim arrWordList(1 To 1) As String 'Array to hold unique words
ReDim arrWordCount(1 To 1) As Long 'Frequency counter for unique words
ReDim arrPageW(1 To 1) As String 'Pages unique words

lngCurrentPage = 1 'we started to count from a first page to next pages

With SourceDoc
If ActiveDocument.FullName <> .FullName Then SourceDoc.Activate ' because below selection
Set TmpRange = .Range
' document's page count (maybe must refresh)
lngPageCount = .Content.ComputeStatistics(wdStatisticPages) 'we counted number of pages
End With
'--------------
' The item in the Words collection includes both the word and the spaces after the word
' The Count property for this collection in a document returns the number of items in the main story only.
' Also, the Count property includes punctuation and paragraph marks in the total.
lngttlwds = TmpRange.Words.Count ' SourceDoc.Words.Count
'---------------------
System.Cursor = wdCursorWait

Do Until lngCurrentPage > lngPageCount
If lngCurrentPage = lngPageCount Then
TmpRange.End = SourceDoc.Range.End 'last page (there won't be a next page)
Else
'Find the beginning of the next page
'Must use the Selection object. The Range.Goto method will not work on a page
Selection.GoTo wdGoToPage, wdGoToAbsolute, lngCurrentPage + 1 'page, next page
'Set the end of the range to the point between the pages
TmpRange.End = Selection.Start
End If
'------------------------------------
For Each aWord In TmpRange.Words
' 160 is non breaking space

strSingleWord = LCase(Replace(Trim(aWord.Text), Chr(160), "")) 'lower character to upper character
'maybe remove lower case form code, like this: (Replace(Trim(aWord.Text), Chr(160), ""))
Select Case True
Case Len(strSingleWord) = 1 'number of characters in single word
' if =1 then ignore "a" "&" if = 2 ignore for example: "an" "of" "is" "to" "by" "be"
Case strSingleWord < "a" Or strSingleWord > "z" 'maybe remove this line form code
Case InStr(1, sExcludes, "[" & strSingleWord & "]", vbTextCompare)
Case Else
bTmpFound = False
For j = 1 To lngWordNum
If StrComp(arrWordList(j), strSingleWord, vbTextCompare) = 0 Then
arrWordCount(j) = arrWordCount(j) + 1
If (arrPageW(j) & "," Like "*," & CStr(lngCurrentPage) & ",*") = False Then
arrPageW(j) = arrPageW(j) & "," & CStr(lngCurrentPage)
End If
bTmpFound = True
Exit For
End If
Next j
If Not bTmpFound Then
lngWordNum = lngWordNum + 1
ReDim Preserve arrWordList(1 To lngWordNum)
ReDim Preserve arrWordCount(1 To lngWordNum)
ReDim Preserve arrPageW(1 To lngWordNum)
arrWordList(lngWordNum) = strSingleWord
arrWordCount(lngWordNum) = 1
arrPageW(lngWordNum) = arrPageW(lngWordNum) & "," & CStr(lngCurrentPage)
End If
If lngWordNum > lmaxwords - 1 Then
MsgBox "Too many words.", vbOKOnly
Exit For
End If
End Select
lngttlwds = lngttlwds - 1
StatusBar = "Remaining: " & lngttlwds & ", Unique: " & lngWordNum
Next aWord
'------------------------------------
lngCurrentPage = lngCurrentPage + 1 'move to the next page
TmpRange.Collapse wdCollapseEnd 'go to the next page
Loop
'------------------------------------
If lngWordNum > 0 Then
tmpName = SourceDoc.AttachedTemplate.FullName 'output results
Set NewDoc = Application.Documents.Add(Template:=tmpName, NewTemplate:=False)

Selection.ParagraphFormat.TabStops.ClearAll
Application.ScreenUpdating = False
With Selection
For j = 1 To lngWordNum
.TypeText Text:=arrWordList(j) & vbTab & CStr(arrWordCount(j)) & vbTab & Mid(arrPageW(j), 2) & vbNewLine
Next j
End With

Set TmpRange = NewDoc.Range
TmpRange.ConvertToTable Separator:=wdSeparateByTabs
With NewDoc.Tables(1)
.Sort ExcludeHeader:=False, _
FieldNumber:="Kolumna 1", SortFieldType:=wdSortFieldAlphanumeric, SortOrder:=wdSortOrderAscending, _
FieldNumber2:="", _
FieldNumber3:="", _
CaseSensitive:=False, LanguageID:=wdPolish, IgnoreDiacritics:=False
.Borders(wdBorderTop).LineStyle = wdLineStyleNone
.Borders(wdBorderLeft).LineStyle = wdLineStyleNone
.Borders(wdBorderBottom).LineStyle = wdLineStyleNone
.Borders(wdBorderRight).LineStyle = wdLineStyleNone
.Borders(wdBorderHorizontal).LineStyle = wdLineStyleNone
.Borders(wdBorderVertical).LineStyle = wdLineStyleNone
.Borders(wdBorderDiagonalDown).LineStyle = wdLineStyleNone
.Borders(wdBorderDiagonalUp).LineStyle = wdLineStyleNone
End With
End If
WordCountAndPages = lngWordNum
'---------------------
WordCountAndPages_Exit:
On Error Resume Next
Set aWord = Nothing
Set TmpRange = Nothing
Set NewDoc = Nothing
System.Cursor = wdCursorNormal
Application.ScreenUpdating = True

Exit Function

WordCountAndPages_Error:
MsgBox "Błąd : ( " & Err.Number & " ) " & Err.Description & vbCrLf & _
"Procedura : " & "WordCountAndPages", vbExclamation
Resume WordCountAndPages_Exit
End Function I also need another column with page numbers, where the page number looks like that:
1-3, 5, 6-8. Now the page numbers looks like this: 1,2,3,5,6,7,8. When the
page numbers are consecutive (sequential), beetwen a page numbers I need to
add a hyphen (dashes).

In another column I also need a number of page numbers. For example, when a
some word appear on pages 1,3,5,7, in output results in another column will
be 4.
And the last case: my code search all words in document. I need also search for the keywords.

I have also a some problems with some lines of my code:
strSingleWord < "a" Or strSingleWord > "z" 'fist character of word must
be a letter, this line ignore a special word, like "3w" "4you"
"3ware" "3love" "2day" , and so on...

I have also problem with some words, like: "@SCHL" "m&m", and so
on..., something line of code ignore this words.

I hope so, someone help.

Thanks for your attention!
Regards

macropod
10-28-2010, 03:56 PM
Hi markos,

Try something along the lines of:
Sub Pages()
Dim oCel As Cell, oRng As Range, StrTmp As String, ArrTmp(), i As Integer, j As Integer, k As Integer
With ActiveDocument.Tables(1).Columns.Last
For Each oCel In .Cells
Set oRng = oCel.Range
oRng.End = oRng.End - 1
StrTmp = oRng.Text
ReDim ArrTmp(UBound(Split(StrTmp, ",")))
For i = 0 To UBound(Split(StrTmp, ","))
ArrTmp(i) = Split(StrTmp, ",")(i)
Next
For i = 0 To UBound(ArrTmp) - 2
If IsNumeric(ArrTmp(i)) Then
k = 2
For j = i + 2 To UBound(ArrTmp)
If CInt(ArrTmp(i) + k) <> CInt(ArrTmp(j)) Then Exit For
ArrTmp(j - 1) = ""
k = k + 1
Next
i = j
End If
Next
oRng.Text = Replace(Replace(Replace(Replace(Join(ArrTmp, ","), ",,", " "), " ,", " "), " ", " "), " ", "-")
Next
End With
Set oRng = Nothing
End Sub

fumei
10-29-2010, 10:24 AM
Replace(Replace(Replace(Replace( _
Join(ArrTmp, ","), ",,", " "), " ,", " "), " ", " "), " ", "-")


Gotta love it.

markos97
10-29-2010, 10:50 AM
Once more thanks macropod for your repley, and thanks for fumei too

But I can't run your code. I must add your code to mine, I don't know. But I don't know how.

This thread is a cross thread.

But If I want to add a link in a forum, I get to: To be able to post links your post count must be 5 or greater... sorry.

macropod
10-29-2010, 03:40 PM
Hi markos,

In your code you have a sub named 'Test'. Insert the sub I gave you immediately after this sub, then insert a new line into the 'Test' sub, so that you end up with:
Sub Test()
Dim lngRes As Long
lngRes = WordCountAndPages(ThisDocument)
Call Pages ' Modify the page range
Application.ScreenRefresh
MsgBox "There were " & CStr(lngRes) & " different words ", vbOKOnly, "Finished"
End Sub

Sub Pages()
'... include the rest of the 'Pages' sub code here
End Sub

Function WordCountAndPages(SourceDoc As Word.Document,
'... etc

macropod
10-29-2010, 03:46 PM
Replace(Replace(Replace(Replace( _
Join(ArrTmp, ","), ",,", " "), " ,", " "), " ", " "), " ", "-")


Gotta love it.
Hi Gerry,

Yes, it's not pretty, but it does the job.

fumei
10-29-2010, 04:30 PM
It is that Field knowledge/background of yours. All those encapsulated brackets. Gives me a headache. But, very very cute.

I am ALL in favour of things doing the job, regardless of prettiness. Although of course we all prefer things to be elegant. Sometimes it does not work that way. Life can be ugly.

<grin>

markos97
10-30-2010, 04:50 AM
Ok, I run your macro. Works, but not so good. When a some words appears on minimum 3 consecutive (sequential) pages, ok good, beetween the numbers of pages, code insert a hyphen, like 1-3, or 6-8. But when a some words appears only on 2 consecutive pages, we get a commas, like 1,2, 4,5, but we also need 1-2, 4-5.

And what the rest my problem, any ideas?

P.S.
Macropod, can you add the link of my thread on social.answears.microsoft, I can’t, because, I havent 5 post yet.

macropod
10-31-2010, 12:05 AM
Hi markos,

The retention of commas when the range only spanned two pages was a design decision. Since it's now apparent that's not what you want, try this version of the 'Pages' sub:
Sub Pages()
Dim oCel As Cell, oRng As Range, StrTmp As String, ArrTmp()
Dim i As Integer, j As Integer, k As Integer, l As Integer
With ActiveDocument.Tables(1).Columns.Last
For Each oCel In .Cells
Set oRng = oCel.Range
oRng.End = oRng.End - 1
StrTmp = oRng.Text
l = UBound(Split(StrTmp, ","))
ReDim ArrTmp(l)
For i = 0 To l
ArrTmp(i) = Split(StrTmp, ",")(i)
Next
For i = 0 To l - 1
If IsNumeric(ArrTmp(i)) Then
k = 2
For j = i + 2 To l
If CInt(ArrTmp(i) + k) <> CInt(ArrTmp(j)) Then Exit For
ArrTmp(j - 1) = ""
k = k + 1
Next
If CInt(ArrTmp(i) + 1) = ArrTmp(i + 1) Then ArrTmp(i) = ArrTmp(i) & ","
i = j - 1
End If
Next
oRng.Text = Join(ArrTmp, ",")
With oRng.Find
.MatchWildcards = True
.ClearFormatting
.Replacement.ClearFormatting
.Wrap = wdFindStop
.Text = "[,]{2,}"
.Replacement.Text = "-"
.Execute Replace:=wdReplaceAll
End With
Next
End With
Set oCel = Nothing: Set oRng = Nothing
End Sub
The Find/Replace in the code is to obviate the tortuous nested replace expressions Gerry gagged on ...

Your cross-posts are at: http://social.answers.microsoft.com/Forums/en-US/officeprog/thread/212521f8-183d-4825-9de1-0ddc4b50ef7c

markos97
11-02-2010, 06:47 AM
[…] was a design decision A ha … ok.
I thought I make myself clear, when I describing my problem. I hope so I don’t give you an incorrect directions.


The Find/Replace in the code is to obviate the tortuous nested replace expressions Gerry gagged on ...
I have a problem with Find/Replace. When I try to run new version of your code, firstly I get an error:
Run-time error '5941': The requested member of the collection does not exist.

But I think, that before running this macro for the first time, it is necessary to open Word's 'Find' box, enter "[,]{2,}" as the find text, check the 'Use wildcards' option, and perform a single search to get these options into Word's memory. I do it (I hope so correctly), but then I get the next error:

Run-time error '5560': The Find What text contains a Pattern Match expression which is not valid.

Maybe I need a polish match expression of this line:
.Text = “[,]{2,}”
What did you think?

macropod
11-02-2010, 02:20 PM
Hi Markos,

For the Polish language, you could probably get it to work by changing:
.Text = "[,]{2,}"
to:
.Text = "[,]{2;}"

Alternatively, for code that's not affected by languange considerations, you could replace everything between the same two Next ... Next expressions with:
StrTmp = Join(ArrTmp, ",")
Do While InStr(StrTmp, ",,,") > 0
StrTmp = Replace(StrTmp, ",,,", ",,")
Loop
oRng.Text = Replace(StrTmp, ",,", "-")

I would also suggest changing the line:
i = j - 1
to:
i = j - 2
That way, if you change your mind about having 1-2,4-5 and decide you want to have 1,2,4,5 after all, all you need to do is comment out or delete the line:
If CInt(ArrTmp(i) + 1) = ArrTmp(i + 1) Then ArrTmp(i) = ArrTmp(i) & ","

markos97
11-03-2010, 08:41 AM
Hi everybody!
Now the code looks better, again, thanks marcopod for you support.


you could replace everything between the same two Next ... Next expressions with I try it, but incorrectly, maybe show me in which place, and what I have to do, like below:


Sub Test()
' rest of the 'Test' sub code here
End Sub

Sub Pages()
'the rest of the 'Pages' sub code here
'...
StrTmp = oRng.Text ‘e.g this line change to...
'in this place add...
'and so on
End Sub

Function WordCountAndPages(SourceDoc As Word.Document,
'...etc


[...]if you change your mind about having 1-2,4-5 and decide you want to have 1,2,4,5[...]
But basicly, right from the beginning, in one column I need the page numbers only with a commas (whatever happend, nothing else metters, we coudn’t care less the numbers are consecutive and/or not consecutive), and in the next column I need the page numbers with a hypens (when the page numbers are consecutive) or with a commas (when the page numbers are not consecutive).

Compare with my thread on a social.answers.microsoft...

macropod
11-03-2010, 02:41 PM
Hi Markos,

The 'Pages' sub simply becomes:
Sub Pages()
Dim oCel As Cell, oRng As Range, StrTmp As String, ArrTmp()
Dim i As Integer, j As Integer, k As Integer, l As Integer
With ActiveDocument.Tables(1).Columns.Last
For Each oCel In .Cells
Set oRng = oCel.Range
oRng.End = oRng.End - 1
StrTmp = oRng.Text
l = UBound(Split(StrTmp, ","))
ReDim ArrTmp(l)
For i = 0 To l
ArrTmp(i) = Split(StrTmp, ",")(i)
Next
For i = 0 To l - 1
If IsNumeric(ArrTmp(i)) Then
k = 2
For j = i + 2 To l
If CInt(ArrTmp(i) + k) <> CInt(ArrTmp(j)) Then Exit For
ArrTmp(j - 1) = ""
k = k + 1
Next
If CInt(ArrTmp(i) + 1) = ArrTmp(i + 1) Then ArrTmp(i) = ArrTmp(i) & ","
i = j - 2
End If
Next
StrTmp = Join(ArrTmp, ",")
Do While InStr(StrTmp, ",,,") > 0
StrTmp = Replace(StrTmp, ",,,", ",,")
Loop
oRng.Text = Replace(StrTmp, ",,", "-")
Next
End With
Set oCel = Nothing: Set oRng = Nothing
End Sub
With this code, if your page numbers start out as 1,2,4,5,6,8,10,11,13,14,15,16,17,19 you will end up with 1-2,4-6,8,10-11,13-17,19. However, if you comment out or delete the line:
If CInt(ArrTmp(i) + 1) = ArrTmp(i + 1) Then ArrTmp(i) = ArrTmp(i) & ","
you will end up with 1,2,4-6,8,10,11,13-17,19.

markos97
11-05-2010, 07:04 AM
Yeah, this is it! Thank you macropod.
Starting today, for a period of time, I will be written only on a social.forum.microsoft, we will see how Andreas code work (now I have some trouble with his code). I think, that Andreas solution is more effective (I mean store the result immediately in Excel, and very easiest in opposite to "my" very long code). But once again, thanks for you.
If something goes wrong, I call you back.
See you later. Maybe you visit once again a social.forum.
Regards

macropod
11-08-2010, 02:45 AM
Hi markos,
Re:
In another column I also need a number of page numbers. For example, when a some word appear on pages 1,3,5,7, in output results in another column will be 4.
You can get that by changing the following line in your original WordCountAndPages function from:

.TypeText Text:=arrWordList(j) & vbTab & CStr(arrWordCount(j)) & vbTab & Mid(arrPageW(j), 2) & vbNewLine
to:

.TypeText Text:=arrWordList(j) & vbTab & CStr(arrWordCount(j)) & vbTab & _
UBound(Split(Mid(arrPageW(j), 2), ",")) & vbTab & Mid(arrPageW(j), 2) & vbNewLine
With this simple change you will gate a 4-column table with the count in column 3.
As for:

And the last case: my code search all words in document. I need also search for the keywords.
You would need to supply the list of keywords - and you'd need to say what has to be done with the search results.

markos97
11-12-2010, 08:52 AM
Hi, and thanks again for your help.


You can get that by changing the following line in your original WordCountAndPages function from […] to […].

Ok, I change it, but... something's wrong in results. Wacht out: in document we have e.g. following word "cat" which appear on page number 1, and page number 3, and on page number 2 we have a "dog".
And in column 3 in results we get only "1" number of pages, but the "cat" appears on 2 pages (page nr 1, and 3)! In turn, in column 3 for a dog we get 0 (zero), but the dog appear on page number 2 and will be 1 (appear on one page)!

And the next problem, next example: If "cat" appear on pages which are consecutive 1,2,3, in results we get "2" while the "cat" appear on 3 pages (1,2,3), not 2 pages! I think it happens, because in results on column 4 we get page numbers with a hypens 1-3. Something is wrong.

11-02-2010, 07:20 PM, macropod wrote:
That way, if you change your mind about having 1-2,4-5 and decide you want to have 1,2,4,5 after all, all you need to do is comment out or delete the line:
If CInt(ArrTmp(i) + 1) = ArrTmp(i + 1) Then ArrTmp(i) = ArrTmp(i) & ","

11-03-2010, 07:41 PM, macropod wrote, again:

With this code, if your page numbers start out as 1,2,4,5,6,8,10,11,13,14,15,16,17,19 you will end up with 1-2,4-6,8,10-11,13-17,19. However, if you comment out or delete the line:
If CInt(ArrTmp(i) + 1) = ArrTmp(i + 1) Then ArrTmp(i) = ArrTmp(i) & ","
you will end up with 1,2,4-6,8,10,11,13-17,19. [1,2,4-6,8,10,11,13-17,19, what? You probably mistake!?, do you mean, if I comment out or delete above line I have to get: 1,2,4,5,6,8,10,11,13,14,15,16,17,19 (only page numbers with commas, not with a hypens ...4-6...13-17..., but never mind]

Macropod, I need in the end as 5-column (maybe I remember you shortly, once more: Word, FQ, Commas page numbers, Commas or/and hyphens page numbers, and the Number of page numbers).
Comment out or delete the line:

If CInt(ArrTmp(i) + 1) = ArrTmp(i + 1) Then ArrTmp(i) = ArrTmp(i) & ","

is good idea to get the page numbers only with a commas, but I need also the another column to a page numbers with a hyphens or/and commas.

Maybe to do it, we need something like call statement, like:



Sub Test()
Dim lngRes As Long
lngRes = WordCountAndPages(ThisDocument)
Call Pages1 ' Modify the page range
Call Pages2
Application.ScreenRefresh
MsgBox "There were " & CStr(lngRes) & " different words ", vbOKOnly, "Finished"
End Sub

Sub Pages1 () 'page numbers with comma and/or hyphens
'the rest of code
If CInt(ArrTmp(i) + 1) = ArrTmp(i + 1) Then ArrTmp(i) = ArrTmp(i) & ","
'the rest of code
End Sub

Sub Pages2 () 'page numbers with a comma
'the rest of code
k = k + 1
Next
'If CInt(ArrTmp(i) + 1) = ArrTmp(i + 1) Then ArrTmp(i) = ArrTmp(i) & ","
'above line I comment out, that how you advise me, to get page numbers only with comma
i = j - 2
End If
'the rest of code
End Sub

Function WordCountAndPages(SourceDoc As Word.Document,
'... etc


You would need to supply the list of keywords

Ok, something like: array = ("dog", "cat", "bird", and so on)
maybe compare with below link
http://microsoft-personal-applications.hostweb.com/TopicMessages/microsoft.public.word.vba.beginners/2015763/3/Default.aspx


and you'd need to say what has to be done with the search results.

I’m not understand, what to be done with the search results? Simply, store them in column 1 in results document, like now my code do it. Oh, I think I know what you mean. Macropod, I don’t know how to resolve this problem from a technical side. But I think that one code, will be search the all words or only the keywords (if user comment out some line of code, then will be search for all words, or only a keywords), or maybe we need two versions of code: one code search for all words in document, and second code search only for a keywords, and the user decide which code to use, which code to need, I don’t know which solution is better, only the programers like you, know that.

Sorry, but I think that I waste your time macropod. I think intensively about Andreas solution on social.answears.microsoft. I think, that this solution is a better, becasue, the user can make to sort the results immediately in excel (in word, we can’t sort the results). Using "my" code, we have to copy, and paste the results to excel, and in the end sort them, if we want. I’m intresting your solution, but maybe help also on social.answears? Andreas help me, but he don’t close my case. I think that I know how to get the number of page numbers in Andreas solution, but to do this I need someone help to get in Andreas solution column results (column "D") with page numbers only with a commas.

Thanks for attention, and sorry for my long describe

Regards

macropod
11-12-2010, 03:57 PM
Hi Markos,


in document we have e.g. following word "cat" which appear on page number 1, and page number 3, and on page number 2 we have a "dog".
And in column 3 in results we get only "1" number of pages, but the "cat" appears on 2 pages (page nr 1, and 3)! In turn, in column 3 for a dog we get 0 (zero), but the dog appear on page number 2 and will be 1 (appear on one page)!

And the next problem, next example: If "cat" appear on pages which are consecutive 1,2,3, in results we get "2" while the "cat" appear on 3 pages (1,2,3), not 2 pages! I think it happens, because in results on column 4 we get page numbers with a hypens 1-3. Something is wrong.
In the code I gave you in my last post, change:
UBound(Split(Mid(arrPageW(j), 2), ","))
to:
UBound(Split(Mid(arrPageW(j), 2), ",")) + 1
and everything should be OK.

or maybe we need two versions of code: one code search for all words in document, and second code search only for a keywords, and the user decide which code to use, which code to need, I don’t know which solution is better
Yes, you will need two versions of the code but, as to which is better, that depends on the user's needs!

I think intensively about Andreas solution on social.answears.microsoft. I think, that this solution is a better, becasue, the user can make to sort the results immediately in excel (in word, we can’t sort the results).Word can sort tables (which is what the code produces) quite easily!

markos97
11-13-2010, 06:45 AM
Yeah! That’s right. Now works good. In your previous post, you omitted +1 in below line of code, but now, it’s okay. Pretty good.


UBound(Split(Mid(arrPageW(j), 2), ",")) + 1



you will need two versions of the code
Maybye you know how to do it?

Macropod, I think we have the most of my problem. But still we haven’t a column with a page numbers only with a commas. Is it possible to get them in another column?

Once more thanks for your support. I’m impressed for your knowledge, awsome!
Regards

macropod
11-13-2010, 02:27 PM
Hi markos,

Maybye you know how to do it?
Yes, but VBA Express isn't simply a free coding service. We expect users to make an effort to do at least some of their own coding. We're here to help when you get stuck. In any event, there are plenty of examples of code for this available on the web - you just need to do some searching.

I think we have the most of my problem. But still we haven’t a column with a page numbers only with a commas. Is it possible to get them in another column?I don't understand what you're saying. What you should now be getting is the table with the word list, word count, page count and page ranges. What else do you want?

markos97
11-24-2010, 09:47 AM
Hi everyone!
Yes, but VBA Express isn't simply a free coding service. We expect users to make an effort[...] Ok, understand. But from the beginning, in any kind of forum, I know about it. Every time, I try oneself to find a solution, or, if I’m able to, I try to do it, oneself. But many things is to difficult. I’m only the technical user, and from time to time, I do something oneself.Come back
to my problem, now I have the most what I need. In results, I get a 5 column. I
add using a call statement, new column with page numbers separated only with a
comma. Also added in results a bolded column headers, I do it based on below
link: http://www.avdf.com/aug98/art_vb006.html To stay headers in top of table results, I change False property to True in line, which sorted the header results.

Sub Test()
'beginning of code
Call Pages1
Call Pages2
'end of code
End Sub
Sub Pages1()
'rest of code
End Sub
Sub Pages2()
'beginning of code
'If CInt(ArrTmp(i) + 1) = ArrTmp(i + 1) Then ArrTmp(i) = ArrTmp(i) & "," 'this line comment out
'end of code
End Sub
Function WordCountAndPages(SourceDoc As Word.Document, _
'beginning of code, no changes, no added
Selection.ParagraphFormat.TabStops.ClearAll
Application.ScreenUpdating = False
With Selection 'below add bolded column headers
Selection.Font.Bold = wdToggle
Selection.TypeText Text:="Words" & vbTab & "Fq" & vbTab & "NrOfPageNumbers" & vbTab & "CPageNumbers" & vbTab & "H/C/HCPageNumbers" & vbCrLf
Selection.Font.Bold = wdToggle
For j = 1 To lngWordNum
.TypeText Text:=arrWordList(j) & vbTab & CStr(arrWordCount(j)) & vbTab & _
UBound(Split(Mid(arrPageW(j), 2), ",")) + 1 & vbTab & Mid(arrPageW(j), 2) & vbTab & Mid(arrPageW(j), 2) & vbNewLine ‘add Mid(arrPageW(j), 2)
'the following code
.Sort ExcludeHeader:=True 'False change to True
'the end of code function I the end, I think also how to store the result immediately in excel (familiar as Andreas Killer code to do it). Or maybe simpliest, how to copy the results ang paste to excel, and in the end close the results Word document. I’m also intresting how will be looks the code indexing only a keywords.

Thanks for your attention, and your time.

Regards

macropod
11-24-2010, 05:49 PM
Hi Markos,

If you want to have the page numbers separated by commas, as well as having them with the hyphens to span multi-page ranges, as well as adding a heading row to the table, replace the whole:
With Selection
For j = 1 To lngWordNum
...
Next j
End With
block in the 'WordCountAndPages' function with:
With Selection
.Font.Bold = True
.TypeText Text:="Words" & vbTab & "Fq" & vbTab & "NrOfPageNumbers" & _
vbTab & "CPageNumbers" & vbTab & "H/C/HCPageNumbers" & vbCrLf
.Font.Bold = False
For j = 1 To lngWordNum
.TypeText Text:=arrWordList(j) & vbTab & CStr(arrWordCount(j)) _
& vbTab & UBound(Split(Mid(arrPageW(j), 2), ",")) + 1 & vbTab _
& Mid(arrPageW(j), 2) & vbTab & Mid(arrPageW(j), 2) & vbNewLine
Next j
.HomeKey Unit:=wdStory, Extend:=wdMove
End With
(the '.HomeKey Unit:=wdStory, Extend:=wdMove' is a bonus - it takes you back to the top of the document).

I would also recommend deleting the ''Application.ScreenUpdating = False' line from just above the 'With Selection' line and putting the screen updating code into the 'Build_Concordance' sub (see below).

As you've noted, you also need to change:
.Sort ExcludeHeader:=False
after:
.Sort ExcludeHeader:=True
in the 'WordCountAndPages' function.

You may, of course, end up with comma-separated rows spanning multiple lines. To fix that, you might try inserting:
.AllowAutoFit = True
after:
With NewDoc.Tables(1)
in the 'WordCountAndPages' function. Be aware, though, that this might make the right-hand side of the table disappear from the page if there are many page numbers.

Without making any other changes, the comma-separated pages list will be in the second-last column of the table and the hyphenated list will be in the last column. If you want to swap these two columns around, change:
With ActiveDocument.Tables(1).Columns.Last
in the 'Pages' sub to:
With ActiveDocument.Tables(1).Columns.Last.Previous

Finally, I'd suggest replacing the 'Build_Concordance' sub with:
Sub Build_Concordance()
Application.ScreenUpdating = False
Dim lngRes As Long
lngRes = WordCountAndPages(ThisDocument)
Call Pages ' Modify the page ranges in the output table
Application.ScreenRefresh
Application.ScreenUpdating = True
MsgBox "There were " & CStr(lngRes) & " different words ", vbOKOnly, "Finished"
End Sub

I'm not sure what your reference to Excel is about. Are you wanting to copy the table to Excel? If that's just to do some more processing, it's probably unnecessary as Word vba can do most of what you can do in Excel and, even where it can't, there are ways to around it. Perhaps you could explain what you're trying to do.

macropod
11-24-2010, 08:02 PM
Hi Markos,

To add a table of the Keyword statistics to the output, try the following macro:
Sub Keywords()
'This sub extracts a table of the document's keywords.
Dim oCel As Cell, oRng As Range, strWords As String, i As Integer, bKeep As Boolean
'Insert comma-separated keywords on the following strWords lines. Add/delete lines as necessary.
'Note: there is no comma after the last word in the list.
strWords = strWords & "analyze,assess,assign,collaborate,consult,create,deliver,demonstrate,"
strWords = strWords & "develop,educate,enhance,escalate,establish,evaluate,facilitate,format,"
strWords = strWords & "generate,impact,implement,increase,integrate,justify,maintain,monitor,"
strWords = strWords & "negotiate,prepare,present,process,program,prospect,provide,report,research,"
strWords = strWords & "research,retain,review,revitalize,secure,support,troubleshoot,verify"
With ActiveDocument
.Tables(1).Range.Copy
.Range.InsertAfter vbCrLf
Set oRng = .Paragraphs.Last.Range
oRng.Paste
With .Tables(2).Columns.First
.Cells(1).Range.Text = "Keywords"
For Each oCel In .Cells
If oCel.RowIndex > 1 Then
bKeep = False
For i = 0 To UBound(Split(strWords, ","))
Set oRng = oCel.Range
With oRng.Find
.MatchCase = False
.MatchWholeWord = True
.MatchAllWordForms = True
.Text = Split(strWords, ",")(i)
.Execute
If .Found Then
bKeep = True
Exit For
End If
End With
Next
If bKeep = False Then oCel.Row.Delete
End If
Next
End With
End With
Set oCel = Nothing: Set oRng = Nothing
End Sub
which you would call from the 'Build_Concordance' sub by adding the line:
Call KeyWords
after the line:
Call Pages

This will make a table below the original one, with everything except the keyword entries deleted.

You will have to use your own keywords, of course.

As coded, the macro finds all word forms matching a word in the keyword list (you can't do that in Excel). If that gives the wrong results, you'll need to change the 'True' to 'False' in the '.MatchAllWordForms = True' line. And then you'll need to include in your keywords list all the possessive, plural, gender and tense forms of the keywords you're interested in.