1:
2:
3:
4:
5:
6:
7:
8:
9:
10:
11:
12:
13:
14:
15:
16:
17:
18:
19:
20:
21:
22:
23:
24:
25:
26:
27:
28:
29:
30:
31:
32:
33:
34:
35:
36:
37:
38:
39:
40:
41:
42:
43:
44:
45:
46:
47:
48:
49:
50:
51:
52:
53:
54:
55:
56:
57:
58:
59:
60:
61:
62:
63:
64:
65:
66:
67:
68:
69:
70:
71:
72:
73:
74:
75:
76:
77:
78:
79:
80:
81:
82:
83:
84:
85:
86:
87:
88:
89:
90:
91:
92:
93:
94:
95:
96:
97:
98:
99:
100:
101:
102:
103:
104:
105:
106:
107:
108:
109:
110:
111:
112:
113:
114:
115:
116:
117:
118:
119:
120:
121:
122:
123:
124:
125:
126:
127:
128:
129:
130:
131:
132:
133:
134:
135:
136:
137:
138:
139:
140:
141:
142:
143:
144:
145:
146:
147:
148:
149:
150:
151:
152:
153:
154:
155:
156:
157:
158:
159:
160:
161:
162:
163:
164:
165:
166:
167:
168:
169:
170:
171:
172:
173:
174:
|
Option Explicit
'written by Neil Fleming
Sub MineText()
Dim wb As Workbook, wsRaw As Worksheet, wsResults As Worksheet, wsIgnore As Worksheet
Dim rSource As Range, rRow As Range, rResults As Range, rMatch As Range
Dim allText As String
Dim unWanted As String
Dim Words() As String
Dim aWord As Integer, aRow As Integer, Nearby As Integer, maxCol As Integer
On Error GoTo errortrap
'set up worksheets
Set wb = ActiveWorkbook
Set wsRaw = wb.Sheets("raw")
Set wsResults = wb.Sheets("results")
Set wsIgnore = wb.Sheets("ignore")
'set rSource to non-blank rows
Set rSource = Range(wsRaw.Range("A5"), wsRaw.Range("a5").End(xlDown))
allText = ""
For Each rRow In rSource.Rows
allText = allText & " " & rRow.Cells(1, 1).Value
Next
'add trailing space:
allText = allText & " "
'uppercase:
allText = UCase(allText)
'take out punctuation:
allText = Replace(allText, ",", " ")
allText = Replace(allText, ";", " ")
allText = Replace(allText, ".", " ")
allText = Replace(allText, " - ", " ")
'remove ignored words, using list from "ignore" worksheet
For aWord = 2 To wsIgnore.Cells(2, 1).End(xlDown).Row
unWanted = wsIgnore.Cells(aWord, 1).Value
While InStr(allText, " " & unWanted & " ") > 0
allText = Replace(allText, " " & unWanted & " ", " ")
Wend
Next
'take out double spaces:
While InStr(allText, " ") > 0
allText = Replace(allText, " ", " ")
Wend
'remove leading and trailing spaces:
allText = Trim(allText)
'convert AllText to array of words:
Words = Split(allText, " ")
'LAY OUT RESULTS:
'blank results:
With wsResults
Application.ScreenUpdating = False
.Cells.ClearContents
.Cells.ClearFormats
.Range("a1", "dd1000").Interior.Color = RGB(255, 255, 255)
.Activate
.Cells(1, 1) = "Count"
.Cells(1, 2) = "Word"
'insert unique word results in results sheet:
aRow = 2
maxCol = 1
For aWord = 0 To UBound(Words)
'redefine results range:
Set rResults = .Range(wsResults.Range("b2"), .Range("b2").End(xlDown))
Set rMatch = rResults.Find(Words(aWord))
'if word in list, increment count:
If Not rMatch Is Nothing Then
rMatch.Offset(0, -1) = rMatch.Offset(0, -1) + 1
'otherwise add to list:
Else
Set rMatch = wsResults.Cells(aRow, 2)
rMatch.Value = Words(aWord)
rMatch.Offset(0, -1).Value = 1
aRow = aRow + 1
End If
'examine nearby words, count occurrences and add to right of main word:
'number of words examined is set on "ignore" sheet in cell named "ptrMaxWords":
For Nearby = 1 To Range("ptrMaxWords").Value
'go backwards:
If aWord - Nearby >= 0 Then AddNearby Words(aWord - Nearby), .Cells(rMatch.Row, 1), maxCol
'go forwards:
If aWord + Nearby <= UBound(Words) Then AddNearby Words(aWord + Nearby), .Cells(rMatch.Row, 1), maxCol
Next Nearby
Next aWord
'adjust column widths
.Columns.AutoFit
'sort rows using CustomSort sub below:
For Each rRow In rResults
CustomSort rRow
Next rRow
'sort columns
Set rResults = .Range("a1", .Cells(rResults.Rows.Count + 2, maxCol))
rResults.Sort .Range("a1", rResults.Range("a1").End(xlDown)), xlDescending, , , , , , xlYes, , , xlSortColumns, xlPinYin
With rResults.Borders
.LineStyle = xlContinuous
.Weight = xlThin
.Color = RGB(255, 140, 0)
End With
.Range("A:B").Font.Bold = True
End With
errortrap:
Application.ScreenUpdating = True
End Sub
Sub AddNearby(aString As String, ByRef aRange As Range, maxCol As Integer)
Dim rcol As Range
Set rcol = aRange.EntireRow.Find(aString)
If Not rcol Is Nothing Then
'increase count:
rcol.Offset(0, -1) = rcol.Offset(0, -1) + 1
Else
'add word
Set rcol = aRange.End(xlToRight).Offset(0, 1)
rcol.Value = 1
rcol.Offset(0, 1) = aString
End If
'adjust value of the rightmost altered column:
If rcol.Column + 1 > maxCol Then maxCol = rcol.Column + 1 Else maxCol = maxCol
End Sub
Sub CustomSort(aRange As Range)
'sorts pairs of cells along row based on the value of the first cell in the pair:
Dim StartCol As Integer, swapCol As Integer, swapValue As Integer
Dim swapText As String
StartCol = 2
Do
swapCol = StartCol + 2
Do
If aRange.Cells(1, swapCol) > aRange.Cells(1, StartCol) Then
'swap cells and next cell along (containing word) with each other:
swapValue = aRange.Cells(1, swapCol)
swapText = aRange.Cells(1, swapCol + 1)
aRange.Cells(1, swapCol) = aRange.Cells(1, StartCol)
aRange.Cells(1, swapCol + 1) = aRange.Cells(1, StartCol + 1)
aRange.Cells(1, StartCol) = swapValue
aRange.Cells(1, StartCol + 1) = swapText
End If
swapCol = swapCol + 2
Loop Until aRange.Cells(1, swapCol) = ""
StartCol = StartCol + 2
Loop Until aRange.Cells(1, StartCol) = ""
End Sub
|