Skip to content

Commit f7620eb

Browse files
authored
v1.3 compat (#8)
v1.3 compat fixed lexicon
1 parent 443615f commit f7620eb

File tree

5 files changed

+223
-17
lines changed

5 files changed

+223
-17
lines changed

.DS_Store

6 KB
Binary file not shown.

Manifest.toml

Lines changed: 188 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,188 @@
1+
# This file is machine-generated - editing it directly is not advised
2+
3+
[[Arpack]]
4+
deps = ["Arpack_jll", "Libdl", "LinearAlgebra"]
5+
git-tree-sha1 = "2ff92b71ba1747c5fdd541f8fc87736d82f40ec9"
6+
uuid = "7d9fca2a-8960-54d3-9f78-7d1dccf2cb97"
7+
version = "0.4.0"
8+
9+
[[Arpack_jll]]
10+
deps = ["Libdl", "OpenBLAS_jll", "Pkg"]
11+
git-tree-sha1 = "68a90a692ddc0eb72d69a6993ca26e2a923bf195"
12+
uuid = "68821587-b530-5797-8361-c406ea357684"
13+
version = "3.5.0+2"
14+
15+
[[Base64]]
16+
uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f"
17+
18+
[[BinaryProvider]]
19+
deps = ["Libdl", "SHA"]
20+
git-tree-sha1 = "5b08ed6036d9d3f0ee6369410b830f8873d4024c"
21+
uuid = "b99e7846-7c00-51b0-8f62-c81ae34c0232"
22+
version = "0.5.8"
23+
24+
[[DataAPI]]
25+
git-tree-sha1 = "674b67f344687a88310213ddfa8a2b3c76cc4252"
26+
uuid = "9a962f9c-6df0-11e9-0e5d-c546b8b5ee8a"
27+
version = "1.1.0"
28+
29+
[[DataStructures]]
30+
deps = ["InteractiveUtils", "OrderedCollections"]
31+
git-tree-sha1 = "5a431d46abf2ef2a4d5d00bd0ae61f651cf854c8"
32+
uuid = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8"
33+
version = "0.17.10"
34+
35+
[[Dates]]
36+
deps = ["Printf"]
37+
uuid = "ade2ca70-3891-5945-98fb-dc099432e06a"
38+
39+
[[Distributed]]
40+
deps = ["Random", "Serialization", "Sockets"]
41+
uuid = "8ba89e20-285c-5b6f-9357-94700520ee1b"
42+
43+
[[Distributions]]
44+
deps = ["FillArrays", "LinearAlgebra", "PDMats", "Printf", "QuadGK", "Random", "SpecialFunctions", "Statistics", "StatsBase", "StatsFuns"]
45+
git-tree-sha1 = "6b19601c0e98de3a8964ed33ad73e130c7165b1d"
46+
uuid = "31c24e10-a181-5473-b8eb-7969acd0382f"
47+
version = "0.22.4"
48+
49+
[[FillArrays]]
50+
deps = ["LinearAlgebra", "Random", "SparseArrays"]
51+
git-tree-sha1 = "85c6b57e2680fa28d5c8adc798967377646fbf66"
52+
uuid = "1a297f60-69ca-5386-bcde-b61e274b549b"
53+
version = "0.8.5"
54+
55+
[[InteractiveUtils]]
56+
deps = ["Markdown"]
57+
uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
58+
59+
[[LibGit2]]
60+
uuid = "76f85450-5226-5b5a-8eaa-529ad045b433"
61+
62+
[[Libdl]]
63+
uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
64+
65+
[[LinearAlgebra]]
66+
deps = ["Libdl"]
67+
uuid = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
68+
69+
[[Logging]]
70+
uuid = "56ddb016-857b-54e1-b83d-db4d58db5568"
71+
72+
[[Markdown]]
73+
deps = ["Base64"]
74+
uuid = "d6f4376e-aef5-505a-96c1-9c027394607a"
75+
76+
[[Missings]]
77+
deps = ["DataAPI"]
78+
git-tree-sha1 = "de0a5ce9e5289f27df672ffabef4d1e5861247d5"
79+
uuid = "e1d29d7a-bbdc-5cf2-9ac0-f12de2c33e28"
80+
version = "0.4.3"
81+
82+
[[OpenBLAS_jll]]
83+
deps = ["Libdl", "Pkg"]
84+
git-tree-sha1 = "e2551d7c25d52f35b76d86a50917a3ba8988f519"
85+
uuid = "4536629a-c528-5b80-bd46-f80d51c5b363"
86+
version = "0.3.7+5"
87+
88+
[[OpenSpecFun_jll]]
89+
deps = ["Libdl", "Pkg"]
90+
git-tree-sha1 = "65f672edebf3f4e613ddf37db9dcbd7a407e5e90"
91+
uuid = "efe28fd5-8261-553b-a9e1-b2916fc3738e"
92+
version = "0.5.3+1"
93+
94+
[[OrderedCollections]]
95+
deps = ["Random", "Serialization", "Test"]
96+
git-tree-sha1 = "c4c13474d23c60d20a67b217f1d7f22a40edf8f1"
97+
uuid = "bac558e1-5e72-5ebc-8fee-abe8a469f55d"
98+
version = "1.1.0"
99+
100+
[[PDMats]]
101+
deps = ["Arpack", "LinearAlgebra", "SparseArrays", "SuiteSparse", "Test"]
102+
git-tree-sha1 = "5f303510529486bb02ac4d70da8295da38302194"
103+
uuid = "90014a1f-27ba-587c-ab20-58faa44d9150"
104+
version = "0.9.11"
105+
106+
[[Pkg]]
107+
deps = ["Dates", "LibGit2", "Libdl", "Logging", "Markdown", "Printf", "REPL", "Random", "SHA", "Test", "UUIDs"]
108+
uuid = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
109+
110+
[[Printf]]
111+
deps = ["Unicode"]
112+
uuid = "de0858da-6303-5e67-8744-51eddeeeb8d7"
113+
114+
[[QuadGK]]
115+
deps = ["DataStructures", "LinearAlgebra"]
116+
git-tree-sha1 = "dc84e810393cfc6294248c9032a9cdacc14a3db4"
117+
uuid = "1fd47b50-473d-5c70-9696-f719f8f3bcdc"
118+
version = "2.3.1"
119+
120+
[[REPL]]
121+
deps = ["InteractiveUtils", "Markdown", "Sockets"]
122+
uuid = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb"
123+
124+
[[Random]]
125+
deps = ["Serialization"]
126+
uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
127+
128+
[[Rmath]]
129+
deps = ["BinaryProvider", "Libdl", "Random", "Statistics"]
130+
git-tree-sha1 = "2bbddcb984a1d08612d0c4abb5b4774883f6fa98"
131+
uuid = "79098fc4-a85e-5d69-aa6a-4863f24498fa"
132+
version = "0.6.0"
133+
134+
[[SHA]]
135+
uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce"
136+
137+
[[Serialization]]
138+
uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b"
139+
140+
[[Sockets]]
141+
uuid = "6462fe0b-24de-5631-8697-dd941f90decc"
142+
143+
[[SortingAlgorithms]]
144+
deps = ["DataStructures", "Random", "Test"]
145+
git-tree-sha1 = "03f5898c9959f8115e30bc7226ada7d0df554ddd"
146+
uuid = "a2af1166-a08f-5f64-846c-94a0d3cef48c"
147+
version = "0.3.1"
148+
149+
[[SparseArrays]]
150+
deps = ["LinearAlgebra", "Random"]
151+
uuid = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
152+
153+
[[SpecialFunctions]]
154+
deps = ["OpenSpecFun_jll"]
155+
git-tree-sha1 = "e19b98acb182567bcb7b75bb5d9eedf3a3b5ec6c"
156+
uuid = "276daf66-3868-5448-9aa4-cd146d93841b"
157+
version = "0.10.0"
158+
159+
[[Statistics]]
160+
deps = ["LinearAlgebra", "SparseArrays"]
161+
uuid = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
162+
163+
[[StatsBase]]
164+
deps = ["DataAPI", "DataStructures", "LinearAlgebra", "Missings", "Printf", "Random", "SortingAlgorithms", "SparseArrays", "Statistics"]
165+
git-tree-sha1 = "be5c7d45daa449d12868f4466dbf5882242cf2d9"
166+
uuid = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
167+
version = "0.32.1"
168+
169+
[[StatsFuns]]
170+
deps = ["Rmath", "SpecialFunctions"]
171+
git-tree-sha1 = "f290ddd5fdedeadd10e961eb3f4d3340f09d030a"
172+
uuid = "4c63d2b9-4356-54db-8cca-17b64c39e42c"
173+
version = "0.9.4"
174+
175+
[[SuiteSparse]]
176+
deps = ["Libdl", "LinearAlgebra", "Serialization", "SparseArrays"]
177+
uuid = "4607b0f0-06f3-5cda-b6b1-a6196a1729e9"
178+
179+
[[Test]]
180+
deps = ["Distributed", "InteractiveUtils", "Logging", "Random"]
181+
uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
182+
183+
[[UUIDs]]
184+
deps = ["Random", "SHA"]
185+
uuid = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"
186+
187+
[[Unicode]]
188+
uuid = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5"

Project.toml

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
name = "TopicModels"
2+
uuid = "e9825ca3-3499-4c9b-97dc-a93734876e50"
3+
authors = ["Jonathan Chang <slycoder@gmail.com>"]
4+
version = "0.1.0"
5+
6+
[deps]
7+
Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
8+
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
9+
SpecialFunctions = "276daf66-3868-5448-9aa4-cd146d93841b"
10+
11+
[compat]
12+
julia = "1.3"
13+
14+
[extras]
15+
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
16+
17+
[targets]
18+
test = ["Test"]

REQUIRE

Whitespace-only changes.

src/TopicModels.jl

Lines changed: 17 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,9 @@ module TopicModels
22

33
import Base.length
44

5-
typealias RaggedMatrix{T} Array{Array{T,1},1}
5+
RaggedMatrix{T} = Array{Array{T,1},1}
66

7-
type Corpus
7+
struct Corpus
88
documents::RaggedMatrix{Int64}
99
weights::RaggedMatrix{Float64}
1010

@@ -15,7 +15,7 @@ type Corpus
1515
weights
1616
)
1717
end
18-
18+
1919
Corpus(documents::RaggedMatrix{Int64}) = begin
2020
weights = map(documents) do doc
2121
ones(Float64, length(doc))
@@ -27,7 +27,7 @@ type Corpus
2727
end
2828
end
2929

30-
type Model
30+
struct Model
3131
alphaPrior::Vector{Float64}
3232
betaPrior::Float64
3333
topics::Array{Float64,2}
@@ -37,9 +37,9 @@ type Model
3737
frozen::Bool
3838
corpus::Corpus
3939

40-
Model(alphaPrior::Vector{Float64},
41-
betaPrior::Float64,
42-
V::Int64,
40+
Model(alphaPrior::Vector{Float64},
41+
betaPrior::Float64,
42+
V::Int64,
4343
corpus::Corpus) = begin
4444
K = length(alphaPrior)
4545
m = new(
@@ -48,7 +48,7 @@ type Model
4848
zeros(Float64, K, V), # topics
4949
zeros(Float64, K), # topicSums
5050
zeros(Float64, K, length(corpus.documents)), #documentSums
51-
fill(Array(Int64, 0), length(corpus.documents)), # assignments
51+
Array{Array{Int64,1},1}(undef,length(corpus.documents)), # assignments
5252
false,
5353
corpus
5454
)
@@ -111,8 +111,8 @@ function wordDistribution(word::Int,
111111
out::Vector{Float64})
112112
V = size(model.topics, 2)
113113
for ii in 1:length(out)
114-
u = (model.documentSums[ii, document] + model.alphaPrior[ii]) *
115-
(model.topics[ii, word] + model.betaPrior) /
114+
u = (model.documentSums[ii, document] + model.alphaPrior[ii]) *
115+
(model.topics[ii, word] + model.betaPrior) /
116116
(model.topicSums[ii] + V * model.betaPrior)
117117
@inbounds out[ii] = u
118118
end
@@ -128,10 +128,10 @@ function sampleWord(word::Int,
128128
end
129129

130130

131-
function updateSufficientStatistics(word::Int64,
131+
function updateSufficientStatistics(word::Int64,
132132
topic::Int64,
133133
document::Int64,
134-
scale::Float64,
134+
scale::Float64,
135135
model::Model)
136136
fr = Float64(!model.frozen)
137137
@inbounds model.documentSums[topic, document] += scale
@@ -146,7 +146,7 @@ function sampleDocument(document::Int,
146146
Nw = length(words)
147147
@inbounds weights = model.corpus.weights[document]
148148
K = length(model.alphaPrior)
149-
p = Array(Float64, K)
149+
p = Array{Float64,1}(undef,K)
150150
@inbounds assignments = model.assignments[document]
151151
for ii in 1:Nw
152152
@inbounds word = words[ii]
@@ -170,10 +170,10 @@ end
170170
function termToWordSequence(term::AbstractString)
171171
parts = split(term, ":")
172172
fill(parse(Int64, parts[1]) + 1, parse(Int64, parts[2]))
173-
end
173+
end
174174

175175
# The functions below are designed for public consumption
176-
function trainModel(model::Model,
176+
function trainModel(model::Model,
177177
numIterations::Int64)
178178
for ii in 1:numIterations
179179
println(string("Iteration ", ii, "..."))
@@ -183,7 +183,7 @@ function trainModel(model::Model,
183183
end
184184

185185
function topTopicWords(model::Model,
186-
lexicon::Array{ASCIIString,1},
186+
lexicon::Array{String,1},
187187
numWords::Int64)
188188
[lexicon[reverse(sortperm(model.topics'[1:end, row]))[1:numWords]]
189189
for row in 1:size(model.topics,1)]
@@ -198,7 +198,7 @@ end
198198

199199
function readLexicon(stream)
200200
lines = readlines(stream)
201-
map(chomp, convert(Array{AbstractString,1}, lines))
201+
convert(Array{String,1},map(chomp, convert(Array{AbstractString,1}, lines)))
202202
end
203203

204204
export Corpus,

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy