From 9e37b1719b8eb5d4a903b4dd5c09a28c176e38b9 Mon Sep 17 00:00:00 2001 From: John Myles White Date: Fri, 6 Dec 2013 19:00:11 -0800 Subject: [PATCH] Slight code speedups Assert indexing operations are in bounds Pull indexing operations out of loops when possible --- src/TopicModels.jl | 43 ++++++++++++++++++++++++++----------------- 1 file changed, 26 insertions(+), 17 deletions(-) diff --git a/src/TopicModels.jl b/src/TopicModels.jl index 17ccde8..d1bd4e2 100644 --- a/src/TopicModels.jl +++ b/src/TopicModels.jl @@ -78,15 +78,17 @@ end function initializeAssignments(model::Model) for dd in 1:length(model.corpus) - model.assignments[dd] = fill(0, length(model.corpus.documents[dd])) - for ww in 1:length(model.corpus.documents[dd]) - word = model.corpus.documents[dd][ww] + @inbounds words = model.corpus.documents[dd] + @inbounds model.assignments[dd] = fill(0, length(words)) + for ww in 1:length(words) + @inbounds word = words[ww] topic = sampleMultinomial(model.alphaPrior) - model.assignments[dd][ww] = topic + @inbounds model.assignments[dd][ww] = topic updateSufficientStatistics( word, topic, dd, model.corpus.weights[dd][ww], model) end end + return end function sampleMultinomial(p::Array{Float64,1}) @@ -109,11 +111,12 @@ function wordDistribution(word::Int, out::Vector{Float64}) V = size(model.topics, 2) for ii in 1:length(out) - out[ii] = (model.documentSums[ii, document] + model.alphaPrior[ii]) * - (model.topics[ii, word] + model.betaPrior) / - (model.topicSums[ii] + V * model.betaPrior) + u = (model.documentSums[ii, document] + model.alphaPrior[ii]) * + (model.topics[ii, word] + model.betaPrior) / + (model.topicSums[ii] + V * model.betaPrior) + @inbounds out[ii] = u end - return out + return end function sampleWord(word::Int, @@ -130,32 +133,37 @@ function updateSufficientStatistics(word::Int64, document::Int64, scale::Float64, model::Model) - model.documentSums[topic, document] += scale - model.topicSums[topic] += scale * !model.frozen - model.topics[topic, word] += scale * !model.frozen + fr = float64(!model.frozen) + @inbounds model.documentSums[topic, document] += scale + @inbounds model.topicSums[topic] += scale * fr + @inbounds model.topics[topic, word] += scale * fr + return end function sampleDocument(document::Int, model::Model) - words = model.corpus.documents[document] + @inbounds words = model.corpus.documents[document] Nw = length(words) - weights = model.corpus.weights[document] + @inbounds weights = model.corpus.weights[document] K = length(model.alphaPrior) p = Array(Float64, K) + @inbounds assignments = model.assignments[document] for ii in 1:Nw - word = words[ii] - oldTopic = model.assignments[document][ii] + @inbounds word = words[ii] + @inbounds oldTopic = assignments[ii] updateSufficientStatistics(word, oldTopic, document, -weights[ii], model) - newTopic::Int64 = sampleWord(word, document, model, p) - model.assignments[document][ii] = newTopic + newTopic = sampleWord(word, document, model, p) + @inbounds assignments[ii] = newTopic updateSufficientStatistics(word, newTopic, document, weights[ii], model) end + return end function sampleCorpus(model::Model) for ii in 1:length(model.corpus) sampleDocument(ii, model) end + return end # Note, files are zero indexed, but we are 1-indexed. @@ -171,6 +179,7 @@ function trainModel(model::Model, println(string("Iteration ", ii, "...")) sampleCorpus(model) end + return end function topTopicWords(model::Model, pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy