slycoder · slycoder · Apr 15, 2016 · Apr 14, 2016
diff --git a/README.md b/README.md
@@ -14,13 +14,15 @@ document.  The space whence the words are drawn is termed the lexicon.
 
 Formally, the model is defined as
 
+```
   For each topic k,
     phi_k ~ Dirichlet(beta)
   For each document d,
     theta ~ Dirichlet(alpha)
     For each word w,
       z ~ Multinomial(theta)
       w ~ Multinomial(phi_z)
+```
 
 alpha and beta are hyperparameters of the model.  The number of topics, K,
 is a fixed parameter of the model, and w is observed.  This package fits 
@@ -31,8 +33,10 @@ the topics using collapsed Gibbs sampling (Griffiths and Steyvers, 2004).
 We describe the functions of the package using an example. First we load 
 corpora from data files as follows:
 
+```
   testDocuments = readDocuments(open("cora.documents"))
   testLexicon = readLexicon(open("cora.lexicon"))
+```
 
 These read files in LDA-C format.  The lexicon file is assumed to have one
 word per line.  The document file consists of one document per line.  Each
@@ -45,7 +49,9 @@ the number of tuples for that document.
 
 With the documents loaded, we instantiate a model that we want to train:
 
+```
   model = Model(fill(0.1, 10), 0.01, length(testLexicon), testDocuments)
+```
 
 This is a model with 10 topics.  alpha is set to a uniform Dirichlet prior
 with 0.1 weight on each topic (the dimension of this variable is used
@@ -54,7 +60,9 @@ the prior weight on phi (i.e. beta) should be set to 0.01.  The third
 parameter is the lexicon size; here we just use the lexicon we have 
 just read.  The fourth parameter is the collection of documents.
 
+```
   trainModel(testDocuments, model, 30)
+```
 
 With the model defined, we can train the model on a corpus of documents.
 The trainModel command takes the corpus as the first argument, the model
@@ -64,7 +72,9 @@ will be mutated in place.
 
 Finally we can examine the output of the trained model using topTopicWords.
 
+```
   topWords = topTopicWords(model, testLexicon, 10)
+```
 
 This function retrieves the top words associated with each topic; this
 serves as a useful summary of the model.  The first parameter is the model,

diff --git a/src/TopicModels.jl b/src/TopicModels.jl
@@ -133,7 +133,7 @@ function updateSufficientStatistics(word::Int64,
                                     document::Int64,
                                     scale::Float64, 
                                     model::Model)
-  fr = float64(!model.frozen)
+  fr = Float64(!model.frozen)
   @inbounds model.documentSums[topic, document] += scale
   @inbounds model.topicSums[topic] += scale * fr
   @inbounds model.topics[topic, word] += scale * fr
@@ -167,9 +167,9 @@ function sampleCorpus(model::Model)
 end
 
 # Note, files are zero indexed, but we are 1-indexed.
-function termToWordSequence(term::String)
+function termToWordSequence(term::AbstractString)
   parts = split(term, ":")
-  fill(int64(parts[1]) + 1, int64(parts[2]))
+  fill(parse(Int64, parts[1]) + 1, parse(Int64, parts[2]))
 end 
 
 # The functions below are designed for public consumption
@@ -190,16 +190,15 @@ function topTopicWords(model::Model,
 end
 
 function readDocuments(stream)
-  lines = readlines(stream)
-  convert(
-    RaggedMatrix{Int64},
-    [apply(vcat, [termToWordSequence(term) for term in split(line, " ")[2:end]])
-     for line in lines])
+    lines = readlines(stream)
+    convert(RaggedMatrix{Int64},
+            [vcat([termToWordSequence(term) for term in split(line, " ")[2:end]]...)
+             for line in lines])
 end
 
 function readLexicon(stream)
   lines = readlines(stream)
-  map(chomp, convert(Array{String,1}, lines))
+  map(chomp, convert(Array{AbstractString,1}, lines))
 end
 
 export Corpus,
@@ -208,5 +207,4 @@ export Corpus,
        readLexicon,
        topTopicWords,
        trainModel
-
 end