Arc Forum | new | comments | leaders | submit | login
1 point by cchooper 5842 days ago | link | parent

This is my final attempt. It's faster, shorter and easier to read than my first attempt, but the performance is still shockingly bad.

  ; Word-frequency model: maps each lowercase word seen in the training
  ; corpus to the number of times it occurred.
  (= *nwords* (table))

  ; Train the model one line at a time.  Each line is lowercased and then
  ; split on every character outside a-z (not just on whitespace), so that
  ; punctuation and digits are stripped from the words.  Splitting on
  ; whitespace alone tallies "the," and "the" as different words, and the
  ; punctuated entries can never match the candidates built by edits1, which
  ; only inserts or substitutes the letters a-z.
  (w/infile f "c:/big.txt"
    (whilet l (readline f)
      (counts (tokens (downcase l) [no (<= #\a _ #\z)]) *nwords*)))

  ; Return the list of distinct strings exactly one simple edit away from
  ; word.  Candidates are generated in this order (dedup keeps the first
  ; occurrence of each string):
  ;   1. for every position: the deletion at that position, followed by the
  ;      26 single-letter substitutions at that position;
  ;   2. every swap of two adjacent characters;
  ;   3. every insertion of a letter a-z at every gap, including both ends.
  ; Substitution can reproduce word itself, so word may appear in the result.
  (def edits1 (word)
    (withs (letters "abcdefghijklmnopqrstuvwxyz"
            size    (len word)
            lead    [cut word 0 _]    ; characters before index _
            trail   [cut word _])     ; characters from index _ onward
      (dedup
        (accum emit
          ; deletions and substitutions, interleaved per position
          (forlen k word
            (emit (string (lead k) (trail (+ k 1))))
            (each ch letters
              (emit (copy word k ch))))
          ; transpositions: the loop body never runs when size < 2
          (for k 0 (- size 2)
            (emit (copy word k (word (+ k 1)) (+ k 1) (word k))))
          ; insertions: k = size appends after the last character
          (for k 0 size
            (each ch letters
              (emit (string (lead k) ch (trail k)))))))))

  ; Collect the known words that are one edit away from any string in edits
  ; (i.e. two edits away from the original word when edits is its edits1
  ; list).  The result may contain duplicates.  The word parameter is not
  ; used by the body; it is kept so existing callers keep working.
  (def known-edits2 (word edits)
    (accum collect
      (each candidate edits
        (each hit (known (edits1 candidate))
          (collect hit)))))

  ; Return the members of words that have an entry in the *nwords*
  ; frequency table, preserving their order.
  (def known (words)
    (keep (fn (w) (*nwords* w))
          words))

  ; Return the most likely spelling correction for word.
  ;
  ; Candidates are tried in order of edit distance, and the first non-empty
  ; set wins:
  ;   - word itself, if it is in *nwords* (returned immediately, so the
  ;     edits1 list is not built for already-correct words);
  ;   - known words one edit away;
  ;   - known words two edits away;
  ;   - otherwise word itself, unchanged, rather than nil.
  ; Within the winning set, the candidate with the highest *nwords* count is
  ; returned; on a tie the earliest candidate is kept.
  (def correct (word)
    (if (*nwords* word)
        word
        (let edits (edits1 word)
          ; The (list word) fallback is a one-element list, so best returns
          ; it without ever comparing its (missing) count.
          (best (compare > [*nwords* _])
                (or (known edits)
                    (known-edits2 word edits)
                    (list word))))))