Code for loading the Netflix Prize dataset. It supports packed structures defined with defbitstruct (whose syntax is similar to defstruct), stored either in mmap'd files or in simple-arrays.
Some attempt is made to use dynamic-extent appropriately for SBCL.
;;; Dataset-wide limits.  Later forms splice these values into type
;;; specifiers and array dimensions with `#.`, so every constant must already
;;; have its value by the time the forms below are read.
(defconstant +max-rating+ 4)              ; upper bound of the RATING field; score slots number (1+ this)
(defconstant +ratings+ 100480507)         ; size given to the ratings vector and to the INDEX array
(defconstant +movies+ 17770)              ; size given to the movies vector; upper bound of the MOVIE field
(defconstant +rating-time-max-days+ 4000) ; upper bound of the DAY field
(defconstant +customer-id-max+ 2649429)   ; largest index of the USER-CUSTOMER-IDS table
(defconstant +users+ 480189)              ; size given to the users vector; upper bound of the USER field

;;; Packed record describing one rating.  Each field is declared with the
;;; tightest integer range it needs.
(defbitstruct rating
  (user nil :type (integer 0 #.+users+))
  (movie nil :type (integer 1 #.+movies+))
  (rating nil :type (integer 0 #.+max-rating+))
  (day nil :type (integer 0 #.+rating-time-max-days+)))

;;; Packed per-movie record: START/LEN are bounded by the rating and user
;;; counts respectively.
;;; NOTE(review): START/LEN presumably delimit this movie's run of ratings,
;;; and SCORES presumably holds one counter per rating value (:count is
;;; (1+ +max-rating+)) -- confirm against defbitstruct's :count semantics.
(defbitstruct movie
  (start nil :type (integer 0 #.+ratings+))
  (len nil :type (integer 0 #.+users+))
  (scores 0 :type (integer 0 #.+users+) :count #.(1+ +max-rating+)))

;;; Packed per-user record, parallel to MOVIE but with LEN and SCORES bounded
;;; by the number of movies.  Note that LEN may legally equal +movies+.
(defbitstruct user
  (start 0 :type (integer 0 #.+ratings+))
  (len 0 :type (integer 0 #.+movies+))
  (scores 0 :type (integer 0 #.+movies+) :count #.(1+ +max-rating+)))

;;; Packed record holding a single rating index.
(defbitstruct user-index
  (rating-ptr nil :type (integer 0 #.+ratings+)))

;;; Top-level container tying the mmap'd vectors and lookup tables together.
(defstruct netflix
  ;; mmap'd vector created by MMAP-RATING-VECTOR under the name "ratings".
  (ratings (mmap-rating-vector "ratings" +ratings+) :type mmap)
  ;; One slot per rating, every slot initialised to +ratings+.
  ;; NOTE(review): +ratings+ looks like an "unset" marker here -- confirm.
  (index (make-array `(,+ratings+)
                     :element-type `(integer 0 ,+ratings+)
                     :initial-element +ratings+)
         :type (simple-array (integer 0 #.+ratings+) (#.+ratings+)))
  ;; mmap'd vectors created under the names "users" and "movies".
  (users (mmap-user-vector "users" +users+) :type mmap)
  (movies (mmap-movie-vector "movies" +movies+) :type mmap)
  ;; Table indexed by customer id (0 .. +customer-id-max+), every slot
  ;; initialised to +users+.
  ;; NOTE(review): presumably maps customer id -> dense user index, with
  ;; +users+ meaning "not assigned yet" -- confirm against the loader.
  (user-customer-ids (make-array `(,(1+ +customer-id-max+))
                                 :element-type `(integer 0 ,+users+)
                                 :initial-element +users+)
                     :type (simple-array (integer 0 #.+users+)
                                         (#.(1+ +customer-id-max+))))
  ;; Counters starting at zero.
  ;; NOTE(review): presumably the next free user / rating slot -- confirm.
  (next-user 0 :type (integer 0 #.+users+))
  (next-rating 0 :type (integer 0 #.+ratings+)))

(defun user-len-frequency-table (netflix)
  "Return a fixnum vector whose element N counts the users whose USER-LEN is N.

NETFLIX is a NETFLIX structure.  All +USERS+ entries of its users vector are
read.  The result has (1+ +MOVIES+) elements because USER-LEN is declared as
(integer 0 +MOVIES+), so +MOVIES+ itself must be a valid index."
  ;; Sized (1+ +movies+): an array of only +movies+ elements would be indexed
  ;; out of bounds by a user whose LEN equals +movies+.
  (let ((array (make-array (1+ +movies+) :initial-element 0 :element-type 'fixnum)))
    (loop for i below +users+
          do (incf (aref array (user-len (user-ref (netflix-users netflix) i)))))
    array))
git clone http://common-lisp.net/project/bytemap/bytemap.git