A library for loading the Netflix Prize dataset. It supports packed structures, defined with defbitstruct (whose syntax is similar to defstruct), stored either in mmap'd files or in simple-arrays.
Some attempt is made to use dynamic-extent declarations appropriately on SBCL.
;; Dataset-wide limits.  These values are spliced into type specifiers at
;; read time with #. by the structure definitions later in this file, so they
;; must already be bound while the file is being read/compiled.  CLHS leaves
;; it up to the implementation whether a top-level DEFCONSTANT evaluates its
;; value at compile time; the EVAL-WHEN makes that explicit and portable.
(eval-when (:compile-toplevel :load-toplevel :execute)
  ;; Largest value of a stored rating; the per-movie and per-user score
  ;; tables hold (1+ +max-rating+) counters.
  ;; NOTE(review): presumably star ratings 1-5 stored zero-based -- confirm.
  (defconstant +max-rating+ 4)
  ;; Number of rating records (length of the ratings vector and the index).
  (defconstant +ratings+ 100480507)
  ;; Number of movies (length of the movies vector).
  (defconstant +movies+ 17770)
  ;; Upper bound of the DAY field of a rating.
  (defconstant +rating-time-max-days+ 4000)
  ;; Largest customer id; the customer-id lookup table has one more entry
  ;; than this so the id itself can be used as an index.
  (defconstant +customer-id-max+ 2649429)
  ;; Number of users (length of the users vector).
  (defconstant +users+ 480189))
;; Packed record describing a single rating.  Every field is a bounded
;; integer whose limits come from the dataset constants.
;; NOTE(review): DEFBITSTRUCT is defined elsewhere; presumably the field
;; order and integer ranges determine the packed bit layout -- do not
;; reorder without checking the macro.
;; NOTE(review): the defaults here are NIL although the fields are typed as
;; integers (the USER bitstruct uses 0) -- confirm how DEFBITSTRUCT treats a
;; NIL default.
(defbitstruct rating
;; User reference in the range 0..+users+.
;; NOTE(review): presumably a dense user index rather than a raw customer id -- confirm.
(user nil :type (integer 0 #.+users+))
;; Movie number in the range 1..+movies+ (note the lower bound of 1).
(movie nil :type (integer 1 #.+movies+))
;; Rating value in the range 0..+max-rating+.
(rating nil :type (integer 0 #.+max-rating+))
;; Day number in the range 0..+rating-time-max-days+.
;; NOTE(review): the epoch the days are counted from is not visible here.
(day nil :type (integer 0 #.+rating-time-max-days+)))
;; Packed per-movie record.
;; NOTE(review): DEFBITSTRUCT is defined elsewhere; presumably the field
;; order and integer ranges determine the packed bit layout.
(defbitstruct movie
;; Value in 0..+ratings+.
;; NOTE(review): presumably the position of this movie's first rating in the ratings vector -- confirm.
(start nil :type (integer 0 #.+ratings+))
;; Value in 0..+users+.
;; NOTE(review): presumably the number of ratings this movie received -- confirm.
(len nil :type (integer 0 #.+users+))
;; (1+ +max-rating+) values, each in 0..+users+.
;; NOTE(review): presumably :count repeats the field, giving one counter per possible rating value -- confirm.
(scores 0 :type (integer 0 #.+users+) :count #.(1+ +max-rating+)))
;; Packed per-user record; all fields default to 0.
;; NOTE(review): DEFBITSTRUCT is defined elsewhere; presumably the field
;; order and integer ranges determine the packed bit layout.
(defbitstruct user
;; Value in 0..+ratings+.
;; NOTE(review): presumably the position of this user's first entry in a ratings-sized table -- confirm.
(start 0 :type (integer 0 #.+ratings+))
;; Value in 0..+movies+; this is what USER-LEN-FREQUENCY-TABLE tallies.
;; NOTE(review): presumably the number of movies this user rated -- confirm.
(len 0 :type (integer 0 #.+movies+))
;; (1+ +max-rating+) values, each in 0..+movies+.
;; NOTE(review): presumably :count repeats the field, giving one counter per possible rating value -- confirm.
(scores 0 :type (integer 0 #.+movies+) :count #.(1+ +max-rating+)))
;; Packed record holding a single bounded integer in 0..+ratings+.
;; NOTE(review): presumably a position in the ratings vector, used to list
;; ratings grouped by user -- confirm against the code that fills it.
(defbitstruct user-index
(rating-ptr nil :type (integer 0 #.+ratings+)))
;; Top-level container for the loaded dataset.  Every slot has a default
;; initform, so (make-netflix) maps the three backing vectors and allocates
;; the two in-memory lookup tables.
(defstruct netflix
  ;; Rating records, +ratings+ of them, obtained from MMAP-RATING-VECTOR
  ;; under the name "ratings".
  (ratings (mmap-rating-vector "ratings" +ratings+)
   :type mmap)
  ;; One entry per rating; each entry starts out as +ratings+.
  ;; NOTE(review): +ratings+ is presumably an "unset" sentinel, being one
  ;; past the last valid position -- confirm.
  (index (make-array +ratings+
                     :element-type '(integer 0 #.+ratings+)
                     :initial-element +ratings+)
   :type (simple-array (integer 0 #.+ratings+) (#.+ratings+)))
  ;; Per-user records, +users+ of them, under the name "users".
  (users (mmap-user-vector "users" +users+)
   :type mmap)
  ;; Per-movie records, +movies+ of them, under the name "movies".
  (movies (mmap-movie-vector "movies" +movies+)
   :type mmap)
  ;; Table indexed by customer id (0..+customer-id-max+); each entry starts
  ;; out as +users+.
  ;; NOTE(review): presumably maps a customer id to a dense user index, with
  ;; +users+ meaning "not seen yet" -- confirm.
  (user-customer-ids (make-array (1+ +customer-id-max+)
                                 :element-type '(integer 0 #.+users+)
                                 :initial-element +users+)
   :type (simple-array (integer 0 #.+users+) (#.(1+ +customer-id-max+))))
  ;; Counters starting at 0.
  ;; NOTE(review): presumably the next free user / rating position while loading -- confirm.
  (next-user 0 :type (integer 0 #.+users+))
  (next-rating 0 :type (integer 0 #.+ratings+)))
(defun user-len-frequency-table (netflix)
  "Return a histogram of USER-LEN values over all users of NETFLIX.

Element K of the returned fixnum vector is the number of user records, at
indices 0 below +USERS+ of (NETFLIX-USERS NETFLIX), whose USER-LEN equals K.
The vector has (1+ +MOVIES+) elements so that every declared USER-LEN value,
including +MOVIES+ itself, has a bucket."
  ;; USER-LEN is declared as (integer 0 +movies+): +movies+ is a legal value,
  ;; so a table of only +movies+ elements would be indexed out of bounds by a
  ;; user with the maximum length.
  (let ((table (make-array (1+ +movies+) :initial-element 0 :element-type 'fixnum))
        ;; The users vector does not change during the scan; fetch it once.
        (users (netflix-users netflix)))
    (dotimes (i +users+ table)
      (incf (aref table (user-len (user-ref users i)))))))
git clone http://common-lisp.net/project/bytemap/bytemap.git