bytemap

A library for accessing densely packed uncompressed binary numeric data in Common Lisp.

What?

bytemap was written for loading the Netflix Prize dataset. It supports packed structures defined with defbitstruct (whose syntax is similar to defstruct), stored either in mmap'd files or in simple-arrays.

Some attempt is made to use dynamic-extent appropriately for SBCL.

How?


;;; Dataset dimensions used by the structure definitions in this example.
;;;
;;; The definitions are wrapped in EVAL-WHEN because the forms that follow
;;; splice these values in with #. (read-time evaluation).  DEFCONSTANT on its
;;; own is only required to make the *name* known to the compiler; whether the
;;; value is available while the file is being compiled is implementation
;;; dependent.  EVAL-WHEN makes the values available at compile, load and
;;; execute time on every conforming implementation.
(eval-when (:compile-toplevel :load-toplevel :execute)
  ;; Upper bound of the RATING field; (1+ +max-rating+) is used as the
  ;; number of SCORES entries per movie and per user.
  (defconstant +max-rating+ 4)
  ;; Length of the "ratings" vector and of the NETFLIX INDEX array.
  (defconstant +ratings+ 100480507)
  ;; Length of the "movies" vector.
  (defconstant +movies+ 17770)
  ;; Upper bound of the DAY field of a rating.
  (defconstant +rating-time-max-days+ 4000)
  ;; Largest customer id; USER-CUSTOMER-IDS has one entry per id 0..max.
  (defconstant +customer-id-max+ 2649429)
  ;; Length of the "users" vector.
  (defconstant +users+ 480189))

;;; Packed record with four integer fields: USER, MOVIE, RATING and DAY.
;;; Each field's integer range is spliced in at read time (#.) from the
;;; +...+ constants, so those constants must already have values when this
;;; form is read.
;;; NOTE(review): the NIL in the second position of each field is presumably
;;; the "no default value" initform, by analogy with DEFSTRUCT slot syntax --
;;; confirm against the DEFBITSTRUCT definition.
(defbitstruct rating
  ;; Range 0..+users+ inclusive.
  (user nil :type (integer 0 #.+users+))
  ;; Range starts at 1, unlike the other fields, which start at 0.
  (movie nil :type (integer 1 #.+movies+))
  ;; Range 0..+max-rating+ inclusive.
  (rating nil :type (integer 0 #.+max-rating+))
  ;; Range 0..+rating-time-max-days+ inclusive.
  (day nil :type (integer 0 #.+rating-time-max-days+)))

;;; Packed per-movie record with fields START, LEN and SCORES.
;;; NOTE(review): START/LEN presumably describe a run of ratings belonging to
;;; this movie (START bounded by +ratings+, LEN bounded by +users+) -- confirm
;;; against the code that fills these records.
(defbitstruct movie
  ;; Range 0..+ratings+ inclusive.
  (start nil :type (integer 0 #.+ratings+))
  ;; Range 0..+users+ inclusive.
  (len nil :type (integer 0 #.+users+))
  ;; :COUNT is (1+ +max-rating+) entries, each in 0..+users+.
  ;; NOTE(review): presumably one counter per possible rating value -- confirm.
  (scores 0 :type (integer 0 #.+users+) :count #.(1+ +max-rating+)))

;;; Packed per-user record with fields START, LEN and SCORES, every one
;;; defaulting to 0.
;;; NOTE(review): START/LEN presumably describe a run of ratings belonging to
;;; this user (START bounded by +ratings+, LEN bounded by +movies+) -- confirm
;;; against the code that fills these records.
(defbitstruct user
  ;; Range 0..+ratings+ inclusive.
  (start 0 :type (integer 0 #.+ratings+))
  ;; Range 0..+movies+ inclusive, so +movies+ itself is a legal value.
  (len 0 :type (integer 0 #.+movies+))
  ;; :COUNT is (1+ +max-rating+) entries, each in 0..+movies+.
  ;; NOTE(review): presumably one counter per possible rating value -- confirm.
  (scores 0 :type (integer 0 #.+movies+) :count #.(1+ +max-rating+)))

;;; Packed record holding a single field, RATING-PTR, in 0..+ratings+.
;;; NOTE(review): presumably a position in the ratings vector -- confirm
;;; against its users.
(defbitstruct user-index
    (rating-ptr nil :type (integer 0 #.+ratings+)))

;;; Top-level container tying together the three mmap'd vectors plus two
;;; in-memory lookup arrays and two fill counters.
(defstruct netflix
  ;; Result of MMAP-RATING-VECTOR for "ratings" with +ratings+ elements.
  (ratings (mmap-rating-vector "ratings" +ratings+)
   :type mmap)
  ;; One entry per rating; every entry starts out as +ratings+.
  (index (make-array (list +ratings+)
                     :initial-element +ratings+
                     :element-type (list 'integer 0 +ratings+))
   :type (simple-array (integer 0 #.+ratings+) (#.+ratings+)))
  ;; Result of MMAP-USER-VECTOR for "users" with +users+ elements.
  (users (mmap-user-vector "users" +users+)
   :type mmap)
  ;; Result of MMAP-MOVIE-VECTOR for "movies" with +movies+ elements.
  (movies (mmap-movie-vector "movies" +movies+)
   :type mmap)
  ;; One entry per customer id 0..+customer-id-max+; every entry starts out
  ;; as +users+.
  (user-customer-ids (make-array (list (1+ +customer-id-max+))
                                 :initial-element +users+
                                 :element-type (list 'integer 0 +users+))
   :type (simple-array (integer 0 #.+users+) (#.(1+ +customer-id-max+))))
  ;; Counters, both starting at 0.
  (next-user 0
   :type (integer 0 #.+users+))
  (next-rating 0
   :type (integer 0 #.+ratings+)))



(defun user-len-frequency-table (netflix)
  "Return a fixnum vector whose element N is the number of user records in
NETFLIX whose USER-LEN equals N.

NETFLIX is a NETFLIX structure; its USERS slot is read with USER-REF for
every index below +USERS+.  The result has (1+ +MOVIES+) elements, one for
each length from 0 to +MOVIES+ inclusive."
  ;; The LEN field of the USER bitstruct is declared (integer 0 +movies+),
  ;; so +movies+ itself is a legal length.  A table of only +movies+ slots
  ;; would be indexed out of bounds by such a record, hence the 1+.
  (let ((users (netflix-users netflix))
        (table (make-array (1+ +movies+) :initial-element 0 :element-type 'fixnum)))
    (loop for i below +users+
          do (incf (aref table (user-len (user-ref users i)))))
    table))

Where?

Current snapshot.

git clone http://common-lisp.net/project/bytemap/bytemap.git

Who?

John Fremlin. Any help very much appreciated!

Project members

Valid XHTML 1.0 Strict