Search This Blog

Thursday, January 10, 2013

How Sessions Work in Ring

Sessions are good fun, I found:


;;  necessary dependencies
;; [[org.clojure/clojure "1.4.0"]
;;  [ring/ring "1.1.6"]]
;; -------------

;; Here's an app, built in a way which should surprise no-one who's read the previous posts:

(require 'ring.adapter.jetty
         'ring.middleware.stacktrace
         'clojure.pprint)

;; Middleware for spying on the doings of other middleware:
(defn html-escape
  "Returns `string` with the HTML-significant characters &, < and > replaced
  by their entity references, so the text can be shown verbatim in a page."
  [string]
  ;; & has to be escaped as well: otherwise any text in the dump that already
  ;; looks like an entity (e.g. \"&lt;\") would be rendered as the character
  ;; it names instead of being shown literally.
  (clojure.string/escape string {\& "&amp;", \< "&lt;", \> "&gt;"}))

(defn html-preformatted-escape
  "Escapes `string` with html-escape and wraps the result in a <pre> element,
  with a newline after the opening tag and after the closing tag."
  [string]
  (let [escaped (html-escape string)]
    (str "<pre>\n" escaped "</pre>\n")))

(defn format-request
  "Returns a printable dump of the map `request` under the heading `name`.
  Every key in `kill-keys` is removed from the map, and every header named in
  `kill-headers` is removed from its :headers entry, before pretty-printing."
  [name request kill-keys kill-headers]
  (let [rule        "-------------------------------"
        drop-header (fn [m header] (update-in m [:headers] dissoc header))
        trimmed     (reduce drop-header
                            (apply dissoc request kill-keys)
                            kill-headers)]
    (with-out-str
      ;; heading framed by two rules, then the map, then a closing rule
      (doseq [line [rule name rule]]
        (println line))
      (clojure.pprint/pprint trimmed)
      (println rule))))


;; I have taken the liberty of removing some of the less fascinating entries from the request and response maps, for clarity
;; Top-level keys that wrap-spy asks format-request to strip from a request or
;; response map before printing it.
(def kill-keys [:body :character-encoding :remote-addr :server-name :server-port :ssl-client-cert :scheme  :content-type  :content-length])
;; Header names (strings, as they appear as keys of the :headers map) that
;; wrap-spy asks format-request to strip before printing.
(def kill-headers ["user-agent" "accept" "accept-encoding" "accept-language" "accept-charset" "cache-control" "connection"])

(defn wrap-spy
  "Middleware: returns a handler that prints the (trimmed) request it receives
  and the response map `handler` returns, labelled with `spyname`, and also
  splices both dumps into the response :body, before and after the original
  body."
  [handler spyname]
  (fn [request]
    (let [dump     (fn [label m]
                     (format-request (str spyname label) m kill-keys kill-headers))
          incoming (dump ":\n Incoming Request:" request)
          _        (println incoming)          ; printed before the handler runs
          response (handler request)
          outgoing (dump ":\n Outgoing Response Map:" response)]
      (println outgoing)
      (update-in response [:body]
                 #(str (html-preformatted-escape incoming)
                       %
                       (html-preformatted-escape outgoing))))))



;; Absolute binding promise to someday get around to writing the app
(declare handler)

;; plumbing: every step wraps the result of the step above it, so the last
;; step listed is the outermost layer and the first to see a request
(def app
  (-> #'handler
      ring.middleware.stacktrace/wrap-stacktrace    ; belt
      (wrap-spy "what the handler sees")
      (wrap-spy "what the web server sees")
      ring.middleware.stacktrace/wrap-stacktrace))  ; braces

;; The actual application
(defn handler
  "Answers every request with the same static HTML greeting."
  [request]
  (let [greeting "<h1>Hello World!</h1>"]
    {:body    greeting
     :headers {"Content-Type" "text/html"}
     :status  200}))


;; Start the server if it hasn't already been started
;; defonce: re-evaluating this form does not start a second server.
;; #'app passes the var rather than its current value, so later redefinitions
;; of app are picked up by the running server.
;; :join? false keeps the evaluating thread from blocking on the server.
(defonce server (ring.adapter.jetty/run-jetty #'app {:port 8080 :join? false}))


;; Have a look at http://localhost:8080, and while you're there,
;; delete any cookies that your browser is storing for localhost:8080

;; In Chrome, you can right-click on a page, Inspect Element, and then
;; choose Resources/Cookies/localhost to give you a live view of your
;; cookies, which is nice to watch for the following.

;; In Firefox I can't find anything as nice. 

;; With curl of course, you have total control.
;; Real men, those who do not cower behind their mother's apron strings, like whining infants
;; may wish to experiment with commands such as:
;; $ curl -sv http://localhost:8080 -b cookies.txt -c cookies.txt && cat cookies.txt


;; Now we'll add the session middleware:

(require 'ring.middleware.session)

;; and re-plumb
;; Same stack as before with wrap-session slotted in between the two spies, so
;; the outer spy sees the raw traffic and the inner spy sees what the session
;; middleware has added.
(def app
  (-> #'handler
      ring.middleware.stacktrace/wrap-stacktrace
      (wrap-spy "what the handler sees")
      ring.middleware.session/wrap-session
      (wrap-spy "what the web server sees")
      ring.middleware.stacktrace/wrap-stacktrace))


;; Have another look at http://localhost:8080, (i.e. refresh the page)


;; If you examine the request as passed to the handler, then you'll
;; see a :session key, whose value currently is {}. That's been
;; inserted by the session middleware.

;; That seems to be the only difference so far.

;; Now redefine the handler to return a :session key/value pair in the response map
(defn handler
  "Static greeting that also returns a :session value (a plain string) in the
  response map."
  [request]
  (let [greeting "<h1>Hello World!</h1>"]
    {:session "I am a session. Fear me."
     :body    greeting
     :headers {"Content-Type" "text/html"}
     :status  200}))


;; And refresh the page again. Only once! And pay careful attention!

;; The incoming maps are exactly the same, of course, but on the way out the middleware
;; notices the :session key, and transforms it into a cookie-setting header

;; Your browser should now have a stored cookie, named ring-session,
;; with a cryptic but hopefully unique random value

;; Again refresh, and notice that this time, the browser presents its cookie, 
;; and the middleware decodes it and puts a :session key in the request.
;; Notice that no new cookie is set the second time.

;; The cookie stays the same.

;; Notice that we can change the data in the session without changing
;; the cookie on the browser.  In the default implementation at least,
;; the data is stored on the server, and the browser's cookie just
;; tells the server which session to use.

(defn handler
  "Static greeting whose :session value grows by one \"!\" on every request:
  an empty or missing incoming session starts the string off, otherwise the
  incoming value has \"!\" appended."
  [request]
  (let [previous (request :session)
        updated  (if (empty? previous)
                   "I am a session. Fear me!"
                   (str previous "!"))]
    {:status  200
     :headers {"Content-Type" "text/html"}
     :body    "<h1>Hello World!</h1>"
     :session updated}))


;; One thing we have control of is the length of time before the session expires
;; Ten seconds is a bit short, but it does allow for some interesting effects:
(def app
  ;; :max-age is passed through as a cookie attribute; 10 is the ten seconds
  ;; mentioned above
  (let [session-options {:cookie-attrs {:max-age 10}}]
    (-> #'handler
        ring.middleware.stacktrace/wrap-stacktrace
        (wrap-spy "what the handler sees")
        (ring.middleware.session/wrap-session session-options)
        (wrap-spy "what the web server sees")
        ring.middleware.stacktrace/wrap-stacktrace)))


;; Here's an app using sessions to store state in much the same way that we were using cookies earlier:
(defn handler
  "Greets the visitor and keeps a visit counter under :count in the session.
  Returns nil for /favicon.ico so that those requests are not counted."
  [request]
  (when-not (= (request :uri) "/favicon.ico")
    ;; maps are functions of their keys and accept a default, so both lookups
    ;; fall back gracefully when the session or the counter is missing
    (let [session (request :session {})
          visits  (session :count 0)
          body    (if (zero? visits)
                    (str "<h1>Hello Stranger!</h1>")
                    (str "<h1>Hello Again (" visits ")!</h1>"))]
      {:status  200
       :headers {"Content-Type" "text/html"}
       :body    body
       :session {:count (inc visits)}})))

;; How many times can you ping the virtual goldfish?
;; I got it up to 25 with:
;; watch -d -n 0 curl -sv http://localhost:8080 -b cookies.txt -c cookies.txt


;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Footnote for Smug Lisp Weenies only

;; Ring provides an alternative to storing session variables in
;; memory, where it can encrypt them into a cookie

;; This seems a more 'functional' way to do things, without carrying
;; state in the server, and it probably is a good way to do things if
;; you're careful.

;; But they're not quite equivalent: Some things that you can do with the
;; memory backed store won't work if you have to serialize your
;; session data.

;; Try this:
(require 'ring.middleware.session.cookie)

(def app
  ;; session data now goes through a cookie store instead of the default store
  (let [store (ring.middleware.session.cookie/cookie-store {:key "a 16-byte secret"})]
    (-> #'handler
        ring.middleware.stacktrace/wrap-stacktrace
        (wrap-spy "what the handler sees")
        (ring.middleware.session/wrap-session {:store store})
        (wrap-spy "what the web server sees")
        ring.middleware.stacktrace/wrap-stacktrace)))

;; Everything should still work fine, but now notice that the cookie
;; is changing every time you refresh the page.

;; But if you redefine the handler

(defn handler
  "Stores a *function* in the session under :fn and greets the visitor with
  whatever the function found in the incoming session returns (falling back
  to one that returns \"Stranger\")."
  [request]
  (let [session (request :session {})
        namer   (session :fn (fn [] "Stranger"))]
    {:status  200
     :headers {"Content-Type" "text/html"}
     :body    (str "<h1>Hello " (namer) "</h1>")
     :session {:fn (fn [] "Again")}}))

;; and refresh twice, then it should cause some sort of nasty exception

;; Now restore the memory-backed version and try again

;; wrap-session with no options again, i.e. back to the default store
(def app
  (-> #'handler
      ring.middleware.stacktrace/wrap-stacktrace
      (wrap-spy "what the handler sees")
      ring.middleware.session/wrap-session
      (wrap-spy "what the web server sees")
      ring.middleware.stacktrace/wrap-stacktrace))

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; One last flourish: flash messages

;; Flash messages use the session mechanism to allow a redirect to
;; leave a message on the page it is redirecting to:

(require 'ring.middleware.flash)

;; wrap-flash sits inside wrap-session (it is listed before it), with a spy on
;; either side of it so that its effect on the maps can be seen
(def app
  (-> #'handler
      ring.middleware.stacktrace/wrap-stacktrace
      (wrap-spy "what the handler sees")
      ring.middleware.flash/wrap-flash
      (wrap-spy "what the flash middleware sees")
      ring.middleware.session/wrap-session
      (wrap-spy "what the web server sees")
      ring.middleware.stacktrace/wrap-stacktrace))

(defn link
  "Returns an HTML anchor element whose target and visible text are both `s`."
  [s]
  (let [opening (str "<a href=\"" s "\">")]
    (str opening s "</a>")))

(defn handler
  "Routes on the request :uri.
    /favicon.ico  -> 404
    /             -> home page showing any :flash message found in the request
    /bother       -> redirect to / that leaves a :flash message behind
    anything else -> 404"
  [request]
  (case (request :uri)
    "/favicon.ico" {:status 404}
    "/" {:body (str "<h1>home " (request :flash) "</h1>"  "<p>" (link "/bother") "<p>" (link "/"))}
    "/bother" {:status 302, :headers {"Location" "/"}, :body "" :flash "(bothered)"}
    ;; default clause: without it `case` throws IllegalArgumentException for
    ;; every other URI, which turns a mistyped address into a server error
    {:status 404}))

;; The mechanism here is quite subtle and bears thinking about.


;; The interested reader might also wish to get a load of this mother:

(defn handler
  "Routes on the request :uri, counting favicon and /bother requests in the
  session under :favicon and :bother.
    /favicon.ico  -> 404, increments the :favicon counter
    /             -> home page showing the :flash message and both counters
    /bother       -> redirect to / with a :flash message, increments :bother
    anything else -> 404 (no :session key is returned for it)"
  [request]
  (case (request :uri)
    "/favicon.ico" {:status 404
                    ;; (fnil inc 0) starts the counter at 1 when it is missing
                    :session (update-in (request :session) [:favicon] (fnil inc 0))}
    "/" {:body (str "<h1>home " (request :flash) " </h1>"
                    "<p> favicon requests: " (get-in request [:session :favicon] 0)
                    "<p> bother requests: "  (get-in request [:session :bother ] 0)
                    "<p>" (link "/bother")
                    "<p>" (link "/"))}
    "/bother" {:status 302, :headers {"Location" "/"}, :body ""
               :flash "(bothered)"
               :session (update-in (request :session) [:bother] (fnil inc 0))}
    ;; default clause: without it `case` throws IllegalArgumentException for
    ;; every other URI, which turns a mistyped address into a server error
    {:status 404}))





Wednesday, January 9, 2013

How Cookies Work in Ring

Cookies turn the stateless http protocol into something with memory.

Here's how to use them in Ring:



;;  necessary dependencies 
;; [[org.clojure/clojure "1.4.0"]
;;  [ring/ring "1.1.6"]]
;; -------------

;; Here's an app, built in a way which should surprise no-one who's read the previous posts

(require 'ring.adapter.jetty 
         'ring.middleware.stacktrace 
         'clojure.pprint)

;; Middleware for spying on the doings of other middleware:
(defn html-escape
  "Returns `string` wrapped in a <pre> element, with the HTML-significant
  characters &, < and > replaced by their entity references."
  [string]
  ;; & has to be escaped as well: otherwise any text in the dump that already
  ;; looks like an entity (e.g. \"&lt;\") would be rendered as the character
  ;; it names instead of being shown literally.
  (str "<pre>"
       (clojure.string/escape string {\& "&amp;", \< "&lt;", \> "&gt;"})
       "</pre>"))

(defn format-request
  "Returns a printable dump of the map `request`: a rule, the heading `name`,
  the pretty-printed map, and a closing rule."
  [name request]
  (let [rule "-------------------------------"]
    (with-out-str
      (doseq [line [rule name]]
        (println line))
      (clojure.pprint/pprint request)
      (println rule))))

(defn wrap-spy
  "Middleware: returns a handler that prints the request it receives and the
  response map `handler` returns, labelled with `spyname`, and splices both
  dumps into the response :body. When `include-body` is false the body is
  shown as \"#<?>\" in the dump of the response."
  [handler spyname include-body]
  (fn [request]
    (let [dump     (fn [label m] (format-request (str spyname label) m))
          incoming (dump ":\n Incoming Request:" request)
          _        (println incoming)          ; printed before the handler runs
          response (handler request)
          shown    (if include-body
                     response
                     (assoc response :body "#<?>"))
          outgoing (dump ":\n Outgoing Response Map:" shown)]
      (println outgoing)
      (update-in response [:body]
                 #(str (html-escape incoming) % (html-escape outgoing))))))



;; Absolute binding promise to someday get around to writing the app
(declare handler)

;; plumbing: every step wraps the result of the step above it, so the last
;; step listed is the outermost layer and the first to see a request
(def app
  (-> #'handler
      (wrap-spy "what the handler sees" true)
      ring.middleware.stacktrace/wrap-stacktrace
      (wrap-spy "what the web server sees" false)))

;; The actual application
(defn handler
  "Answers every request with the same static HTML greeting."
  [request]
  (let [greeting "<h1>Hello World!</h1>"]
    {:body    greeting
     :headers {"Content-Type" "text/html"}
     :status  200}))

 
;; Start the server if it hasn't already been started
;; defonce: re-evaluating this form does not start a second server.
;; #'app passes the var rather than its current value, so later redefinitions
;; of app are picked up by the running server.
(defonce server (ring.adapter.jetty/run-jetty #'app {:port 8080 :join? false}))


;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Next we'll include the cookies middleware

(require 'ring.middleware.cookies)

;; And re-plumb

;; wrap-cookies sits between the two spies: the outer spy shows the raw
;; headers, the inner spy shows what the cookie middleware made of them
(def app
  (-> #'handler
      ring.middleware.stacktrace/wrap-stacktrace
      (wrap-spy "what the handler sees" true)
      ring.middleware.cookies/wrap-cookies
      (wrap-spy "what the web server sees" false)))


;; Now go and look at http://localhost:8080 again.

;; In the map the handler sees, there is a key :cookies, whose value is {}
;; ( If it's not, you might want to clear cookies for localhost from your browser )

;; Let's make our app set a cookie:
(defn handler
  "Static page whose response map also carries a :cookies entry asking for the
  cookie \"yo\" to be set to \"hi\"."
  [request]
  (let [cookie {"yo" {:value "hi"}}]
    {:cookies cookie
     :body    "<h1>Setting Cookie!</h1>"
     :headers {"Content-Type" "text/html"}
     :status  200}))


;; What happens now is quite complicated. 

;; Our key 
{ :cookies {"yo" {:value "hi"}}}
;; Gets converted by the middleware, and combined with our header, to make
{ :headers {"Set-Cookie" '("yo=hi"), "Content-Type" "text/html"}}
;; in the map given to the jetty adapter

;; If you look at the page with
;; $ curl -sv http://localhost:8080
;; Then you'll see
;; < Set-Cookie: yo=hi
;; as part of the http transaction

;; Now if we look at http://localhost:8080, the response will contain the Set-Cookie header.

;; Most browsers will react to this by including the cookie whenever they contact the site.
;; You can examine cookies from the browser's point of view by 
;; (In Chrome) looking at chrome://chrome/settings/cookies
;; (In Firefox) following some interminable GUI procedure that life is too short to describe. 


;; If you refresh the page yet again, you should now see:
{:headers {"cookie" "yo=hi"}}
;; in the incoming request from the webserver
;; and a new key:
{:cookies {"yo" {:value "hi"}}} 
;; in the map the eventual handler sees (put there by the middleware of course!)



;; We can use this to count how many times a particular browser has been greeted:
(defn seen-before
  "Returns the integer stored in the request's \"yo\" cookie, or :never-before
  when the cookie is absent or its value cannot be read as an integer."
  [request]
  (try
    (let [cookies (request :cookies)
          yo      (cookies "yo")]
      (Integer/parseInt (yo :value)))
    ;; any failure on the way (no :cookies, no \"yo\", not a number) lands here
    (catch Exception _
      :never-before)))

(defn handler
  "Greets the browser according to the visit count read by seen-before, and
  sets the \"yo\" cookie to the count to be reported on the next visit."
  [request]
  (let [visits (seen-before request)
        ;; the three cases differ only in the page body and the next cookie value
        [body next-value]
        (cond
          (= visits :never-before) [(str "<h1>Hello Stranger!</h1>") "1"]
          (= visits 1)             [(str "<h1>Hello Again!</h1>") "2"]
          :else                    [(str "<h1>Hi, this is visit " visits "</h1>")
                                    (str (inc visits))])]
    {:status  200
     :headers {"Content-Type" "text/html"}
     :body    body
     :cookies {"yo" {:value next-value}}}))



;; And now, an exercise for the reader!

;; If I look at my site in Firefox, it works as I expected.

;; If I look at it with Chrome, it double counts

;; If I use curl, like so:
;; curl -sv http://localhost:8080 | grep -i hello

;; Then all I ever see is "Hello Stranger"

;; What is going on?



Tuesday, January 8, 2013

Really Rather Overdoing the Interest in How Parameters Work in Ring

The next interesting bit of the Ring Tutorial http://github.com/ring-clojure/ring/wiki/Parameters is pretty straightforward, but I got carried away a bit.


;;  necessary dependencies 
;; [[org.clojure/clojure "1.4.0"]
;;  [ring/ring "1.1.6"]]
;; -------------

;; Behold the mighty app which we have constructed so far:

(require 'ring.adapter.jetty)
(require 'ring.middleware.stacktrace)
(require 'clojure.pprint)

(defn wrap-spy
  "Middleware: returns a handler that pretty-prints the incoming request and
  the response map produced by `handler`, then returns that response
  unchanged."
  [handler]
  (let [rule "-------------------------------"
        show (fn [title m]
               (println title)
               (clojure.pprint/pprint m))]
    (fn [request]
      (println rule)
      (show "Incoming Request:" request)
      (let [response (handler request)]
        (show "Outgoing Response Map:" response)
        (println rule)
        response))))



(defn handler
  "Answers every request with the same static HTML greeting."
  [request]
  (let [greeting "<h1>Hello World!!!!!!!!!!!!!!!</h1>"]
    {:body    greeting
     :headers {"Content-Type" "text/html"}
     :status  200}))

;; the spy is the outermost layer, wrapped around the stacktrace middleware
(def app
  (-> #'handler
      ring.middleware.stacktrace/wrap-stacktrace
      wrap-spy))

;; defonce: re-evaluating this form does not start a second server.
;; #'app passes the var rather than its current value, so later redefinitions
;; of app are picked up by the running server.
(defonce server (ring.adapter.jetty/run-jetty #'app {:port 8080 :join? false}))

;; You can access your greeting at:
;; http://localhost:8080 in your favourite browser.

;; Actually, I think it is nice to see the request and response maps as part of the web page
;; So we can modify our spy function

(defn html-escape
  "Returns `string` wrapped in a <pre> element, with the HTML-significant
  characters &, < and > replaced by their entity references."
  [string]
  ;; & has to be escaped as well: otherwise any text in the dump that already
  ;; looks like an entity (e.g. \"&lt;\") would be rendered as the character
  ;; it names instead of being shown literally.
  (str "<pre>"
       (clojure.string/escape string {\& "&amp;", \< "&lt;", \> "&gt;"})
       "</pre>"))

(defn wrap-spy
  "Middleware: returns a handler that prints the request it receives and the
  response map `handler` returns, labelled with `spyname`, and splices both
  dumps into the response :body. When `include-body` is false the body is
  shown as \"#<?>\" in the dump of the response."
  [handler spyname include-body]
  (let [rule "-------------------------------"]
    (fn [request]
      (let [incoming (with-out-str
                       (println rule)
                       (println spyname ":\n Incoming Request:")
                       (clojure.pprint/pprint request))
            _        (println incoming)        ; printed before the handler runs
            response (handler request)
            shown    (if include-body
                       response
                       (assoc response :body "#<?>"))
            outgoing (with-out-str
                       (println spyname ":\n Outgoing Response Map:")
                       (clojure.pprint/pprint shown)
                       (println rule))]
        (println outgoing)
        (update-in response [:body]
                   #(str (html-escape incoming) % (html-escape outgoing)))))))


;; one spy directly around the handler, one around everything else
(def app
  (-> #'handler
      (wrap-spy "what the handler sees" true)
      ring.middleware.stacktrace/wrap-stacktrace
      (wrap-spy "what the web server sees" false)))

;; With our spying middleware, we can investigate the subject of parameters

;; point your browser at http://localhost:8080/?doom=sarnath

;; In the map which is presented to app by jetty and ring, there is the key :query-string
;; Which should have the value "doom=sarnath"

;; We could use that directly

(defn handler
  "Parses the raw :query-string by hand. A string of the form name=value
  yields an invocation page, any other string an apology, and a request
  without a query string the plain greeting."
  [request]
  (let [;; the query string comes straight from the client, so it is escaped
        ;; before being placed in the page
        escape (fn [text]
                 (clojure.string/escape
                  text {\& "&amp;", \< "&lt;", \> "&gt;", \" "&quot;"}))]
    {:status 200
     :headers {"Content-Type" "text/html"}
     :body (if-let [query (request :query-string)]
             ;; re-matches returns [whole-match group1 group2], or nil when the
             ;; string does not have the shape name=value
             (if-let [[_ incantation city] (re-matches #"(.*)=(.*)" query)]
               (str "<h1>You Have Invoked " (escape incantation)
                    " Upon the City of " (escape city) "</h1>")
               (str "<h1>I do not understand, oh dark master...</h1>"))
             (str "<h1>Hello World!!!!!!!!!!!!!!!</h1>"))}))

;; But clearly there are issues with that approach.

;; Instead, we can let ring take care of it for us by inserting another piece of middleware:

(require 'ring.middleware.params)

;; wrap-params runs just inside the outer spy, so the outer spy shows the
;; request before parameter parsing and the inner spy shows it afterwards
(def app
  (-> #'handler
      (wrap-spy "what the handler sees" true)
      ring.middleware.stacktrace/wrap-stacktrace
      ring.middleware.params/wrap-params
      (wrap-spy "what the web server sees" false)))


;; If you have another look at http://localhost:8080/?Doom=Sarnath

;; You'll see that in between the web server and the handler function, wrap-params has inserted
;; another key, :query-params, with the :query-string already split up into a key-value map

;; So we don't need to parse the query string ourselves any more:

(defn handler
  "Builds the page from the :query-params map. One heading is produced per
  parameter, the plain greeting when there are none, and a hint when the
  :query-params key is missing from the request altogether."
  [request]
  (let [;; parameter names and values come straight from the client, so they
        ;; are escaped before being placed in the page; `str` first, so that
        ;; a non-string value is handled too
        escape (fn [text]
                 (clojure.string/escape
                  (str text) {\& "&amp;", \< "&lt;", \> "&gt;", \" "&quot;"}))]
    {:status 200
     :headers {"Content-Type" "text/html"}
     :body (if-let [params (request :query-params)]
             (if (empty? params)
               (str "<h1>Hello World!</h1>")
               (apply str (for [[k v] params]
                            (str "<h1>You Have Invoked " (escape k)
                                 " Upon the City of " (escape v) "</h1>"))))
             ;; :query-params is what the params middleware adds, so that is
             ;; the middleware to point at when the key is missing
             (str "<h1>Missing :query-params. Have you included ring.middleware.params/wrap-params, oh dark master?</h1>"))}))


             

Getting Started with Ring

In order to write a web app in clojure, it's necessary to understand the library Ring. Ring is a lovely clean design, with good up-to-date docs in the form of its wiki on github.

I'd like to understand how ring works rather better than I do, and so I'm working my way through the whole tutorial step by step, and as I'm going, I'm trying out various things, and making notes.

Here's my version of the 'getting started' tutorial. It's got a few extra frills over the original. I don't know whether other people will find it confusing or helpful, but I'm pretty sure that I'll need to refer to it myself in the future!



;;  necessary dependencies 
;; [[org.clojure/clojure "1.4.0"]
;;  [ring/ring "1.1.6"]]
;; -------------

;; A ring application is a function which takes a request map, and
;; returns a response map

;; Our first response map will have the HTTP status code 200, OK, a
;; content-type header that tells the browser that it's getting plain text
;; and a traditional body text.

(defn app
  "The whole application: ignores the request and returns a plain-text
  greeting with status 200."
  [request]
  (let [headers {"Content-Type" "text/plain"}]
    {:body    "Hello World"
     :headers headers
     :status  200}))


;; Having got a ring application, we need to start a webserver to hand the pages out
;; We'll use jetty (via ring)
(require 'ring.adapter.jetty)

;; And we'd like those pages served on port 8080 

;; Three things to note here:
;;
;; :join? false means that the evaluating thread won't wait for the
;; server to finish (so that the repl doesn't seem to hang).
;;
;; referring to the application function via #' means that ring sees
;; the variable user/app rather than the function (fn[x]{:status 200})
;; which that variable evaluates to. And that means that if we
;; reevaluate the definition, the behaviour the browser sees will
;; change.
;;
;; finally defonce means that if we reload this file, or re-evaluate
;; this line, nothing will happen. That prevents us from accidentally
;; creating multiple copies of the jetty server.

;; Starts jetty on port 8080 serving the var #'app; see the three notes above
;; for :join?, #' and defonce.
(defonce server (ring.adapter.jetty/run-jetty #'app {:port 8080 :join? false}))

;; So, go and look at http://localhost:8080 in your favourite browser.



;; Now let's check that redefining the handler causes a change in the running webapp
(defn app
  "Redefinition of the application: same greeting, now served as HTML."
  [request]
  (let [headers {"Content-Type" "text/html"}]
    {:body    "<h1>Hello World</h1>"
     :headers headers
     :status  200}))

;; Refresh your browser to see the change.

;; I like to leave the web browser of the gods:
;; $ watch -d -n 1 curl -sv http://localhost:8080/ 
;; running in a terminal somewhere.

;; Let's demonstrate that we can stop and restart our server

;; `server` is the value bound by the (defonce server ...) form above;
;; .stop and .start are Java interop method calls on it.
(.stop server)

(.start server)

;; Now, let's look at the information that is going in and out of our application
(require 'clojure.pprint)

;; First we'll delegate the actual functionality of our app to a handler

(defn handler
  "The actual functionality of the app: a static HTML greeting."
  [request]
  (let [headers {"Content-Type" "text/html"}]
    {:body    "<h1>Hello World</h1>"
     :headers headers
     :status  200}))

;; And then we'll wrap that in a wrapper that prints the incoming and outgoing data:

(defn app
  "Delegates to `handler`, pretty-printing the request on the way in and the
  response map on the way out. Returns the handler's response unchanged."
  [request]
  (let [rule "-------------------------------"
        show (fn [title m]
               (println title)
               (clojure.pprint/pprint m))]
    (println rule)
    (show "Incoming Request:" request)
    (let [response (handler request)]
      (show "Outgoing Response Map:" response)
      (println rule)
      response)))

;; Another way to do the same thing is to define what's called a
;; middleware. This is a concept from python, and a good demonstration
;; of why dynamically typed functional languages are such pleasant
;; things to use

;; We define wrap-spy as a function which does to any handler what app does to our handler

(defn wrap-spy
  "Middleware: takes a handler and returns a new handler that pretty-prints
  the incoming request and the outgoing response map around a call to the
  original, whose response it returns unchanged."
  [handler]
  (let [rule "-------------------------------"
        show (fn [title m]
               (println title)
               (clojure.pprint/pprint m))]
    (fn [request]
      (println rule)
      (show "Incoming Request:" request)
      (let [response (handler request)]
        (show "Outgoing Response Map:" response)
        (println rule)
        response))))


;; And now we can write

;; Plain function call: app is whatever wrap-spy returns for the current
;; value of handler.
(def app 
  (wrap-spy handler))

;; Or more idiomatically

;; -> threads handler in as the first argument of (wrap-spy), so this expands
;; to (wrap-spy handler).
(def app
  (-> handler
      (wrap-spy)))

;; which means exactly the same thing!

;; Unfortunately, we've now lost the ability to redefine handler and see the change 
;; reflected in the running app.
(defn handler
  "Redefined greeting, used to show that app does not pick the change up."
  [request]
  (let [headers {"Content-Type" "text/html"}]
    {:body    "<h1>Hello World!</h1>"
     :headers headers
     :status  200}))

;; But the same trick with passing the var in works again.
;; wrapping the var #'handler instead of the function it currently holds
(def app
  (-> #'handler
      wrap-spy))

;; And now we do see changes reflected immediately:
(defn handler
  "Redefined greeting, used to show that app now picks the change up."
  [request]
  (let [headers {"Content-Type" "text/html"}]
    {:body    "<h1>Hello World!!!!!!!!!!!!!!!1</h1>"
     :headers headers
     :status  200}))

;; Error handling in our app is conservative.

;; Deliberately broken handler: building the body evaluates (/ 1 0), so every
;; request raises a divide-by-zero error. It exists to show what happens to
;; errors in the running app.
(defn handler [request]
  {:status 200
   :headers {"Content-Type" "text/html"}
   :body (str "<h1>Hello World!!!!!!!!!!!!!!!1</h1>" (/ 1 0))})

;; The browser gets an HTTP 500 Server Error, and the divide by zero
;; message goes to the console where the server is running.

;; But for development purposes, we can use one of the middlewares provided with ring:

(require 'ring.middleware.stacktrace)

;; wrap-stacktrace goes directly around the handler; the spy stays outermost
(def app
  (-> #'handler
      ring.middleware.stacktrace/wrap-stacktrace
      wrap-spy))

;; Now the stacktrace appears nicely formatted in the web browser instead.










Thursday, October 25, 2012

£500 if you can find me a job!



I'm looking for a job, and as usual I'll pay a commission of £500 to anyone who can find me a good one.

Details here: http://johnlawrenceaspden.blogspot.co.uk/2012/10/gis-job-500-reward.html
CV here: http://www.aspden.com

Obviously Clojure is a speciality, and I'd love a Clojure job, but I can program in all styles. I've spent the last year or so working in C and Verilog, and I like those too, when they're being used for the domain where they were intended.

In fact the main reason this blog hasn't been updated recently is that after a full day of programming, the last thing I want to do is come home and program for fun, so I've hardly used Clojure since last summer.

On the other hand, if you're a local company using Java, who might be interested in giving Clojure a try, I'd love to try to show you what all the fuss is about.

And as a bonus, if I was using Clojure for work, I'd probably get interested again and start updating this blog.

Thursday, September 22, 2011

Clojure Setup Tutorial with EMACS: clojure, clojure.contrib, swank, slime, maven and maven-clojure-plugin in a couple of seconds

As of 22nd September 2011, this is still working fine, this time on Fedora 14 (substituting yum for apt-get, but that's the only difference)

------------------------------------------------------------------------------------------------------------------------------------

I've just (15th February 2011) had to set this up again for someone on an Ubuntu 10.10 box.

And to my speechless amazement (16th February 2011), it worked in exactly the same way on the Mac of Radek Ostrowski (a complete stranger) at dev8d, thus condemning him to years of suffering at the hands of Aquamacs.

This method is still my favourite, still the one I actually use in practice, and still the easiest, and it's been stable for about a year now.


Since I've just confirmed that it still works, I've moved it back to the top.







Install maven: 


$sudo apt-get install maven2 


Then create a pom.xml file to tell maven which repositories to use. There's an example below to copy and paste.


Once you've got maven installed, and made a pom.xml file, then:


$mvn clojure:repl


Will start a REPL with clojure 1.2 and clojure-contrib 1.2 on the classpath. The REPL should use jLine to give it editing and history on all platforms.

If you like to use EMACS instead, then

$mvn clojure:swank



Will start a swank server, which you can connect to with M-x slime-connect.


That's it from the clojure side.



To set up emacs, and equip it to talk to the clojure swank server:


$sudo apt-get install emacs


Then install the emacs lisp package archive (see http://tromey.com/elpa/) by evaluating this code:

 (cut and paste it into the scratch buffer, put the cursor in the middle, and use M-C-x):


--emacs lisp to install elpa---------------






;; Fetch package-install.el over HTTP and evaluate it in this Emacs session.
(let ((buffer (url-retrieve-synchronously
        "http://tromey.com/elpa/package-install.el")))
  (save-excursion
    (set-buffer buffer)
    (goto-char (point-min))
    ;; move to the first empty line; everything after it is evaluated
    ;; (presumably this skips the HTTP response headers -- confirm)
    (re-search-forward "^$" nil 'move)
    (eval-region (point) (point-max))
    ;; the retrieval buffer is no longer needed once its contents have run
    (kill-buffer (current-buffer))))



---end of emacs lisp to install elpa-------------




Now use M-x package-list-packages to bring up the list of packages, and use i and then x to mark and then install slime, slime-repl, and clojure-mode.


Then connect emacs to the already running clojure image with M-x slime-connect


That should be it. You should now be at a running clojure 1.2 repl inside emacs.





Here's an example of a pom.xml file that pulls in clojure and clojure-contrib 1.2. Just cut and paste it.





As well as the essentials, I've also added: the clojars.org repository, where many useful clojure packages live;  the maven versions plugin, which helps with keeping everything cutting edge; and jline so that command line repls work better (mvn clojure:repl)

I like to have my startup repls conditioned a little, so if you have a startup script that you always want to run, add this snippet

        <configuration>
          <replScript>startup.clj</replScript>
        </configuration>

to the clojure-maven-plugin section so that when maven starts a repl, the code in startup.clj is loaded as the first action. This is a good place to set print-length and print-level, so that they will be set before the swank server starts, which means that you won't hang emacs by evaluating an infinite sequence.


I also like to require everything on the classpath, so that I can use things like find-doc to find out about everything. 


pom.xml files look terrifying, but they're really not. It's just that xml is such a godawful verbose way to write things out.

It contains: the addresses of three repositories which hold vital code; the names and versions of four vital jar files that you need; and the names and versions of two helpful maven plugins.


pom.xml



<!-- Minimal Maven project: three repositories, four dependencies and two
     plugins. -->
<project>

  <modelVersion>4.0.0</modelVersion>
  <groupId>com.example</groupId>
  <artifactId>hello-maven-clojure-swank</artifactId>

  <version>1.0-SNAPSHOT</version>
  <name>hello-maven</name>
  <description>maven, clojure, emacs: together at last</description>

  <!-- Where the dependencies below are downloaded from.
       NOTE(review): all three URLs are plain http; confirm that the
       repositories still accept non-https requests. -->
  <repositories>
    <repository>
      <id>clojars</id>
      <url>http://clojars.org/repo/</url>
    </repository>
    <repository>
      <id>clojure</id>
      <url>http://build.clojure.org/releases</url>
    </repository>
    <repository>
      <id>central</id>
      <url>http://repo1.maven.org/maven2</url>
    </repository>
  </repositories>

  <!-- The jars put on the classpath: clojure, clojure-contrib, jline and
       swank-clojure. -->
  <dependencies>
    <dependency>
      <groupId>org.clojure</groupId>
      <artifactId>clojure</artifactId>
      <version>1.2.0</version>
    </dependency>
    <dependency>
      <groupId>org.clojure</groupId>
      <artifactId>clojure-contrib</artifactId>
      <version>1.2.0</version>
    </dependency>
    <dependency>
      <groupId>jline</groupId>
      <artifactId>jline</artifactId>
      <version>0.9.94</version>
    </dependency>
    <dependency>
      <groupId>swank-clojure</groupId>
      <artifactId>swank-clojure</artifactId>
      <version>1.2.1</version>
    </dependency>
  </dependencies>

  <build>
    <plugins>
      <!-- The optional <configuration><replScript> snippet described in the
           text goes inside this plugin element. -->
      <plugin>
         <groupId>com.theoryinpractise</groupId>
            <artifactId>clojure-maven-plugin</artifactId>
         <version>1.3.3</version>
      </plugin>
      <plugin>
        <groupId>org.codehaus.mojo</groupId>
          <artifactId>versions-maven-plugin</artifactId>
        <version>1.2</version>
      </plugin>
    </plugins>

  </build>

</project>




Sunday, May 29, 2011

Numerical Integration: Better Refinements?

;; Numerical Integration: Better Refinements?

;; Here are some very simple functions which we might want to test integration
;; methods on:
(defn square
  "x multiplied by itself."
  [x]
  (* x x))

(defn sine
  "Sine of x, via java.lang.Math."
  [x]
  (Math/sin x))

(defn step
  "0.0 for x below 1/2, otherwise 1.0."
  [x]
  (if-not (< x 1/2)
    1.0
    0.0))

(defn inverse
  "Reciprocal of x."
  [x]
  (/ 1 x))

;; Here are some Newton-Cotes formulae for approximate integration:

(defn trapezium-rule
  "Estimates the integral of f over [a, b] from its values at the two
  endpoints."
  [f a b]
  (let [width     (- b a)
        endpoints (+ (f a) (f b))]
    (* 1/2 width endpoints)))

(defn simpson-rule
  "Estimates the integral of f over [a, b] from its values at the endpoints
  and the midpoint, weighted 1, 4, 1."
  [f a b]
  (let [width    (- b a)
        midpoint (+ a (/ width 2))
        weighted (+ (f a)
                    (* 4 (f midpoint))
                    (f b))]
    (* 1/6 width weighted)))

(defn simpson38-rule
  "Estimates the integral of f over [a, b] from its values at the endpoints
  and the two interior points that divide the interval into thirds, weighted
  1, 3, 3, 1."
  [f a b]
  (let [first-third  (/ (+ a a b) 3)
        second-third (/ (+ a b b) 3)
        values       (map f [a first-third second-third b])
        weighted     (reduce + (map * [1 3 3 1] values))]
    (* 1/8 (- b a) weighted)))

(defn booles-rule
  "Boole's-rule estimate of the integral of f over [a, b].
  Samples f at five equally spaced points (both endpoints and the three
  quarter points), weighted 7, 32, 12, 32, 7, and scales by 1/90 of the
  interval width."
  [f a b]
  (let [nodes        [a
                      (/ (+ a a a b) 4)
                      (/ (+ a a b b) 4)
                      (/ (+ a b b b) 4)
                      b]
        weights      [7 32 12 32 7]
        ;; Terms are summed left to right, in node order.
        weighted-sum (reduce + (map * weights (map f nodes)))]
    (* 1/90 (- b a) weighted-sum)))

;; We can use any of these rules to get an estimate of the integral of a function over an interval:
(simpson-rule inverse 1 3) ; 10/9

;; If we halve the interval and use the rule over both halves, then we can use the rule to
;; get a better estimate by adding the estimates for the half-intervals
(+
 (simpson-rule inverse 1 2)
 (simpson-rule inverse 2 3)) ; 11/10

;; We can guess at the error involved in the estimate by taking the difference
;; between these two estimates, on the basis that splitting the interval usually
;; makes most of the error go away.

(- (simpson-rule inverse 1 3)
   (+
    (simpson-rule inverse 1 2)
    (simpson-rule inverse 2 3))) ; 1/90

;; So we'd expect that the first estimate is out by a bit more than 1/90, and
;; that the second is out by rather less than 1/90

;; For the inverse function, which can be integrated symbolically, we know the
;; true answer:
(- (Math/log 3) (Math/log 1)) ; 1.0986122886681098
(/ 10.0 9) ; 1.1111111111111112
(/ 11.0 10) ; 1.1

;; So the errors are really:
(- 1.0986122 10/9)  ; -0.0124989111111109  ; which is ~ 1/90
(- 1.0986122 11/10) ; -0.00138780000000005 ; which is ~ 1/900

;; This method of guessing the error is deeply suspect, and can go wrong, but I
;; won't go into details.

;; I think it's good enough for our purposes as long as the functions we want to
;; integrate are reasonably well behaved and we take small enough intervals.

;; So we can easily make a function which gives us the more refined of the two
;; estimates, together with a guess as to how close it is to the truth.
(defn approx-with-error
  "Applies rule to f over [a, b] once as a whole and once as the sum over
  the two halves. Returns [refined-estimate error-guess], where
  refined-estimate is the two-halves sum and error-guess is the absolute
  difference between the whole-interval and two-halves estimates."
  [rule f a b]
  (let [coarse  (rule f a b)
        mid     (/ (+ a b) 2)
        refined (+ (rule f a mid) (rule f mid b))
        delta   (- coarse refined)]
    ;; Absolute value is taken by hand so exact ratios stay ratios.
    [refined (if (> delta 0) delta (- delta))]))


;; Let's try it out on a few cases, on the particularly nasty integral of 1/x over [0.01,100]

;; This is the true answer
(- (Math/log 100) (Math/log 0.01)) ; 9.210340371976184

(approx-with-error trapezium-rule inverse 0.01 100) ; [2500.999775019998 2499.000174980002]
;; We guess 2500, and we think we're out by at most 2499, which is just true
(approx-with-error simpson-rule inverse 0.01 100) ; [835.4437770204454 832.5559396728856]
(approx-with-error simpson38-rule inverse 0.01 100) ; [627.4427811912234 624.2442845054817]
(approx-with-error booles-rule inverse 0.01 100) ; [391.7824297523125 388.1576179566068]

;; When we split the interval into two halves [0.01, 50.05] [50.05,100]
(approx-with-error trapezium-rule inverse 0.01 50.05) ; [1252.2495505293746 1250.2503495705255]
(approx-with-error trapezium-rule inverse 50.05 100) ; [0.7072645364881702 0.041486462512828726]

;; Our guess tells us that the great majority of the error is in the first sub interval
;; We might want to refine that first, before bothering with the other one:
;; We'll now split [0.01, 25.025][25.025,50.05][50.05,100]
(approx-with-error trapezium-rule inverse 0.01 25.025) ; [626.6241012183343 624.6256989814656]
(approx-with-error trapezium-rule inverse 25.025 50.05) ; [0.7083333333333333 0.04166666666666663]
(approx-with-error trapezium-rule inverse 50.05 100) ; [0.7072645364881702 0.041486462512828726]

;; Again, one subinterval seems to be responsible for the majority of our errors.

;; We could keep a list of intervals, sorted by the estimated error, and always refine the one
;; with the largest guessed error.

(defn interval->errorstruct
  "Builds the record used to rank intervals for refinement:
  [error-guess estimate [a b]], taken from approx-with-error on [a, b].
  The error guess comes first so that records sort by it."
  [rule f [a b]]
  (let [result (approx-with-error rule f a b)]
    [(nth result 1) (nth result 0) [a b]]))

;; Error records for 1/x on the three sub-intervals of [0.01, 100] found above,
;; each estimated with the trapezium rule.
(def errorstructs
  (map #(interval->errorstruct trapezium-rule inverse %)
       [[0.01 25.025] [25.025 50.05] [50.05 100]]))

errorstructs
;; ([624.6256989814656 626.6241012183343     [0.01   25.025]]
;;  [0.04166666666666663 0.7083333333333333  [25.025 50.05]]
;;  [0.041486462512828726 0.7072645364881702 [50.05  100]])

;; And now we need a function to refine the interval with the largest error

(defn refine
  "Performs one refinement step on a collection of errorstructs
  ([error-guess estimate [a b]]). The records are sorted in descending
  order, the first one (largest error guess) is removed, its interval is
  split at the midpoint, and fresh records for the two halves are put at
  the front of the remaining sorted records."
  [rule f errorstructs]
  (let [ranked        (reverse (sort errorstructs))
        [_ _ [lo hi]] (first ranked)
        others        (rest ranked)
        mid           (/ (+ lo hi) 2)
        left-half     (interval->errorstruct rule f [lo mid])
        right-half    (interval->errorstruct rule f [mid hi])]
    (list* left-half right-half others)))

;; Now with every call to refine, we refine the interval with the largest error estimate
(refine trapezium-rule inverse errorstructs)
;; ([311.93889676733556 313.93570379188156 [0.01 12.5175]]
;;  [0.04159457274964806 0.7079060863675477 [12.5175 25.025]]
;;  [0.04166666666666663 0.7083333333333333 [25.025 50.05]]
;;  [0.041486462512828726 0.7072645364881702 [50.05 100]])


;; Lazy infinite sequence: element n is errorstructs after n trapezium-rule refinement steps.
(def successive-trapezium-refinements
  (iterate #(refine trapezium-rule inverse %) errorstructs))


;; Here's what it looks like after a few iterations
(nth successive-trapezium-refinements 5)
;; ([18.81475746721543 20.764864658796014 [0.01 0.7917187499999999]]
;;  [0.040533241059784286 0.7015625070966475 [0.7917187499999999 1.5734374999999998]]
;;  [0.04109463731966401 0.7049306354620377 [1.5734374999999998 3.136875]]
;;  [0.041379306898238544 0.7066276281534741 [3.136875 6.26375]]
;;  [0.04152264848816445 0.7074793872568832 [6.26375 12.5175]]
;;  [0.04159457274964806 0.7079060863675477 [12.5175 25.025]]
;;  [0.04166666666666663 0.7083333333333333 [25.025 50.05]]
;;  [0.041486462512828726 0.7072645364881702 [50.05 100]])

        
;; We can get our total error estimate (the sum of the first elements) and our
;; best guess for the whole integral (the sum of the second elements) by reducing this list

(reduce + (map first  (nth successive-trapezium-refinements 5))) ; 19.104035002910425
(reduce + (map second (nth successive-trapezium-refinements 5))) ; 25.708968772954105


;; After a hundred refinements..
(reduce + (map first  (nth successive-trapezium-refinements 100))) ; 0.010431101535137086
(reduce + (map second (nth successive-trapezium-refinements 100))) ; 9.213824736866899

;; After a thousand refinements..
(reduce + (map first  (nth successive-trapezium-refinements 1000))) ; 1.0913238861095381E-4
(reduce + (map second (nth successive-trapezium-refinements 1000))) ; 9.210376750235199

;; That's not bad, (the real answer is 9.210340371976184), but it's running very slowly.

;; We could try with a higher order rule

;; Lazy infinite sequence: element n is errorstructs after n Boole's-rule refinement steps.
;; NOTE(review): the starting list `errorstructs` was built with trapezium-rule, so any
;; starting interval that is never split keeps its trapezium-rule estimate -- confirm
;; whether seeding with booles-rule records was intended.
(def successive-boole-refinements
  (iterate #(refine booles-rule inverse %) errorstructs))
(reduce + (map first   (nth successive-boole-refinements 1000))) ; 4.420942778526893E-15
(reduce + (map second  (nth successive-boole-refinements 1000))) ; 9.210340371976176

;; In this case, that seems to work very well, but the run time is appalling.

;; The problem is that we have a longer and longer list of intervals at every
;; step, and every step, we have to sort this list. That's an n^2 algorithm,
;; which won't scale well.

;; What we should do here is use a priority queue. Clojure doesn't have an
;; immutable version, although it's possible to fake one with a sorted map.

;; But rather than do that, I'm going to drop out of the functional paradigm
;; altogether, and use the heap implementation from Java in a mutable fashion,
;; looping and popping and adding.

(defn improve-loop
  "Adaptively refines the integral of f over [a, b] using rule.

  Starts with a single errorstruct for the whole interval, held in a
  mutable java.util.PriorityQueue ordered by largest error guess first.
  Then, count times, removes the record with the largest error guess,
  splits its interval at the midpoint and adds a record for each half.

  Returns the queue, which then holds count + 1 errorstructs
  ([error-guess estimate [a b]]). When count is zero or negative no
  refinement is done and the queue holds only the whole-interval record."
  [rule f a b count]
  (let [largest-error-first (comparator (fn [x y] (> (first x) (first y))))
        ;; PriorityQueue rejects an initial capacity below 1, so a count of 0
        ;; must not be passed straight through; count + 1 is the final size.
        capacity (max 1 (inc count))
        pq (java.util.PriorityQueue. capacity largest-error-first)]
    (.add pq (interval->errorstruct rule f [a b]))
    (dotimes [_ count]
      (let [[_ _ [lo hi]] (.poll pq)
            midpoint (/ (+ lo hi) 2)
            lower (interval->errorstruct rule f [lo midpoint])
            upper (interval->errorstruct rule f [midpoint hi])]
        (doto pq
          (.add lower)
          (.add upper))))
    pq))

;; Now we can do our calculation much faster
(defn integrate
  "Integrates f over [a, b] with count adaptive refinement steps of rule.
  Returns [total-error-guess total-estimate]: the sums of the first and of
  the second elements of the errorstructs produced by improve-loop."
  [rule f a b count]
  (let [structs (improve-loop rule f a b count)
        total   (fn [field] (reduce + (map field structs)))]
    [(total first)
     (total second)]))

;; We'll ask for a thousand refinements, and get back the error estimate, and the answer.
(integrate booles-rule inverse 0.01 100 1000) ; [4.455637248046429E-15 9.21034037197618]


;; Let's try the same integral over the very nasty range [0.00000001, 10000000] which caused serious
;; problems for our previous methods.
;; The real answer is
(- (Math/log 10000000) (Math/log 0.00000001)) ; 34.538776394910684

;; And our approximations are:
(integrate booles-rule inverse 0.00000001 10000000 10) ; [3.797743055486256E10 3.797743056542089E10]
(integrate booles-rule inverse 0.00000001 10000000 100) ; [3.3430724324184924E-5 34.53877704296225]
(integrate booles-rule inverse 0.00000001 10000000 1000) ; [4.549938203979309E-11 34.53877639491147]
(integrate booles-rule inverse 0.00000001 10000000 10000) ; [9.361001557239845E-16 34.53877639491065]

;; For the non-stiff integrals that we started playing with, Boole's rule is great:
;; It's exact for quadratics, and in fact for every polynomial up to degree five
(integrate booles-rule square 0 2 10) ; [0 8/3]
(integrate booles-rule (fn[x] (* x x x x)) 0 2 10) ; [0 32/5]
(integrate booles-rule (fn[x] (* x x x x x)) 0 2 10) ; [0 32/3]

;; and very good for higher powers, even with very few refinements
(integrate booles-rule (fn[x] (* x x x x x x)) 0 2 10) ; [969/8589934592 471219269093/25769803776]
(integrate booles-rule (fn[x] (* x x x x x x)) 0 2 20) ; [2127/1099511627776 60316066438099/3298534883328]

;; convergence is great for sine
(integrate booles-rule sine 0 Math/PI 10) ; [1.7383848804897184E-9 1.9999999999725113]
(integrate booles-rule sine 0 Math/PI 100) ; [3.1931922384043077E-15 1.9999999999999991]
(integrate booles-rule sine 0 Math/PI 1000) ; [2.526233413538588E-17 1.999999999999999]
(integrate booles-rule sine 0 Math/PI 10000) ; [6.32722651455846E-18 2.0]


;; But I'm still quite worried about the error estimate that we made. It's only
;; a guess, and it can be a bad guess.  Here are some functions that are
;; deliberately designed to screw things up.

;; This function is extremely vibratey near the origin.
(defn sineinverse
  "Returns sin(1/x)."
  [x]
  (let [reciprocal (/ 1 x)]
    (Math/sin reciprocal)))

;; The error estimates are clearly wrong here, but the answers seem to settle down to something that looks plausible.

(integrate booles-rule sineinverse 0.001 10 1) ; [0.09752245288534744 3.189170812427795]
(integrate booles-rule sineinverse 0.001 10 10) ; [0.014802407142066881 2.725700351059874]
(integrate booles-rule sineinverse 0.001 10 100) ; [2.666579898821515E-4 2.7262259059929814]
(integrate booles-rule sineinverse 0.001 10 1000) ; [7.84363268117651E-9 2.7262019887881457]
(integrate booles-rule sineinverse 0.001 10 10000) ; [8.311387750656713E-15 2.726201989096135]

;; I'm slightly reassured that if we use the trapezium rule, which should be
;; slower converging but less sensitive to high derivatives, we seem to settle down to the same thing:

(integrate trapezium-rule sineinverse 0.001 10 1) ; [0.3493754261290617 2.9603246206316127]
(integrate trapezium-rule sineinverse 0.001 10 10) ; [0.12037643221528535 2.759621850911819]
(integrate trapezium-rule sineinverse 0.001 10 100) ; [0.011584111090323689 2.728470051290524]
(integrate trapezium-rule sineinverse 0.001 10 1000) ; [7.174438961790802E-4 2.7265014884997414]
(integrate trapezium-rule sineinverse 0.001 10 10000) ; [1.0854830311799172E-5 2.7262023437746654]

;; Since I don't actually know what the integral of sin(1/x) is, I've no idea
;; whether this answer is correct. Since both rules seem to settle down to the
;; same answer, I tend to believe that it is.

;; Here's a weird function, which looks like it should be even worse than sin(1/x) on its own
(defn strange
  "Returns sin(1/x) - cos(1/x)/x."
  [x]
  (let [sine-term   (Math/sin (/ x))
        cosine-term (/ (Math/cos (/ x)) x)]
    (- sine-term cosine-term)))

;; In fact it's the derivative of x sin(1/x), so we can calculate the real answer over [0.001, 10]
;; which should be:
(- (* 10 (Math/sin 1/10)) (* 0.001 (Math/sin 1000))) ; 0.9975072869277496

;; Interestingly, the error estimates look sound for this one:
(integrate booles-rule strange 0.001 10 1) ; [109.91706304856582 -108.12753277035351]
(integrate booles-rule strange 0.001 10 10) ; [0.07641821305025362 -1.0276123964492345]
(integrate booles-rule strange 0.001 10 100) ; [0.0798435700032961 1.0469088424961843]
(integrate booles-rule strange 0.001 10 1000) ; [2.0359056110968434E-6 0.9975072871949854]
(integrate booles-rule strange 0.001 10 10000) ; [1.9224976990340685E-12 0.997507286927752]

;; Since we seem to have dealt well with some nasty functions, we might be
;; getting confident in our rule. I know I was!

;; But this innocuous looking function
(defn sine80squared
  "Returns the square of sin(80 x)."
  [x]
  (let [s (Math/sin (* x 80))]
    (square s)))

;; Is a complete nightmare. It looks as though the method is converging well:
(integrate booles-rule sine80squared 0 Math/PI 1) ; [1.1091279850485843E-28 3.7074689566598855E-28]
(integrate booles-rule sine80squared 0 Math/PI 10) ; [0.013089969389960716 0.7853981633974437]
(integrate booles-rule sine80squared 0 Math/PI 100) ; [1.7991469747360833E-12 0.7853981633974478]
(integrate booles-rule sine80squared 0 Math/PI 1000) ; [3.207733520089899E-13 0.7853981633974484]
(integrate booles-rule sine80squared 0 Math/PI 10000) ; [3.1095566849572033E-15 0.7853981633974481]

;; But if we use a different rule, it also seems to converge, but to a completely different answer
(integrate trapezium-rule sine80squared 0 Math/PI 1) ; [3.226319244612108E-28 4.262878793991289E-28]
(integrate trapezium-rule sine80squared 0 Math/PI 10) ; [0.01573134053904405 0.19774924859401588]
(integrate trapezium-rule sine80squared 0 Math/PI 100) ; [5.4528883422580115E-5 0.19634904414812832]
(integrate trapezium-rule sine80squared 0 Math/PI 1000) ; [4.6327740574637914E-7 0.19634954102557595]
(integrate trapezium-rule sine80squared 0 Math/PI 10000) ; [5.200068066397881E-9 0.19634954084967793]

;; In fact both answers are wrong. We can calculate the real integral of this
;; function over the interval [0,pi] which should be:

(/ (Math/PI) 2) ; 1.5707963267948966

;; In fact if we use our much earlier 'divide every interval evenly' algorithm:
(defn iterated-rule
  "Estimates the integral of f over [a, b] by halving the interval N times
  and summing rule over the resulting 2^N equal sub-intervals.

  With N of zero the rule is applied directly to [a, b]. A negative N is
  treated like zero; testing with pos? rather than (= N 0) also makes a
  floating-point zero a base case, so the recursion always terminates."
  [rule f a b N]
  (if (pos? N)
    (let [midpoint (+ a (/ (- b a) 2))
          depth    (dec N)]
      (+ (iterated-rule rule f a midpoint depth)
         (iterated-rule rule f midpoint b depth)))
    (rule f a b)))

;; We very quickly get surprisingly good answers:
(iterated-rule trapezium-rule sine80squared 0 Math/PI 1) ; 1.13079223568638E-28
(iterated-rule trapezium-rule sine80squared 0 Math/PI 2) ; 4.262878793991289E-28
(iterated-rule trapezium-rule sine80squared 0 Math/PI 3) ; 3.5626606693542546E-28
(iterated-rule trapezium-rule sine80squared 0 Math/PI 4) ; 3.6535380881685736E-28
(iterated-rule trapezium-rule sine80squared 0 Math/PI 5) ; 1.5707963267948966
(iterated-rule trapezium-rule sine80squared 0 Math/PI 6) ; 1.570796326794897
(iterated-rule trapezium-rule sine80squared 0 Math/PI 7) ; 1.5707963267948972
(iterated-rule trapezium-rule sine80squared 0 Math/PI 8) ; 1.5707963267948966
(iterated-rule trapezium-rule sine80squared 0 Math/PI 9) ; 1.5707963267948957
(iterated-rule trapezium-rule sine80squared 0 Math/PI 10) ; 1.5707963267948963

;; With Boole's rule the story is the same:
(iterated-rule booles-rule sine80squared 0 Math/PI 1) ; 3.1974110932118413E-28
(iterated-rule booles-rule sine80squared 0 Math/PI 2) ; 3.7074689566598855E-28
(iterated-rule booles-rule sine80squared 0 Math/PI 3) ; 2.2340214425527414
(iterated-rule booles-rule sine80squared 0 Math/PI 4) ; 1.5358897417550046
(iterated-rule booles-rule sine80squared 0 Math/PI 5) ; 1.570796326794895
(iterated-rule booles-rule sine80squared 0 Math/PI 6) ; 1.5707963267948961
(iterated-rule booles-rule sine80squared 0 Math/PI 7) ; 1.5707963267948966
(iterated-rule booles-rule sine80squared 0 Math/PI 8) ; 1.5707963267948968
(iterated-rule booles-rule sine80squared 0 Math/PI 9) ; 1.5707963267948963


;; So the nice rule that we'd come up with, which worked so well for the stiff problem
;; of integrating 1/x near the origin, is completely broken on something that the obvious
;; recursion integrates (suspiciously) well.

;; The problem is that we used an error estimate that we can't trust to control
;; the refinement process.

;; If we guess that a certain interval contains hardly any error, then it will
;; never get refined at all, so we'll never find out that our guess is wrong.





Followers