diff -ur --exclude-from /home/axboe/cdrom/exclude /opt/kernel/linux-2.4.0-test11/drivers/block/DAC960.c linux/drivers/block/DAC960.c
--- /opt/kernel/linux-2.4.0-test11/drivers/block/DAC960.c	Sun Nov 19 15:27:14 2000
+++ linux/drivers/block/DAC960.c	Sat Nov  4 16:15:54 2000
@@ -1825,7 +1825,6 @@
       Request->nr_segments < Controller->DriverScatterGatherLimit)
     {
       Request->nr_segments++;
-      RequestQueue->elevator.nr_segments++;
       return true;
     }
   return false;
@@ -1849,7 +1848,6 @@
       Request->nr_segments < Controller->DriverScatterGatherLimit)
     {
       Request->nr_segments++;
-      RequestQueue->elevator.nr_segments++;
       return true;
     }
   return false;
@@ -1879,7 +1877,6 @@
   if (TotalSegments > MaxSegments ||
       TotalSegments > Controller->DriverScatterGatherLimit)
     return false;
-  RequestQueue->elevator.nr_segments -= SameSegment;
   Request->nr_segments = TotalSegments;
   return true;
 }
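
The DAC960 hunks above, like the matching paride, i2o and SCSI hunks further down, all remove the same thing: drivers no longer mirror their per-request segment count into the global elevator.nr_segments counter, because the reworked elevator does no segment accounting of its own. A minimal userspace sketch of what remains of such a merge hook; the names (request_state, can_grow_request, sg_limit) are invented for the example, not taken from the driver:

    #include <stdbool.h>
    #include <stdio.h>

    /* Hypothetical stand-in for the driver's per-request state. */
    struct request_state {
    	int nr_segments;
    };

    /*
     * After the patch, a driver's merge hook only has to answer one
     * question: does this request still have room for one more
     * scatter-gather segment?  No global elevator counter is touched.
     */
    static bool can_grow_request(struct request_state *req, int sg_limit)
    {
    	if (req->nr_segments < sg_limit) {
    		req->nr_segments++;
    		return true;
    	}
    	return false;
    }

    int main(void)
    {
    	struct request_state req = { .nr_segments = 31 };

    	printf("grow to 32: %s\n", can_grow_request(&req, 32) ? "ok" : "full");
    	printf("grow to 33: %s\n", can_grow_request(&req, 32) ? "ok" : "full");
    	return 0;
    }
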
diff -ur --exclude-from /home/axboe/cdrom/exclude /opt/kernel/linux-2.4.0-test11/drivers/block/elevator.c linux/drivers/block/elevator.c
--- /opt/kernel/linux-2.4.0-test11/drivers/block/elevator.c	Sun Nov 19 15:27:14 2000
+++ linux/drivers/block/elevator.c	Tue Nov 21 11:18:49 2000
@@ -24,125 +24,111 @@
 #include <linux/blkdev.h>
 #include <linux/elevator.h>
 #include <linux/blk.h>
+#include <linux/module.h>
 #include <asm/uaccess.h>
 
-/*
- * Order ascending, but only allow a request to be skipped a certain
- * number of times
- */
-void elevator_linus(struct request *req, elevator_t *elevator,
-		    struct list_head *real_head,
-		    struct list_head *head, int orig_latency)
-{
-	struct list_head *entry = real_head;
-	struct request *tmp;
-
-	req->elevator_sequence = orig_latency;
-
-	while ((entry = entry->prev) != head) {
-		tmp = blkdev_entry_to_request(entry);
-		if (IN_ORDER(tmp, req))
-			break;
-		if (!tmp->elevator_sequence)
-			break;
-		tmp->elevator_sequence--;
-	}
-	list_add(&req->queue, entry);
-}
-
 int elevator_linus_merge(request_queue_t *q, struct request **req,
+			 struct list_head * head,
 			 struct buffer_head *bh, int rw,
-			 int *max_sectors, int *max_segments)
+			 int max_sectors, int max_segments)
 {
-	struct list_head *entry, *head = &q->queue_head;
+	struct list_head *entry;
 	unsigned int count = bh->b_size >> 9, ret = ELEVATOR_NO_MERGE;
 
-	entry = head;
-	if (q->head_active && !q->plugged)
-		head = head->next;
-
+	entry = &q->queue_head;
 	while ((entry = entry->prev) != head) {
-		struct request *__rq = *req = blkdev_entry_to_request(entry);
+		struct request *__rq = blkdev_entry_to_request(entry);
+
+		/*
+		 * simple "aging" of requests in queue
+		 */
+		if (__rq->elevator_sequence-- <= 0)
+			break;
+
 		if (__rq->sem)
 			continue;
 		if (__rq->cmd != rw)
 			continue;
-		if (__rq->nr_sectors + count > *max_sectors)
-			continue;
 		if (__rq->rq_dev != bh->b_rdev)
 			continue;
+		if (__rq->nr_sectors + count > max_sectors)
+			continue;
+		if (__rq->elevator_sequence < count)
+			break;
 		if (__rq->sector + __rq->nr_sectors == bh->b_rsector) {
 			ret = ELEVATOR_BACK_MERGE;
+			*req = __rq;
 			break;
-		}
-		if (!__rq->elevator_sequence)
-			break;
-		if (__rq->sector - count == bh->b_rsector) {
-			__rq->elevator_sequence--;
+		} else if (__rq->sector - count == bh->b_rsector) {
 			ret = ELEVATOR_FRONT_MERGE;
+			__rq->elevator_sequence -= count;
+			*req = __rq;
 			break;
-		}
+		} else if (BHRQ_IN_ORDER(bh, __rq) && *req == NULL)
+			*req = __rq;
 	}
 
+	return ret;
+}
+
+void elevator_linus_merge_cleanup(request_queue_t *q, struct request *req, int count)
+{
+	struct list_head *entry = &req->queue, *head = &q->queue_head;
+
 	/*
 	 * second pass scan of requests that got passed over, if any
 	 */
-	if (ret != ELEVATOR_NO_MERGE && *req) {
-		while ((entry = entry->next) != &q->queue_head) {
-			struct request *tmp = blkdev_entry_to_request(entry);
-			tmp->elevator_sequence--;
-		}
+	while ((entry = entry->next) != head) {
+		struct request *tmp = blkdev_entry_to_request(entry);
+		tmp->elevator_sequence -= count;
 	}
-
-	return ret;
 }
 
-/*
- * No request sorting, just add it to the back of the list
- */
-void elevator_noop(struct request *req, elevator_t *elevator,
-		   struct list_head *real_head, struct list_head *head,
-		   int orig_latency)
+void elevator_linus_merge_req(struct request *req, struct request *next)
 {
-	list_add_tail(&req->queue, real_head);
+	if (next->elevator_sequence < req->elevator_sequence)
+		req->elevator_sequence = next->elevator_sequence;
 }
 
 /*
- * See if we can find a request that is buffer can be coalesced with.
+ * See if we can find a request that this buffer can be coalesced with.
  */
 int elevator_noop_merge(request_queue_t *q, struct request **req,
+			struct list_head * head,
 			struct buffer_head *bh, int rw,
-			int *max_sectors, int *max_segments)
+			int max_sectors, int max_segments)
 {
-	struct list_head *entry, *head = &q->queue_head;
+	struct list_head *entry;
 	unsigned int count = bh->b_size >> 9;
 
-	if (q->head_active && !q->plugged)
-		head = head->next;
-
-	entry = head;
+	entry = &q->queue_head;
 	while ((entry = entry->prev) != head) {
-		struct request *__rq = *req = blkdev_entry_to_request(entry);
-		if (__rq->sem)
-			continue;
+		struct request *__rq = blkdev_entry_to_request(entry);
+
 		if (__rq->cmd != rw)
 			continue;
-		if (__rq->nr_sectors + count > *max_sectors)
-			continue;
 		if (__rq->rq_dev != bh->b_rdev)
 			continue;
-		if (__rq->sector + __rq->nr_sectors == bh->b_rsector)
+		if (__rq->nr_sectors + count > max_sectors)
+			continue;
+		if (__rq->sem)
+			continue;
+		if (__rq->sector + __rq->nr_sectors == bh->b_rsector) {
+			*req = __rq;
 			return ELEVATOR_BACK_MERGE;
-		if (__rq->sector - count == bh->b_rsector)
+		} else if (__rq->sector - count == bh->b_rsector) {
+			*req = __rq;
 			return ELEVATOR_FRONT_MERGE;
+		}
 	}
+
+	*req = blkdev_entry_to_request(q->queue_head.prev);
 	return ELEVATOR_NO_MERGE;
 }
 
-/*
- * The noop "elevator" does not do any accounting
- */
-void elevator_noop_dequeue(struct request *req) {}
+void elevator_noop_merge_cleanup(request_queue_t *q, struct request *req, int count) {}
+
+void elevator_noop_merge_req(struct request *req, struct request *next) {}
 
 int blkelvget_ioctl(elevator_t * elevator, blkelv_ioctl_arg_t * arg)
 {
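
The rewritten elevator_linus_merge() above folds the old two-step design (a merge scan plus a separate sorted insert in elevator_linus()) into a single backward scan of the queue. Every request the scan passes is aged by decrementing elevator_sequence, and the scan stops as soon as one has aged out, which bounds how long a request can be passed over; when no merge is found, *req is left as a hint telling __make_request() where to insert the new request. Below is a self-contained model of that scan using a plain array instead of the kernel's list_head queue; it keeps the insertion hint strictly sorted, which is a simplification of the looser BHRQ_IN_ORDER heuristic:

    #include <stdio.h>

    enum merge { NO_MERGE, BACK_MERGE, FRONT_MERGE };

    /* Simplified stand-ins for struct request fields used by the scan. */
    struct rq {
    	unsigned long sector;
    	unsigned long nr_sectors;
    	int elevator_sequence;	/* remaining "passover" budget */
    };

    /*
     * Scan the queue from the tail towards the head, aging each request
     * we pass.  Return a merge verdict and, via *hint, the index after
     * which a non-mergeable new request should be inserted.
     */
    static enum merge linus_merge(struct rq *q, int n, unsigned long bh_sector,
    			      unsigned long count, int *hint)
    {
    	int i;

    	*hint = n - 1;			/* default: back of the queue */
    	for (i = n - 1; i >= 0; i--) {
    		struct rq *rq = &q[i];

    		if (rq->elevator_sequence-- <= 0)
    			break;		/* aged out: stop scanning */
    		if (rq->sector + rq->nr_sectors == bh_sector) {
    			*hint = i;
    			return BACK_MERGE;
    		}
    		if (rq->sector - count == bh_sector) {
    			rq->elevator_sequence -= count;
    			*hint = i;
    			return FRONT_MERGE;
    		}
    		if (bh_sector < rq->sector)
    			*hint = i - 1;	/* in-order slot, keep looking */
    	}
    	return NO_MERGE;
    }

    int main(void)
    {
    	struct rq q[] = {
    		{ 100, 8, 50 }, { 200, 8, 50 }, { 300, 8, 50 },
    	};
    	int hint;

    	/* 208 extends the request ending at 208: back merge. */
    	printf("verdict=%d hint=%d\n", linus_merge(q, 3, 208, 8, &hint), hint);
    	/* 150 fits between 100..108 and 200: insertion hint only. */
    	printf("verdict=%d hint=%d\n", linus_merge(q, 3, 150, 8, &hint), hint);
    	return 0;
    }
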
diff -ur --exclude-from /home/axboe/cdrom/exclude /opt/kernel/linux-2.4.0-test11/drivers/block/ll_rw_blk.c linux/drivers/block/ll_rw_blk.c
--- /opt/kernel/linux-2.4.0-test11/drivers/block/ll_rw_blk.c	Mon Nov 20 12:37:20 2000
+++ linux/drivers/block/ll_rw_blk.c	Mon Nov 20 14:32:28 2000
@@ -125,7 +125,7 @@
 	return max_sectors[MAJOR(dev)][MINOR(dev)];
 }
 
-static inline request_queue_t *__blk_get_queue(kdev_t dev)
+inline request_queue_t *__blk_get_queue(kdev_t dev)
 {
 	struct blk_dev_struct *bdev = blk_dev + MAJOR(dev);
 
@@ -153,17 +153,14 @@
 
 static int __blk_cleanup_queue(struct list_head *head)
 {
-	struct list_head *entry;
 	struct request *rq;
 	int i = 0;
 
 	if (list_empty(head))
 		return 0;
 
-	entry = head->next;
 	do {
-		rq = list_entry(entry, struct request, table);
-		entry = entry->next;
+		rq = list_entry(head->next, struct request, table);
 		list_del(&rq->table);
 		kmem_cache_free(request_cachep, rq);
 		i++;
@@ -191,6 +188,8 @@
 
 	count -= __blk_cleanup_queue(&q->request_freelist[READ]);
 	count -= __blk_cleanup_queue(&q->request_freelist[WRITE]);
+	count -= __blk_cleanup_queue(&q->pending_freelist[READ]);
+	count -= __blk_cleanup_queue(&q->pending_freelist[WRITE]);
 
 	if (count)
 		printk("blk_cleanup_queue: leaked requests (%d)\n", count);
@@ -280,7 +279,6 @@
 {
 	if (req->nr_segments < max_segments) {
 		req->nr_segments++;
-		q->elevator.nr_segments++;
 		return 1;
 	}
 	return 0;
@@ -317,7 +315,6 @@
 	if (total_segments > max_segments)
 		return 0;
 
-	q->elevator.nr_segments -= same_segment;
 	req->nr_segments = total_segments;
 	return 1;
 }
@@ -353,7 +350,7 @@
 	}
 }
 
-static void generic_unplug_device(void *data)
+void generic_unplug_device(void *data)
 {
 	request_queue_t *q = (request_queue_t *) data;
 	unsigned long flags;
@@ -368,13 +365,18 @@
 	struct request *rq;
 	int i;
 
+	INIT_LIST_HEAD(&q->request_freelist[READ]);
+	INIT_LIST_HEAD(&q->request_freelist[WRITE]);
+	INIT_LIST_HEAD(&q->pending_freelist[READ]);
+	INIT_LIST_HEAD(&q->pending_freelist[WRITE]);
+	q->pending_free[READ] = q->pending_free[WRITE] = 0;
+
 	/*
-	 * Divide requests in half between read and write. This used to
-	 * be a 2/3 advantage for reads, but now reads can steal from
-	 * the write free list.
+	 * Divide requests in half between read and write
 	 */
 	for (i = 0; i < QUEUE_NR_REQUESTS; i++) {
 		rq = kmem_cache_alloc(request_cachep, SLAB_KERNEL);
+		memset(rq, 0, sizeof(struct request));
 		rq->rq_status = RQ_INACTIVE;
 		list_add(&rq->table, &q->request_freelist[i & 1]);
 	}
@@ -413,15 +415,13 @@
  *    blk_queue_headactive().
  *
  * Note:
- *    blk_init_queue() must be paired with a blk_cleanup-queue() call
+ *    blk_init_queue() must be paired with a blk_cleanup_queue() call
  *    when the block device is deactivated (such as at module unload).
  **/
 static int __make_request(request_queue_t * q, int rw,  struct buffer_head * bh);
 void blk_init_queue(request_queue_t * q, request_fn_proc * rfn)
 {
 	INIT_LIST_HEAD(&q->queue_head);
-	INIT_LIST_HEAD(&q->request_freelist[READ]);
-	INIT_LIST_HEAD(&q->request_freelist[WRITE]);
 	elevator_init(&q->elevator, ELEVATOR_LINUS);
 	blk_init_free_list(q);
 	q->request_fn     	= rfn;
@@ -443,7 +443,6 @@
 	q->head_active    	= 1;
 }
 
-
 #define blkdev_free_rq(list) list_entry((list)->next, struct request, table);
 /*
  * Get a free request. io_request_lock must be held and interrupts
@@ -451,37 +450,16 @@
  */
 static inline struct request *get_request(request_queue_t *q, int rw)
 {
-	struct list_head *list = &q->request_freelist[rw];
-	struct request *rq;
-
-	/*
-	 * Reads get preferential treatment and are allowed to steal
-	 * from the write free list if necessary.
-	 */
-	if (!list_empty(list)) {
-		rq = blkdev_free_rq(list);
-		goto got_rq;
-	}
+	struct request *rq = NULL;
 
-	/*
-	 * if the WRITE list is non-empty, we know that rw is READ
-	 * and that the READ list is empty. allow reads to 'steal'
-	 * from the WRITE list.
-	 */
-	if (!list_empty(&q->request_freelist[WRITE])) {
-		list = &q->request_freelist[WRITE];
-		rq = blkdev_free_rq(list);
-		goto got_rq;
+	if (!list_empty(&q->request_freelist[rw])) {
+		rq = blkdev_free_rq(&q->request_freelist[rw]);
+		list_del(&rq->table);
+		rq->rq_status = RQ_ACTIVE;
+		rq->special = NULL;
+		rq->q = q;
 	}
 
-	return NULL;
-
-got_rq:
-	list_del(&rq->table);
-	rq->free_list = list;
-	rq->rq_status = RQ_ACTIVE;
-	rq->special = NULL;
-	rq->q = q;
 	return rq;
 }
 
@@ -578,16 +556,22 @@
  */
 
 static inline void add_request(request_queue_t * q, struct request * req,
-			       struct list_head *head, int lat)
+			       struct list_head *insert_here)
 {
 	int major;
 
 	drive_stat_acct(req->rq_dev, req->cmd, req->nr_sectors, 1);
 
+	if (!q->plugged && q->head_active && insert_here == &q->queue_head) {
+		spin_unlock_irq(&io_request_lock);
+		BUG();
+	}
+
 	/*
-	 * let selected elevator insert the request
+	 * elevator indicated where it wants this request to be
+	 * inserted at elevator_merge time
 	 */
-	q->elevator.elevator_fn(req, &q->elevator, &q->queue_head, head, lat);
+	list_add(&req->queue, insert_here);
 
         /*
 	 * FIXME(eric) I don't understand why there is a need for this
@@ -605,20 +589,47 @@
 		(q->request_fn)(q);
 }
 
+void inline blk_refill_freelist(request_queue_t *q, int rw)
+{
+	if (q->pending_free[rw]) {
+		list_splice(&q->pending_freelist[rw], &q->request_freelist[rw]);
+		INIT_LIST_HEAD(&q->pending_freelist[rw]);
+		q->pending_free[rw] = 0;
+	}
+}
+
 /*
  * Must be called with io_request_lock held and interrupts disabled
  */
 void inline blkdev_release_request(struct request *req)
 {
+	request_queue_t *q = req->q;
+	int rw = req->cmd;
+
 	req->rq_status = RQ_INACTIVE;
+	req->q = NULL;
 
 	/*
 	 * Request may not have originated from ll_rw_blk
 	 */
-	if (req->free_list) {
-		list_add(&req->table, req->free_list);
-		req->free_list = NULL;
-		wake_up(&req->q->wait_for_request);
+	if (q) {
+		if (!list_empty(&q->request_freelist[rw])) {
+			blk_refill_freelist(q, rw);
+			list_add(&req->table, &q->request_freelist[rw]);
+			return;
+		}
+
+		/*
+		 * free list is empty, add to pending free list and
+		 * batch wakeups
+		 */
+		list_add(&req->table, &q->pending_freelist[rw]);
+
+		if (++q->pending_free[rw] >= (QUEUE_NR_REQUESTS >> 3)) {
+			int wake_up = q->pending_free[rw];
+			blk_refill_freelist(q, rw);
+			wake_up_nr(&q->wait_for_request, wake_up);
+		}
 	}
 }
 
@@ -643,9 +654,10 @@
 	 * will have been updated to the appropriate number,
 	 * and we shouldn't do it here too.
 	 */
-	if(!(q->merge_requests_fn)(q, req, next, max_segments))
+	if(!q->merge_requests_fn(q, req, next, max_segments))
 		return;
 
+	q->elevator.elevator_merge_req_fn(req, next);
 	req->bhtail->b_reqnext = next->bh;
 	req->bhtail = next->bhtail;
 	req->nr_sectors = req->hard_nr_sectors += next->hard_nr_sectors;
@@ -683,8 +695,8 @@
 	unsigned int sector, count;
 	int max_segments = MAX_SEGMENTS;
 	struct request * req = NULL, *freereq = NULL;
-	int rw_ahead, max_sectors, el_ret;
-	struct list_head *head = &q->queue_head;
+	int rw_ahead, max_sectors, el_ret, rw_cmd = rw;
+	struct list_head *head, *insert_here;
 	int latency;
 	elevator_t *elevator = &q->elevator;
 
@@ -695,9 +707,10 @@
 	switch (rw) {
 		case READA:
 			rw_ahead = 1;
-			rw = READ;	/* drop into READ */
+			rw_cmd = rw = READ;	/* drop into READ */
 		case READ:
 		case WRITE:
+			latency = elevator_request_latency(elevator, rw);
 			break;
 		default:
 			BUG();
@@ -726,36 +739,31 @@
 	 */
 	max_sectors = get_max_sectors(bh->b_rdev);
 
-	latency = elevator_request_latency(elevator, rw);
-
+again:
 	/*
 	 * Now we acquire the request spinlock, we have to be mega careful
 	 * not to schedule or do something nonatomic
 	 */
-again:
 	spin_lock_irq(&io_request_lock);
 
-	/*
-	 * skip first entry, for devices with active queue head
-	 */
-	if (q->head_active && !q->plugged)
-		head = head->next;
-
+	head = &q->queue_head;
+	insert_here = head->prev;
 	if (list_empty(head)) {
 		q->plug_device_fn(q, bh->b_rdev); /* is atomic */
 		goto get_rq;
-	}
+	} else if (q->head_active && !q->plugged)
+		head = head->next;
 
-	el_ret = elevator->elevator_merge_fn(q, &req, bh, rw, &max_sectors, &max_segments);
+	el_ret = elevator->elevator_merge_fn(q, &req, head, bh, rw, max_sectors, max_segments);
 	switch (el_ret) {
 
 		case ELEVATOR_BACK_MERGE:
 			if (!q->back_merge_fn(q, req, bh, max_segments))
 				break;
+			elevator->elevator_merge_cleanup_fn(q, req, count);
 			req->bhtail->b_reqnext = bh;
 			req->bhtail = bh;
 			req->nr_sectors = req->hard_nr_sectors += count;
-			req->e = elevator;
 			drive_stat_acct(req->rq_dev, req->cmd, count, 0);
 			attempt_back_merge(q, req, max_sectors, max_segments);
 			goto out;
@@ -763,20 +771,28 @@
 		case ELEVATOR_FRONT_MERGE:
 			if (!q->front_merge_fn(q, req, bh, max_segments))
 				break;
+			elevator->elevator_merge_cleanup_fn(q, req, count);
 			bh->b_reqnext = req->bh;
 			req->bh = bh;
 			req->buffer = bh->b_data;
 			req->current_nr_sectors = count;
 			req->sector = req->hard_sector = sector;
 			req->nr_sectors = req->hard_nr_sectors += count;
-			req->e = elevator;
 			drive_stat_acct(req->rq_dev, req->cmd, count, 0);
 			attempt_front_merge(q, head, req, max_sectors, max_segments);
 			goto out;
+
 		/*
 		 * elevator says don't/can't merge. get new request
 		 */
 		case ELEVATOR_NO_MERGE:
+			/*
+			 * use elevator hints as to where to insert the
+			 * request. if no hints, just add it to the back
+			 * of the queue
+			 */
+			if (req)
+				insert_here = &req->queue;
 			break;
 
 		default:
@@ -804,7 +820,8 @@
 	}
 
 /* fill up the request-info, and add it to the queue */
-	req->cmd = rw;
+	req->elevator_sequence = latency;
+	req->cmd = rw_cmd;
 	req->errors = 0;
 	req->hard_sector = req->sector = sector;
 	req->hard_nr_sectors = req->nr_sectors = count;
@@ -816,13 +833,12 @@
 	req->bh = bh;
 	req->bhtail = bh;
 	req->rq_dev = bh->b_rdev;
-	req->e = elevator;
-	add_request(q, req, head, latency);
+	add_request(q, req, insert_here);
 out:
-	if (!q->plugged)
-		(q->request_fn)(q);
 	if (freereq)
 		blkdev_release_request(freereq);
+	if (!q->plugged)
+		q->request_fn(q);
 	spin_unlock_irq(&io_request_lock);
 	return 0;
 end_io:
@@ -877,7 +893,6 @@
 			buffer_IO_error(bh);
 			break;
 		}
-
 	}
 	while (q->make_request_fn(q, rw, bh));
 }
@@ -995,6 +1010,8 @@
 	if ((bh = req->bh) != NULL) {
 		nsect = bh->b_size >> 9;
 		req->bh = bh->b_reqnext;
+		if (req->bh && (bh->b_rsector + (bh->b_size >> 9)) != req->bh->b_rsector)
+			printk("%s: %lu is followed by %lu\n", name, bh->b_rsector, req->bh->b_rsector);
 		bh->b_reqnext = NULL;
 		bh->b_end_io(bh, uptodate);
 		if ((bh = req->bh) != NULL) {
@@ -1017,10 +1034,6 @@
 
 void end_that_request_last(struct request *req)
 {
-	if (req->e) {
-		printk("end_that_request_last called with non-dequeued req\n");
-		BUG();
-	}
 	if (req->sem != NULL)
 		up(req->sem);
 
@@ -1158,9 +1171,11 @@
 EXPORT_SYMBOL(end_that_request_last);
 EXPORT_SYMBOL(blk_init_queue);
 EXPORT_SYMBOL(blk_get_queue);
+EXPORT_SYMBOL(__blk_get_queue);
 EXPORT_SYMBOL(blk_cleanup_queue);
 EXPORT_SYMBOL(blk_queue_headactive);
 EXPORT_SYMBOL(blk_queue_pluggable);
 EXPORT_SYMBOL(blk_queue_make_request);
 EXPORT_SYMBOL(generic_make_request);
 EXPORT_SYMBOL(blkdev_release_request);
+EXPORT_SYMBOL(generic_unplug_device);
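
The freelist changes above replace a wakeup per freed request with batching: while request_freelist has entries, a released request goes straight back (after splicing in anything pending); once the free list runs dry, released requests park on pending_freelist until an eighth of the pool (QUEUE_NR_REQUESTS >> 3) has accumulated, at which point they are spliced back and that many sleepers are woken at once via the new wake_up_nr(). A sketch of that accounting with plain counters, assuming a 64-request pool:

    #include <stdio.h>

    #define QUEUE_NR_REQUESTS 64	/* assumed pool size for the sketch */

    struct queue_state {
    	int free;		/* requests on request_freelist */
    	int pending;		/* requests parked on pending_freelist */
    };

    /* Hypothetical stand-in for wake_up_nr(): wake up to nr sleepers. */
    static void wake_up_nr(int nr)
    {
    	printf("waking %d sleepers\n", nr);
    }

    static void release_request(struct queue_state *q)
    {
    	if (q->free) {
    		/* Free list has entries, so nobody is starved: splice
    		 * any pending requests back and return this one too. */
    		q->free += q->pending + 1;
    		q->pending = 0;
    		return;
    	}
    	/* Free list is empty: batch freed requests and wake sleepers
    	 * only once a decent number has accumulated. */
    	if (++q->pending >= (QUEUE_NR_REQUESTS >> 3)) {
    		int n = q->pending;

    		q->free += n;
    		q->pending = 0;
    		wake_up_nr(n);
    	}
    }

    int main(void)
    {
    	struct queue_state q = { .free = 0, .pending = 0 };
    	int i;

    	for (i = 0; i < 8; i++)	/* the eighth completion triggers the wakeup */
    		release_request(&q);
    	return 0;
    }
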
diff -ur --exclude-from /home/axboe/cdrom/exclude /opt/kernel/linux-2.4.0-test11/drivers/block/paride/pd.c linux/drivers/block/paride/pd.c
--- /opt/kernel/linux-2.4.0-test11/drivers/block/paride/pd.c	Sun Nov 19 15:27:14 2000
+++ linux/drivers/block/paride/pd.c	Sat Nov  4 16:15:54 2000
@@ -392,7 +392,6 @@
 
 	if (req->nr_segments < max_segments) {
 		req->nr_segments++;
-		q->elevator.nr_segments++;
 		return 1;
 	}
 	return 0;
@@ -432,7 +431,6 @@
 	if (total_segments > max_segments)
 		return 0;
 
-	q->elevator.nr_segments -= same_segment;
 	req->nr_segments = total_segments;
 	return 1;
 }
diff -ur --exclude-from /home/axboe/cdrom/exclude /opt/kernel/linux-2.4.0-test11/drivers/block/paride/pf.c linux/drivers/block/paride/pf.c
--- /opt/kernel/linux-2.4.0-test11/drivers/block/paride/pf.c	Sun Nov 19 15:27:14 2000
+++ linux/drivers/block/paride/pf.c	Sat Nov  4 16:15:54 2000
@@ -346,7 +346,6 @@
 
 	if (req->nr_segments < max_segments) {
 		req->nr_segments++;
-		q->elevator.nr_segments++;
 		return 1;
 	}
 	return 0;
@@ -386,7 +385,6 @@
 	if (total_segments > max_segments)
 		return 0;
 
-	q->elevator.nr_segments -= same_segment;
 	req->nr_segments = total_segments;
 	return 1;
 }
diff -ur --exclude-from /home/axboe/cdrom/exclude /opt/kernel/linux-2.4.0-test11/drivers/i2o/i2o_block.c linux/drivers/i2o/i2o_block.c
--- /opt/kernel/linux-2.4.0-test11/drivers/i2o/i2o_block.c	Mon Nov 20 12:37:20 2000
+++ linux/drivers/i2o/i2o_block.c	Fri Nov 10 03:10:35 2000
@@ -392,7 +392,6 @@
 
 	if (req->nr_segments < max_segments) {
 		req->nr_segments++;
-		q->elevator.nr_segments++;
 		return 1;
 	}
 	return 0;
@@ -436,7 +435,6 @@
 	if (total_segments > max_segments)
 		return 0;
 
-	q->elevator.nr_segments -= same_segment;
 	req->nr_segments = total_segments;
 	return 1;
 }
diff -ur --exclude-from /home/axboe/cdrom/exclude /opt/kernel/linux-2.4.0-test11/drivers/ide/ide-disk.c linux/drivers/ide/ide-disk.c
--- /opt/kernel/linux-2.4.0-test11/drivers/ide/ide-disk.c	Sun Nov 19 15:27:14 2000
+++ linux/drivers/ide/ide-disk.c	Sat Nov  4 16:40:59 2000
@@ -293,7 +293,6 @@
 static ide_startstop_t multwrite_intr (ide_drive_t *drive)
 {
 	byte stat;
-	int i;
 	ide_hwgroup_t *hwgroup = HWGROUP(drive);
 	struct request *rq = &hwgroup->wrq;
 
@@ -309,21 +308,8 @@
 				ide_set_handler (drive, &multwrite_intr, WAIT_CMD, NULL);
 				return ide_started;
 			}
-		} else {
-			/*
-			 *	If the copy has all the blocks completed then
-			 *	we can end the original request.
-			 */
-			if (!rq->nr_sectors) {	/* all done? */
-				rq = hwgroup->rq;
-				for (i = rq->nr_sectors; i > 0;){
-					i -= rq->current_nr_sectors;
-					ide_end_request(1, hwgroup);
-				}
-				return ide_stopped;
-			}
 		}
-		return ide_stopped;	/* the original code did this here (?) */
+		return ide_stopped;
 	}
 	return ide_error(drive, "multwrite_intr", stat);
 }
diff -ur --exclude-from /home/axboe/cdrom/exclude /opt/kernel/linux-2.4.0-test11/drivers/ide/ide-dma.c linux/drivers/ide/ide-dma.c
--- /opt/kernel/linux-2.4.0-test11/drivers/ide/ide-dma.c	Sun Nov 19 15:27:14 2000
+++ linux/drivers/ide/ide-dma.c	Sat Nov  4 16:15:54 2000
@@ -224,6 +224,9 @@
 		unsigned char *virt_addr = bh->b_data;
 		unsigned int size = bh->b_size;
 
+		if (nents >= PRD_ENTRIES)
+			return 0;
+
 		while ((bh = bh->b_reqnext) != NULL) {
 			if ((virt_addr + size) != (unsigned char *) bh->b_data)
 				break;
@@ -257,6 +260,9 @@
 
 	HWIF(drive)->sg_nents = i = ide_build_sglist(HWIF(drive), HWGROUP(drive)->rq);
 
+	if (!i)
+		return 0;
+
 	sg = HWIF(drive)->sg_table;
 	while (i && sg_dma_len(sg)) {
 		u32 cur_addr;
@@ -266,7 +272,7 @@
 		cur_len = sg_dma_len(sg);
 
 		while (cur_len) {
-			if (++count >= PRD_ENTRIES) {
+			if (count++ >= PRD_ENTRIES) {
 				printk("%s: DMA table too small\n", drive->name);
 				pci_unmap_sg(HWIF(drive)->pci_dev,
 					     HWIF(drive)->sg_table,
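
The ide-dma.c change from ++count to count++ is a textbook off-by-one: testing the pre-incremented value rejected the table one entry early, so only PRD_ENTRIES - 1 slots were ever usable. A short demonstration with a deliberately tiny table:

    #include <stdio.h>

    #define PRD_ENTRIES 4	/* tiny table so the difference is visible */

    int main(void)
    {
    	int count, used;

    	/* Old test: ++count >= PRD_ENTRIES overflows on the 4th entry. */
    	for (count = 0, used = 0; ; used++)
    		if (++count >= PRD_ENTRIES)
    			break;
    	printf("pre-increment:  %d usable entries\n", used);

    	/* New test: count++ >= PRD_ENTRIES allows all 4 entries. */
    	for (count = 0, used = 0; ; used++)
    		if (count++ >= PRD_ENTRIES)
    			break;
    	printf("post-increment: %d usable entries\n", used);
    	return 0;
    }
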
diff -ur --exclude-from /home/axboe/cdrom/exclude /opt/kernel/linux-2.4.0-test11/drivers/ide/ide-probe.c linux/drivers/ide/ide-probe.c
--- /opt/kernel/linux-2.4.0-test11/drivers/ide/ide-probe.c	Sun Nov 19 15:27:14 2000
+++ linux/drivers/ide/ide-probe.c	Mon Nov  6 11:27:53 2000
@@ -134,7 +134,7 @@
 					break;
 				}
 #endif
-				printk ("CDROM");
+				printk ("CD/DVD-ROM");
 				break;
 			case ide_tape:
 				printk ("TAPE");
@@ -761,9 +761,10 @@
 	for (unit = 0; unit < minors; ++unit) {
 		*bs++ = BLOCK_SIZE;
 #ifdef CONFIG_BLK_DEV_PDC4030
-		*max_sect++ = ((hwif->chipset == ide_pdc4030) ? 127 : MAX_SECTORS);
+		*max_sect++ = ((hwif->chipset == ide_pdc4030) ? 127 : 256);
 #else
-		*max_sect++ = MAX_SECTORS;
+		/* IDE can do up to 128K per request. */
+		*max_sect++ = 256;
 #endif
 		*max_ra++ = MAX_READAHEAD;
 	}
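
The new 256-sector cap matches the comment's arithmetic: 256 sectors of 512 bytes is exactly 128 KiB per request, the most a single IDE command can transfer. For the record:

    #include <stdio.h>

    int main(void)
    {
    	unsigned int sectors = 256, sector_size = 512;

    	/* 256 sectors * 512 bytes = 131072 bytes = 128 KiB per request */
    	printf("%u bytes (%u KiB)\n", sectors * sector_size,
    	       sectors * sector_size / 1024);
    	return 0;
    }
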
diff -ur --exclude-from /home/axboe/cdrom/exclude /opt/kernel/linux-2.4.0-test11/drivers/md/lvm.c linux/drivers/md/lvm.c
--- /opt/kernel/linux-2.4.0-test11/drivers/md/lvm.c	Mon Nov 20 12:37:20 2000
+++ linux/drivers/md/lvm.c	Fri Nov 10 03:10:35 2000
@@ -195,7 +195,7 @@
 #define	DEVICE_REQUEST	lvm_dummy_device_request
 
 static int lvm_make_request_fn(request_queue_t *, int, struct buffer_head*);
-static void lvm_plug_device_noop(request_queue_t *, kdev_t);
+static void lvm_dummy_plug_device(request_queue_t *, kdev_t);
 
 static int lvm_blk_ioctl(struct inode *, struct file *, uint, ulong);
 static int lvm_blk_open(struct inode *, struct file *);
@@ -404,7 +404,7 @@
 
 	blk_init_queue(BLK_DEFAULT_QUEUE(MAJOR_NR), DEVICE_REQUEST);
 	blk_queue_make_request(BLK_DEFAULT_QUEUE(MAJOR_NR), lvm_make_request_fn);
-	blk_queue_pluggable(BLK_DEFAULT_QUEUE(MAJOR_NR), lvm_plug_device_noop);
+	blk_queue_pluggable(BLK_DEFAULT_QUEUE(MAJOR_NR), lvm_dummy_plug_device);
 	/* optional read root VGDA */
 /*
    if ( *rootvg != 0) vg_read_with_pv_and_lv ( rootvg, &vg);
@@ -1493,11 +1493,18 @@
 }
 
 /*
- * plug device function is a noop because plugging has to happen
- * in the queue of the physical blockdevice to allow the
- * elevator to do a better job.
+ * plugging has to happen in the queue of the physical blockdevice
+ * to allow the elevator to do a better job.
  */
-static void lvm_plug_device_noop(request_queue_t *q, kdev_t dev) { }
+static void lvm_dummy_plug_device(request_queue_t *q, kdev_t dev)
+{
+	printk(KERN_EMERG
+	       "%s -- oops, got lvm plug for %02d:%02d [sector: %lu]\n",
+	       lvm_name,
+	       MAJOR(CURRENT->rq_dev),
+	       MINOR(CURRENT->rq_dev),
+	       CURRENT->sector);
+}
 
 /********************************************************************
  *
diff -ur --exclude-from /home/axboe/cdrom/exclude /opt/kernel/linux-2.4.0-test11/drivers/scsi/scsi_lib.c linux/drivers/scsi/scsi_lib.c
--- /opt/kernel/linux-2.4.0-test11/drivers/scsi/scsi_lib.c	Sun Nov 19 15:27:14 2000
+++ linux/drivers/scsi/scsi_lib.c	Sun Nov 19 16:35:00 2000
@@ -50,6 +50,54 @@
  * This entire source file deals with the new queueing code.
  */
 
+/*
+ * Function:	__scsi_insert_special()
+ *
+ * Purpose:	worker for scsi_insert_special_*()
+ *
+ * Arguments:	q - request queue where request should be inserted
+ *		rq - request to be inserted
+ * 		data - private data
+ *		at_head - insert request at head or tail of queue
+ *
+ * Lock status:	Assumed that io_request_lock is not held upon entry.
+ *
+ * Returns:	Nothing
+ *
+ * Notes:	Makes no guarantee as to when the request will be executed.
+ *		Requests stored at the end of the queue are guaranteed not to
+ *		be re-sorted or postponed.
+ */
+static void __scsi_insert_special(request_queue_t *q, struct request *rq,
+				  void *data, int at_head)
+{
+	unsigned long flags;
+
+	ASSERT_LOCK(&io_request_lock, 0);
+
+	rq->cmd = SPECIAL;
+	rq->special = data;
+	rq->q = NULL;
+	rq->nr_segments = 0;
+	rq->elevator_sequence = 0;
+
+	/*
+	 * We have the option of inserting the head or the tail of the queue.
+	 * Typically we use the tail for new ioctls and so forth.  We use the
+	 * head of the queue for things like a QUEUE_FULL message from a
+	 * device, or a host that is unable to accept a particular command.
+	 */
+	spin_lock_irqsave(&io_request_lock, flags);
+	q->plug_device_fn(q, 0);
+
+	if (at_head)
+		list_add(&rq->queue, &q->queue_head);
+	else
+		list_add_tail(&rq->queue, &q->queue_head);
+
+	spin_unlock_irqrestore(&io_request_lock, flags);
+}
+
 
 /*
  * Function:    scsi_insert_special_cmd()
@@ -73,52 +121,9 @@
  */
 int scsi_insert_special_cmd(Scsi_Cmnd * SCpnt, int at_head)
 {
-	unsigned long flags;
-	request_queue_t *q;
-
-	ASSERT_LOCK(&io_request_lock, 0);
-
-	/*
-	 * The SCpnt already contains a request structure - we will doctor the
-	 * thing up with the appropriate values and use that in the actual
-	 * request queue.
-	 */
-	q = &SCpnt->device->request_queue;
-	SCpnt->request.cmd = SPECIAL;
-	SCpnt->request.special = (void *) SCpnt;
-	SCpnt->request.q = NULL;
-	SCpnt->request.free_list = NULL;
-	SCpnt->request.nr_segments = 0;
-
-	/*
-	 * We have the option of inserting the head or the tail of the queue.
-	 * Typically we use the tail for new ioctls and so forth.  We use the
-	 * head of the queue for things like a QUEUE_FULL message from a
-	 * device, or a host that is unable to accept a particular command.
-	 */
-	spin_lock_irqsave(&io_request_lock, flags);
-
-	if (at_head) {
-		list_add(&SCpnt->request.queue, &q->queue_head);
-	} else {
-		/*
-		 * FIXME(eric) - we always insert at the tail of the
-		 * list.  Otherwise ioctl commands would always take
-		 * precedence over normal I/O.  An ioctl on a busy
-		 * disk might be delayed indefinitely because the
-		 * request might not float high enough in the queue
-		 * to be scheduled.
-		 */
-		list_add_tail(&SCpnt->request.queue, &q->queue_head);
-	}
+	request_queue_t *q = &SCpnt->device->request_queue;
 
-	/*
-	 * Now hit the requeue function for the queue.  If the host is
-	 * already busy, so be it - we have nothing special to do.  If
-	 * the host can queue it, then send it off.  
-	 */
-	q->request_fn(q);
-	spin_unlock_irqrestore(&io_request_lock, flags);
+	__scsi_insert_special(q, &SCpnt->request, SCpnt, at_head);
 	return 0;
 }
 
@@ -144,51 +149,9 @@
  */
 int scsi_insert_special_req(Scsi_Request * SRpnt, int at_head)
 {
-	unsigned long flags;
-	request_queue_t *q;
-
-	ASSERT_LOCK(&io_request_lock, 0);
-
-	/*
-	 * The SCpnt already contains a request structure - we will doctor the
-	 * thing up with the appropriate values and use that in the actual
-	 * request queue.
-	 */
-	q = &SRpnt->sr_device->request_queue;
-	SRpnt->sr_request.cmd = SPECIAL;
-	SRpnt->sr_request.special = (void *) SRpnt;
-	SRpnt->sr_request.q = NULL;
-	SRpnt->sr_request.nr_segments = 0;
-
-	/*
-	 * We have the option of inserting the head or the tail of the queue.
-	 * Typically we use the tail for new ioctls and so forth.  We use the
-	 * head of the queue for things like a QUEUE_FULL message from a
-	 * device, or a host that is unable to accept a particular command.
-	 */
-	spin_lock_irqsave(&io_request_lock, flags);
+	request_queue_t *q = &SRpnt->sr_device->request_queue;
 
-	if (at_head) {
-		list_add(&SRpnt->sr_request.queue, &q->queue_head);
-	} else {
-		/*
-		 * FIXME(eric) - we always insert at the tail of the
-		 * list.  Otherwise ioctl commands would always take
-		 * precedence over normal I/O.  An ioctl on a busy
-		 * disk might be delayed indefinitely because the
-		 * request might not float high enough in the queue
-		 * to be scheduled.
-		 */
-		list_add_tail(&SRpnt->sr_request.queue, &q->queue_head);
-	}
-
-	/*
-	 * Now hit the requeue function for the queue.  If the host is
-	 * already busy, so be it - we have nothing special to do.  If
-	 * the host can queue it, then send it off.  
-	 */
-	q->request_fn(q);
-	spin_unlock_irqrestore(&io_request_lock, flags);
+	__scsi_insert_special(q, &SRpnt->sr_request, SRpnt, at_head);
 	return 0;
 }
 
@@ -896,10 +859,11 @@
 		    || (SHpnt->host_blocked) 
 		    || (SHpnt->host_self_blocked)) {
 			/*
-			 * If we are unable to process any commands at all for this
-			 * device, then we consider it to be starved.  What this means
-			 * is that there are no outstanding commands for this device
-			 * and hence we need a little help getting it started again
+			 * If we are unable to process any commands at all for
+			 * this device, then we consider it to be starved.
+			 * What this means is that there are no outstanding
+			 * commands for this device and hence we need a
+			 * little help getting it started again
 			 * once the host isn't quite so busy.
 			 */
 			if (SDpnt->device_busy == 0) {
@@ -1000,8 +964,8 @@
 			}
 			/*
 			 * If so, we are ready to do something.  Bump the count
-			 * while the queue is locked and then break out of the loop.
-			 * Otherwise loop around and try another request.
+			 * while the queue is locked and then break out of the
+			 * loop. Otherwise loop around and try another request.
 			 */
 			if (!SCpnt) {
 				break;
@@ -1029,8 +993,9 @@
 			memcpy(&SCpnt->request, req, sizeof(struct request));
 
 			/*
-			 * We have copied the data out of the request block - it is now in
-			 * a field in SCpnt.  Release the request block.
+			 * We have copied the data out of the request block -
+			 * it is now in a field in SCpnt.  Release the request
+			 * block.
 			 */
 			blkdev_release_request(req);
 		}
@@ -1047,12 +1012,14 @@
 			/*
 			 * This will do a couple of things:
 			 *  1) Fill in the actual SCSI command.
-			 *  2) Fill in any other upper-level specific fields (timeout).
+			 *  2) Fill in any other upper-level specific fields
+			 * (timeout).
 			 *
-			 * If this returns 0, it means that the request failed (reading
-			 * past end of disk, reading offline device, etc).   This won't
-			 * actually talk to the device, but some kinds of consistency
-			 * checking may cause the request to be rejected immediately.
+			 * If this returns 0, it means that the request failed
+			 * (reading past end of disk, reading offline device,
+			 * etc).   This won't actually talk to the device, but
+			 * some kinds of consistency checking may cause the
+			 * request to be rejected immediately.
 			 */
 			if (STpnt == NULL) {
 				STpnt = scsi_get_request_dev(req);
@@ -1103,8 +1070,8 @@
 		scsi_dispatch_cmd(SCpnt);
 
 		/*
-		 * Now we need to grab the lock again.  We are about to mess with
-		 * the request queue and try to find another command.
+		 * Now we need to grab the lock again.  We are about to mess
+		 * with the request queue and try to find another command.
 		 */
 		spin_lock_irq(&io_request_lock);
 	}
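
The scsi_lib.c change is a pure consolidation: the bodies of scsi_insert_special_cmd() and scsi_insert_special_req() were near-identical, so both now call __scsi_insert_special(), parameterized by the request, its private data and the at_head flag. A toy version of the same refactor pattern, with invented names and a fixed-size array standing in for the request queue:

    #include <stdio.h>

    struct request {
    	const char *origin;	/* who queued it, for the demo */
    };

    /* Toy queue: fixed array, head at index 0. */
    static struct request *queue[8];
    static int queue_len;

    /*
     * One worker with an at_head flag replaces two nearly identical
     * callers -- the shape of the __scsi_insert_special() refactor.
     */
    static void insert_special(struct request *rq, int at_head)
    {
    	int i;

    	if (at_head) {
    		for (i = queue_len; i > 0; i--)
    			queue[i] = queue[i - 1];
    		queue[0] = rq;
    	} else {
    		queue[queue_len] = rq;
    	}
    	queue_len++;
    }

    int main(void)
    {
    	struct request a = { "ioctl (tail)" };
    	struct request b = { "QUEUE_FULL retry (head)" };
    	int i;

    	insert_special(&a, 0);	/* new ioctls go to the tail */
    	insert_special(&b, 1);	/* requeued commands jump the line */
    	for (i = 0; i < queue_len; i++)
    		printf("%d: %s\n", i, queue[i]->origin);
    	return 0;
    }
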
diff -ur --exclude-from /home/axboe/cdrom/exclude /opt/kernel/linux-2.4.0-test11/drivers/scsi/scsi_merge.c linux/drivers/scsi/scsi_merge.c
--- /opt/kernel/linux-2.4.0-test11/drivers/scsi/scsi_merge.c	Sun Nov 19 15:27:14 2000
+++ linux/drivers/scsi/scsi_merge.c	Mon Nov 20 14:44:03 2000
@@ -324,7 +324,6 @@
 	    req->nr_segments >= SHpnt->sg_tablesize)
 		return 0;
 	req->nr_segments++;
-	q->elevator.nr_segments++;
 	return 1;
 }
 
@@ -341,11 +340,8 @@
 	if (req->nr_hw_segments >= SHpnt->sg_tablesize ||
 	     req->nr_segments >= SHpnt->sg_tablesize)
 		return 0;
-	if (req->nr_segments >= max_segments)
-		return 0;
 	req->nr_hw_segments++;
 	req->nr_segments++;
-	q->elevator.nr_segments++;
 	return 1;
 }
 #else
@@ -361,7 +357,6 @@
 		 * counter.
 		 */
 		req->nr_segments++;
-		q->elevator.nr_segments++;
 		return 1;
 	} else {
 		return 0;
@@ -417,8 +412,10 @@
 	SDpnt = (Scsi_Device *) q->queuedata;
 	SHpnt = SDpnt->host;
 
+#ifdef DMA_CHUNK_SIZE
 	if (max_segments > 64)
 		max_segments = 64;
+#endif
 
 	if (use_clustering) {
 		/* 
@@ -471,8 +468,10 @@
 	SDpnt = (Scsi_Device *) q->queuedata;
 	SHpnt = SDpnt->host;
 
+#ifdef DMA_CHUNK_SIZE
 	if (max_segments > 64)
 		max_segments = 64;
+#endif
 
 	if (use_clustering) {
 		/* 
@@ -601,10 +600,10 @@
 	SDpnt = (Scsi_Device *) q->queuedata;
 	SHpnt = SDpnt->host;
 
+#ifdef DMA_CHUNK_SIZE
 	if (max_segments > 64)
 		max_segments = 64;
 
-#ifdef DMA_CHUNK_SIZE
 	/* If it would not fit into prepared memory space for sg chain,
 	 * then don't allow the merge.
 	 */
@@ -664,7 +663,6 @@
 			 * This one is OK.  Let it go.
 			 */
 			req->nr_segments += next->nr_segments - 1;
-			q->elevator.nr_segments--;
 #ifdef DMA_CHUNK_SIZE
 			req->nr_hw_segments += next->nr_hw_segments - 1;
 #endif
diff -ur --exclude-from /home/axboe/cdrom/exclude /opt/kernel/linux-2.4.0-test11/include/linux/blk.h linux/include/linux/blk.h
--- /opt/kernel/linux-2.4.0-test11/include/linux/blk.h	Mon Nov 20 12:37:22 2000
+++ linux/include/linux/blk.h	Mon Nov 20 12:39:04 2000
@@ -87,10 +87,6 @@
 
 static inline void blkdev_dequeue_request(struct request * req)
 {
-	if (req->e) {
-		req->e->dequeue_fn(req);
-		req->e = NULL;
-	}
 	list_del(&req->queue);
 }
 
diff -ur --exclude-from /home/axboe/cdrom/exclude /opt/kernel/linux-2.4.0-test11/include/linux/blkdev.h linux/include/linux/blkdev.h
--- /opt/kernel/linux-2.4.0-test11/include/linux/blkdev.h	Sun Nov 19 15:27:14 2000
+++ linux/include/linux/blkdev.h	Mon Nov 20 12:39:04 2000
@@ -23,8 +23,6 @@
 	int elevator_sequence;
 	struct list_head table;
 
-	struct list_head *free_list;
-
 	volatile int rq_status;	/* should split this into a few status bits */
 #define RQ_INACTIVE		(-1)
 #define RQ_ACTIVE		1
@@ -47,7 +45,6 @@
 	struct buffer_head * bh;
 	struct buffer_head * bhtail;
 	request_queue_t *q;
-	elevator_t *e;
 };
 
 #include <linux/elevator.h>
@@ -77,6 +74,8 @@
 	 * the queue request freelist, one for reads and one for writes
 	 */
 	struct list_head	request_freelist[2];
+	struct list_head	pending_freelist[2];
+	int			pending_free[2];
 
 	/*
 	 * Together with queue_head for cacheline sharing
@@ -152,6 +151,7 @@
 extern void register_disk(struct gendisk *dev, kdev_t first, unsigned minors, struct block_device_operations *ops, long size);
 extern void generic_make_request(int rw, struct buffer_head * bh);
 extern request_queue_t *blk_get_queue(kdev_t dev);
+extern inline request_queue_t *__blk_get_queue(kdev_t dev);
 extern void blkdev_release_request(struct request *);
 
 /*
@@ -162,6 +162,7 @@
 extern void blk_queue_headactive(request_queue_t *, int);
 extern void blk_queue_pluggable(request_queue_t *, plug_device_fn *);
 extern void blk_queue_make_request(request_queue_t *, make_request_fn *);
+extern void generic_unplug_device(void *);
 
 extern int * blk_size[MAX_BLKDEV];
 
@@ -175,9 +176,8 @@
 
 extern int * max_segments[MAX_BLKDEV];
 
-#define MAX_SECTORS 254
-
-#define MAX_SEGMENTS MAX_SECTORS
+#define MAX_SEGMENTS 128
+#define MAX_SECTORS (MAX_SEGMENTS*8)
 
 #define PageAlignSize(size) (((size) + PAGE_SIZE -1) & PAGE_MASK)
 
diff -ur --exclude-from /home/axboe/cdrom/exclude /opt/kernel/linux-2.4.0-test11/include/linux/elevator.h linux/include/linux/elevator.h
--- /opt/kernel/linux-2.4.0-test11/include/linux/elevator.h	Sun Nov 19 15:27:14 2000
+++ linux/include/linux/elevator.h	Sat Nov  4 16:15:55 2000
@@ -7,34 +7,32 @@
 			    struct list_head *,
 			    struct list_head *, int);
 
-typedef int (elevator_merge_fn) (request_queue_t *, struct request **,
-				 struct buffer_head *, int, int *, int *);
+typedef int (elevator_merge_fn) (request_queue_t *, struct request **, struct list_head *,
+				 struct buffer_head *, int, int, int);
 
-typedef void (elevator_dequeue_fn) (struct request *);
+typedef void (elevator_merge_cleanup_fn) (request_queue_t *, struct request *, int);
+
+typedef void (elevator_merge_req_fn) (struct request *, struct request *);
 
 struct elevator_s
 {
-	int sequence;
-
 	int read_latency;
 	int write_latency;
-	int max_bomb_segments;
 
-	unsigned int nr_segments;
-	int read_pendings;
-
-	elevator_fn * elevator_fn;
 	elevator_merge_fn *elevator_merge_fn;
-	elevator_dequeue_fn *dequeue_fn;
+	elevator_merge_cleanup_fn *elevator_merge_cleanup_fn;
+	elevator_merge_req_fn *elevator_merge_req_fn;
 
 	unsigned int queue_ID;
 };
 
-void elevator_noop(struct request *, elevator_t *, struct list_head *, struct list_head *, int);
-int elevator_noop_merge(request_queue_t *, struct request **, struct buffer_head *, int, int *, int *);
-void elevator_noop_dequeue(struct request *);
-void elevator_linus(struct request *, elevator_t *, struct list_head *, struct list_head *, int);
-int elevator_linus_merge(request_queue_t *, struct request **, struct buffer_head *, int, int *, int *);
+int elevator_noop_merge(request_queue_t *, struct request **, struct list_head *, struct buffer_head *, int, int, int);
+void elevator_noop_merge_cleanup(request_queue_t *, struct request *, int);
+void elevator_noop_merge_req(struct request *, struct request *);
+
+int elevator_linus_merge(request_queue_t *, struct request **, struct list_head *, struct buffer_head *, int, int, int);
+void elevator_linus_merge_cleanup(request_queue_t *, struct request *, int);
+void elevator_linus_merge_req(struct request *, struct request *);
 
 typedef struct blkelv_ioctl_arg_s {
 	int queue_ID;
@@ -69,6 +67,10 @@
 	   (s1)->sector < (s2)->sector)) ||	\
 	 (s1)->rq_dev < (s2)->rq_dev)
 
+#define BHRQ_IN_ORDER(bh, rq)			\
+	(((bh)->b_rdev == (rq)->rq_dev &&	\
+	  (bh)->b_rsector < (rq)->sector))
+
 static inline int elevator_request_latency(elevator_t * elevator, int rw)
 {
 	int latency;
@@ -80,36 +82,24 @@
 	return latency;
 }
 
-#define ELEVATOR_NOOP						\
-((elevator_t) {							\
-	0,				/* sequence */		\
-								\
-	0,				/* read_latency */	\
-	0,				/* write_latency */	\
-	0,				/* max_bomb_segments */	\
-								\
-	0,				/* nr_segments */	\
-	0,				/* read_pendings */	\
-								\
-	elevator_noop,			/* elevator_fn */	\
-	elevator_noop_merge,		/* elevator_merge_fn */ \
-	elevator_noop_dequeue,		/* dequeue_fn */	\
+#define ELEVATOR_NOOP							\
+((elevator_t) {								\
+	0,				/* read_latency */		\
+	0,				/* write_latency */		\
+									\
+	elevator_noop_merge,		/* elevator_merge_fn */		\
+	elevator_noop_merge_cleanup,	/* elevator_merge_cleanup_fn */	\
+	elevator_noop_merge_req,	/* elevator_merge_req_fn */	\
 	})
 
-#define ELEVATOR_LINUS						\
-((elevator_t) {							\
-	0,				/* not used */		\
-								\
-	1000000,				/* read passovers */	\
-	2000000,				/* write passovers */	\
-	0,				/* max_bomb_segments */	\
-								\
-	0,				/* not used */		\
-	0,				/* not used */		\
-								\
-	elevator_linus,			/* elevator_fn */	\
-	elevator_linus_merge,		/* elevator_merge_fn */ \
-	elevator_noop_dequeue,		/* dequeue_fn */	\
+#define ELEVATOR_LINUS							\
+((elevator_t) {								\
+	8192,				/* read passovers */		\
+	16384,				/* write passovers */		\
+									\
+	elevator_linus_merge,		/* elevator_merge_fn */		\
+	elevator_linus_merge_cleanup,	/* elevator_merge_cleanup_fn */	\
+	elevator_linus_merge_req,	/* elevator_merge_req_fn */	\
 	})
 
 #endif
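
The slimmed-down elevator_t keeps only the two latency tunables and three function pointers; the accounting fields (sequence, nr_segments, read_pendings, max_bomb_segments) are gone along with the code that maintained them. The ops-table pattern itself, reduced to a compilable toy with invented scheduler stubs:

    #include <stdio.h>

    /* Simplified shape of the new elevator_t: tunables plus an ops table. */
    struct elevator {
    	int read_latency;
    	int write_latency;
    	int (*merge_fn)(const char *what);
    };

    static int noop_merge(const char *what)
    {
    	printf("noop: never merges %s\n", what);
    	return 0;
    }

    static int linus_merge(const char *what)
    {
    	printf("linus: trying to merge %s\n", what);
    	return 1;
    }

    /* Compound-literal initializers, in the style of ELEVATOR_LINUS. */
    #define ELEVATOR_NOOP  ((struct elevator) { 0, 0, noop_merge })
    #define ELEVATOR_LINUS ((struct elevator) { 8192, 16384, linus_merge })

    int main(void)
    {
    	struct elevator e = ELEVATOR_LINUS;

    	e.merge_fn("a read at sector 1024");
    	e = ELEVATOR_NOOP;
    	e.merge_fn("a read at sector 1024");
    	return 0;
    }
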
diff -ur --exclude-from /home/axboe/cdrom/exclude /opt/kernel/linux-2.4.0-test11/include/linux/sched.h linux/include/linux/sched.h
--- /opt/kernel/linux-2.4.0-test11/include/linux/sched.h	Mon Nov 20 12:37:22 2000
+++ linux/include/linux/sched.h	Mon Nov 20 12:39:04 2000
@@ -534,8 +534,8 @@
 
 #define CURRENT_TIME (xtime.tv_sec)
 
-extern void FASTCALL(__wake_up(wait_queue_head_t *q, unsigned int mode, unsigned int wq_mode));
-extern void FASTCALL(__wake_up_sync(wait_queue_head_t *q, unsigned int mode, unsigned int wq_mode));
+extern void FASTCALL(__wake_up(wait_queue_head_t *q, unsigned int mode, int nr));
+extern void FASTCALL(__wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr));
 extern void FASTCALL(sleep_on(wait_queue_head_t *q));
 extern long FASTCALL(sleep_on_timeout(wait_queue_head_t *q,
 				      signed long timeout));
@@ -544,12 +544,16 @@
 						    signed long timeout));
 extern void FASTCALL(wake_up_process(struct task_struct * tsk));
 
-#define wake_up(x)			__wake_up((x),TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE,WQ_FLAG_EXCLUSIVE)
-#define wake_up_all(x)			__wake_up((x),TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE,0)
-#define wake_up_sync(x)			__wake_up_sync((x),TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE,WQ_FLAG_EXCLUSIVE)
-#define wake_up_interruptible(x)	__wake_up((x),TASK_INTERRUPTIBLE,WQ_FLAG_EXCLUSIVE)
-#define wake_up_interruptible_all(x)	__wake_up((x),TASK_INTERRUPTIBLE,0)
-#define wake_up_interruptible_sync(x)	__wake_up_sync((x),TASK_INTERRUPTIBLE,WQ_FLAG_EXCLUSIVE)
+#define wake_up(x)			__wake_up((x),TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, 1)
+#define wake_up_nr(x, nr)		__wake_up((x),TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, nr)
+#define wake_up_all(x)			__wake_up((x),TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, 0)
+#define wake_up_sync(x)			__wake_up_sync((x),TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, 1)
+#define wake_up_sync_nr(x, nr)		__wake_up_sync((x),TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, nr)
+#define wake_up_interruptible(x)	__wake_up((x),TASK_INTERRUPTIBLE, 1)
+#define wake_up_interruptible_nr(x, nr)	__wake_up((x),TASK_INTERRUPTIBLE, nr)
+#define wake_up_interruptible_all(x)	__wake_up((x),TASK_INTERRUPTIBLE, 0)
+#define wake_up_interruptible_sync(x)	__wake_up_sync((x),TASK_INTERRUPTIBLE, 1)
+#define wake_up_interruptible_sync_nr(x, nr) __wake_up_sync((x),TASK_INTERRUPTIBLE, nr)
 
 extern int in_group_p(gid_t);
 extern int in_egroup_p(gid_t);
diff -ur --exclude-from /home/axboe/cdrom/exclude /opt/kernel/linux-2.4.0-test11/kernel/sched.c linux/kernel/sched.c
--- /opt/kernel/linux-2.4.0-test11/kernel/sched.c	Mon Nov 20 12:37:22 2000
+++ linux/kernel/sched.c	Sun Nov 19 15:27:04 2000
@@ -700,19 +700,15 @@
 }
 
 static inline void __wake_up_common (wait_queue_head_t *q, unsigned int mode,
-				     unsigned int wq_mode, const int sync)
+			 	     int nr_exclusive, const int sync)
 {
 	struct list_head *tmp, *head;
-	struct task_struct *p, *best_exclusive;
+	struct task_struct *p;
 	unsigned long flags;
-	int best_cpu, irq;
 
 	if (!q)
 		goto out;
 
-	best_cpu = smp_processor_id();
-	irq = in_interrupt();
-	best_exclusive = NULL;
 	wq_write_lock_irqsave(&q->lock, flags);
 
 #if WAITQUEUE_DEBUG
@@ -740,47 +736,27 @@
 #if WAITQUEUE_DEBUG
 			curr->__waker = (long)__builtin_return_address(0);
 #endif
-			/*
-			 * If waking up from an interrupt context then
-			 * prefer processes which are affine to this
-			 * CPU.
-			 */
-			if (irq && (curr->flags & wq_mode & WQ_FLAG_EXCLUSIVE)) {
-				if (!best_exclusive)
-					best_exclusive = p;
-				if (p->processor == best_cpu) {
-					best_exclusive = p;
-					break;
-				}
-			} else {
-				if (sync)
-					wake_up_process_synchronous(p);
-				else
-					wake_up_process(p);
-				if (curr->flags & wq_mode & WQ_FLAG_EXCLUSIVE)
-					break;
-			}
+			if (sync)
+				wake_up_process_synchronous(p);
+			else
+				wake_up_process(p);
+			if ((curr->flags & WQ_FLAG_EXCLUSIVE) && !--nr_exclusive)
+				break;
 		}
 	}
-	if (best_exclusive) {
-		if (sync)
-			wake_up_process_synchronous(best_exclusive);
-		else
-			wake_up_process(best_exclusive);
-	}
 	wq_write_unlock_irqrestore(&q->lock, flags);
 out:
 	return;
 }
 
-void __wake_up(wait_queue_head_t *q, unsigned int mode, unsigned int wq_mode)
+void __wake_up(wait_queue_head_t *q, unsigned int mode, int nr)
 {
-	__wake_up_common(q, mode, wq_mode, 0);
+	__wake_up_common(q, mode, nr, 0);
 }
 
-void __wake_up_sync(wait_queue_head_t *q, unsigned int mode, unsigned int wq_mode)
+void __wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr)
 {
-	__wake_up_common(q, mode, wq_mode, 1);
+	__wake_up_common(q, mode, nr, 1);
 }
 
 #define	SLEEP_ON_VAR				\
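
The scheduler side replaces the interrupt-time CPU-affinity heuristic in __wake_up_common() with a simple count: non-exclusive waiters are always woken, while exclusive waiters consume nr_exclusive and the scan stops when it reaches zero (so nr == 0 still means "wake everyone", as wake_up_all relies on). A userspace model of exactly that loop, with invented waiter structures:

    #include <stdio.h>

    struct waiter {
    	const char *name;
    	int exclusive;	/* stand-in for WQ_FLAG_EXCLUSIVE */
    };

    /*
     * Model of the reworked __wake_up_common(): non-exclusive waiters
     * are always woken; exclusive waiters consume nr_exclusive and the
     * scan stops once it hits zero.
     */
    static void wake_up_nr(struct waiter *w, int n, int nr_exclusive)
    {
    	int i;

    	for (i = 0; i < n; i++) {
    		printf("waking %s\n", w[i].name);
    		if (w[i].exclusive && !--nr_exclusive)
    			break;
    	}
    }

    int main(void)
    {
    	struct waiter w[] = {
    		{ "poller (non-exclusive)", 0 },
    		{ "reader A (exclusive)", 1 },
    		{ "reader B (exclusive)", 1 },
    		{ "reader C (exclusive)", 1 },
    	};

    	wake_up_nr(w, 4, 2);	/* like wake_up_nr(q, 2): two exclusive sleepers */
    	return 0;
    }
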
diff -ur --exclude-from /home/axboe/cdrom/exclude /opt/kernel/linux-2.4.0-test11/mm/filemap.c linux/mm/filemap.c
--- /opt/kernel/linux-2.4.0-test11/mm/filemap.c	Mon Nov 20 12:37:22 2000
+++ linux/mm/filemap.c	Sat Nov 18 10:12:23 2000
@@ -263,7 +263,7 @@
 	 */
 	age_page_up(page);
 	if (inactive_shortage() > inactive_target / 2 && free_shortage())
-			wakeup_kswapd(0);
+		wakeup_kswapd(0);
 not_found:
 	return page;
 }
@@ -855,10 +855,6 @@
  *   accessed sequentially.
  */
 	if (ahead) {
-		if (reada_ok == 2) {
-			run_task_queue(&tq_disk);
-		}
-
 		filp->f_ralen += ahead;
 		filp->f_rawin += filp->f_ralen;
 		filp->f_raend = raend + ahead + 1;