Index: linux/Documentation/DocBook/Makefile
===================================================================
--- linux.orig/Documentation/DocBook/Makefile
+++ linux/Documentation/DocBook/Makefile
@@ -10,7 +10,7 @@ DOCBOOKS := wanbook.xml z8530book.xml mc
kernel-hacking.xml kernel-locking.xml deviceiobook.xml \
procfs-guide.xml writing_usb_driver.xml \
sis900.xml kernel-api.xml journal-api.xml lsm.xml usb.xml \
- gadget.xml libata.xml mtdnand.xml librs.xml
+ gadget.xml libata.xml mtdnand.xml librs.xml genericirq.xml
###
# The build process is as follows (targets):
Index: linux/Documentation/DocBook/genericirq.tmpl
===================================================================
--- /dev/null
+++ linux/Documentation/DocBook/genericirq.tmpl
@@ -0,0 +1,560 @@
+
+
+
+
+
+ Linux generic IRQ handling
+
+
+
+ Thomas
+ Gleixner
+
+
+ tglx@linutronix.de
+
+
+
+
+ Ingo
+ Molnar
+
+
+ mingo@elte.hu
+
+
+
+
+
+
+ 2005
+ Thomas Gleixner
+
+
+ 2005
+ Ingo Molnar
+
+
+
+
+ This documentation is free software; you can redistribute
+ it and/or modify it under the terms of the GNU General Public
+ License version 2 as published by the Free Software Foundation.
+
+
+
+ This program is distributed in the hope that it will be
+ useful, but WITHOUT ANY WARRANTY; without even the implied
+ warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ See the GNU General Public License for more details.
+
+
+
+ You should have received a copy of the GNU General Public
+ License along with this program; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+ MA 02111-1307 USA
+
+
+
+ For more details see the file COPYING in the source
+ distribution of Linux.
+
+
+
+
+
+
+
+ Introduction
+
+ The generic interrupt handling layer is designed to provide a
+ complete abstraction of interrupt handling for device drivers
+ and is able to handle all different types of interrupt controller
+ hardware. Device drivers use generic API functions to request, enable,
+ disable and free interrupts. The drivers do not have to know anything
+ about interrupt hardware, so they can be used on different hardware
+ platforms without code changes.
+
+
+ This documentation is provided for developers who want to implement
+ architecture interrupt support based on the Generic IRQ handling layer.
+
+
+
+
+ Rationale
+
+ The original implementation of interrupt handling in Linux is using
+ the __do_IRQ() super-handler, which must be able to deal with every
+ type of interrupt logic. This is achieved by an 'interrupt type'
+ structure and runtime flags to handle special cases.
+ Furthermore the superhandler assumed a certain type of interrupt
+ handling hardware and turned out to be not capable of handling all
+ kinds of interrupt controller hardware which can be found across
+ the architectures. The all-in-one approach also adds unnecessary
+ complexity for every user.
+
+
+ Originally, Russell King identified different types of handlers to
+ build a quite universal set for the ARM interrupt handler
+ implementation in Linux 2.5/2.6. He distinguished between:
+
+ Level type
+ Edge type
+ Simple type
+
+ In the SMP world of the __do_IRQ() super-handler another type
+ was identified:
+
+ Per CPU type
+
+
+
+ This split implementation of handlers allows to optimize the flow
+ of the interrupt handling for each specific interrupt type.
+ This reduces complexity in that particular code path and allows
+ the optimized handling of a given type.
+
+
+ The original general implementation uses interrupt_type structures
+ to differentiate the flow control in the super-handler. This
+ leads to a mix of flow logic and code related to hardware details.
+ Russell King's ARM implementation which replaced the type by a chip
+ abstraction did the mix the other way around.
+
+
+ The natural conclusion was a clean separation of the 'type flow'
+ and the 'chip'. Analysing a couple of architecture implementations
+ reveals that many of them can use a generic set of 'type flow'
+ implementations and only need to add the chip level specific code.
+ The separation is also valuable for the (sub)architectures,
+ which need specific quirks in the type flow itself, because it
+ provides a more transparent design.
+
+
+ Each interrupt type implementation has assigned its own flow
+ handler, which should be normally one of the generic
+ implementations. The flow handler implementation makes it
+ simple to provide demultiplexing handlers which can be found in
+ embedded platforms on various architectures.
+
+
+ The separation makes the generic interrupt handling more flexible
+ and extensible. A (sub)architecture can use a generic type flow
+ implementation for e.g. 'level type' interrupts and add a
+ (sub)architecture specific 'edge type' implementation.
+
+
+ To make the transition to the new model easier and prevent the
+ breakage of existing implementations the __do_IRQ() super-handler
+ is still available. This leads to a kind of duality for the time
+ being. Over time the new model should achieve a homogeneous
+ implementation scheme over all architectures with enhanced
+ maintainability and cleanliness.
+
+
+
+ Known Bugs And Assumptions
+
+ None (hopefully).
+
+
+
+
+ Abstraction layers
+
+ There are three main levels of abstraction in the interrupt code:
+
+ Highlevel driver API
+ Abstract interrupt type
+ Chiplevel hardware encapsulation
+
+
+
+ The separation of interrupt type and chip level functionality
+ provides the most flexible design. This implementation can handle
+ all kinds of interrupt hardware and the necessary workarounds for
+ the interrupt types without the need of redundant implementations.
+ The separation also handles edge and level type interrupts
+ on the same hardware chip.
+
+
+ Interrupt control flow
+
+ Each interrupt is described by an interrupt description structure
+ irq_desc. The interrupt is referenced by an 'unsigned int' numeric
+ value which selects the corresponding interrupt description structure
+ in the description structures array.
+ The description structure contains status information and pointers
+ to the interrupt type structure and the interrupt chip structure
+ which are assigned to this interrupt.
+
+
+ Whenever an interrupt triggers, the lowlevel arch code calls into
+ the generic interrupt code by calling desc->handler->handle_irq().
+ This highlevel IRQ handling function only uses other
+ desc->handler primitives which describe the control flow operation
+ necessary for the interrupt type. These operations are calling
+ the chip primitives referenced by the assigned chip description
+ structure.
+
+
+
+ Highlevel Driver API
+
+ The highlevel Driver API consists of the following functions:
+
+ request_irq()
+ free_irq()
+ disable_irq()
+ enable_irq()
+ disable_irq_nosync() (SMP only)
+ synchronize_irq() (SMP only)
+ set_irq_type()
+ set_irq_wake()
+ set_irq_data()
+ set_irq_chip()
+ set_irq_chip_data()
+
+ See the autogenerated function documentation for details.
+
+
+
+ Abstract interrupt type
+
+ The 'interrupt type' (struct irq_type) abstraction mainly consists of
+ methods which implement the 'interrupt handling flow'. The generic
+ layer provides a set of pre-defined types:
+
+ default_level_type
+ default_edge_type
+ default_simple_type
+ default_percpu_type
+
+ The default type implementations use the generic type handlers.
+
+ handle_level_type
+ handle_edge_type
+ handle_simple_type
+ handle_percpu_type
+
+ The interrupt types (either predefined or architecture specific) are
+ assigned to specific interrupts by the architecture either during
+ bootup or during device initialization.
+
+
+ Default type implementations
+
+ Helper functions
+
+ The helper functions call the chip primitives and
+ are used by the default type implementations.
+ Following helper functions are implemented (simplified excerpt):
+
+default_enable(irq)
+{
+ desc->chip->unmask(irq);
+}
+
+default_disable(irq)
+{
+ desc->chip->mask(irq);
+}
+
+default_ack(irq)
+{
+ chip->ack(irq);
+}
+
+default_mask_ack(irq)
+{
+ if (chip->mask_ack) {
+ chip->mask_ack(irq);
+ } else {
+ chip->mask(irq);
+ chip->ack(irq);
+ }
+}
+
+noop(irq)
+{
+}
+
+default_set_type(irq, type)
+{
+ if (desc->chip->set_type) {
+ if (desc->chip->set_type(irq, type))
+ return NULL;
+ }
+
+ return default_handler for type;
+}
+
+
+
+
+ Default Level IRQ type
+
+ The default Level IRQ type implements the functions
+
+ enabledefault_enable
+ disabledefault_disable
+ startdefault_mask_ack
+ enddefault_enable
+ handle_irqhandle_level_irq
+ set_typedefault_set_type
+
+
+
+
+ Default Edge IRQ type
+
+ The default Edge IRQ type implements the functions
+
+ enabledefault_enable
+ disabledefault_disable
+ startdefault_ack
+ holddefault_mask_ack
+ endnoop
+ handle_irqhandle_edge_irq
+ set_typedefault_set_type
+
+
+
+
+ Default simple IRQ type
+
+ The default simple IRQ type implements the functions
+
+ enablenoop
+ disablenoop
+ handle_irqhandle_simple_irq
+
+
+
+
+ Default per CPU IRQ type
+
+ The default per CPU IRQ type implements the functions
+
+ enabledefault_enable
+ disabledefault_disable
+ startdefault_ack
+ enddefault_enable
+ handle_irqhandle_percpu_irq
+
+
+
+
+
+ Default type handler implementations
+
+ Default Level IRQ type handler
+
+ handle_level_type provides a generic implementation
+ for level type interrupts.
+
+
+ Following control flow is implemented (simplified excerpt):
+
+desc->handler->start();
+handle_IRQ_event(desc->action);
+desc->handler->end();
+
+
+
+
+ Default Edge IRQ type handler
+
+ handle_edge_type provides a generic implementation
+ for edge type interrupts.
+
+
+ Following control flow is implemented (simplified excerpt):
+
+if (desc->status & running) {
+ desc->handler->hold();
+ desc->status |= pending | masked;
+ return;
+}
+desc->handler->start();
+desc->status |= running;
+do {
+ if (desc->status & masked)
+ desc->handler->enable();
+ desc->status &= ~pending;
+ handle_IRQ_event(desc->action);
+} while (desc->status & pending);
+desc->status &= ~running;
+desc->handler->end();
+
+
+
+
+ Default simple IRQ type handler
+
+ handle_simple_type provides a generic implementation
+ for simple type interrupts.
+
+
+ Note: The simple type handler does not call any
+ handler/chip primitives.
+
+
+ Following control flow is implemented (simplified excerpt):
+
+handle_IRQ_event(desc->action);
+
+
+
+
+ Default per CPU type handler
+
+ handle_percpu_type provides a generic implementation
+ for per CPU type interrupts.
+
+
+ Per CPU interrupts are only available on SMP and
+ the handler provides a simplified version without
+ locking.
+
+
+ Following control flow is implemented (simplified excerpt):
+
+desc->handler->start();
+handle_IRQ_event(desc->action);
+desc->handler->end();
+
+
+
+
+
+ Architecture specific type implementation
+
+ If an architecture needs to implement its own type structures, then
+ the following primitives have to be implemented:
+
+ handle_irq() - The handle_irq function pointer should preferably point to
+ one of the generic type handler functions
+ startup() - Optional
+ shutdown() - Optional
+ enable()
+ disable()
+ start()
+ hold() - For edge type interrupts only
+ end()
+ set_type - Optional
+ set_affinity - SMP only
+
+
+
+
+ Quirks and optimizations
+
+ The generic functions are intended for 'clean' architectures and chips,
+ which have no platform-specific IRQ handling quirks. If an architecture
+ needs to implement quirks on the 'flow' level then it can do so by
+ overriding the irqtype. This is also done for compatibility reasons, as
+ most architectures use irqtypes only at the moment.
+
+
+ An architecture could implement all of its IRQ logic via pushing
+ chip handling details into the irqtype's ->start()/->end()/->hold()
+ functions. This is only recommended when the underlying primitives
+ are pure chip primitives without additional quirks. The direct pointer
+ to the chip functions reduces the indirection level by one.
+
+
+
+
+ Chiplevel hardware encapsulation
+
+ The chip level hardware description structure irq_chip
+ contains all the direct chip relevant functions, which
+ can be utilized by the irq_type implementations.
+
+ ack()
+ mask_ack() - Optional, recommended for performance
+ mask()
+ unmask()
+ retrigger() - Optional
+ set_type() - Optional
+ set_wake() - Optional
+
+ These primitives are strictly intended to mean what they say: ack means
+ ACK, masking means masking of an IRQ line, etc. It is up to the flow
+ handler(s) to use these basic units of lowlevel functionality.
+
+
+
+
+
+ __do_IRQ entry point
+
+ The original implementation __do_IRQ() is an alternative entry
+ point for all types of interrupts.
+
+
+ This handler turned out to be not suitable for all
+ interrupt hardware and was therefore reimplemented with split
+ functionality for edge/level/simple/percpu interrupts. This is not
+ only a functional optimization. It also shortens code paths for
+ interrupts.
+
+
+ To make use of the split implementation, replace the call to
+ __do_IRQ by a call to desc->handler->handle_irq() and associate
+ the appropriate handler function to desc->handler->handle_irq().
+ In most cases the generic type and handler implementations should
+ be sufficient.
+
+
+
+
+ Locking on SMP
+
+ The locking of chip registers is up to the architecture that
+ defines the chip primitives. There is a chip->lock field that can be used
+ for serialization, but the generic layer does not touch it. The per-irq
+ structure is protected via desc->lock, by the generic layer.
+
+
+
+ Structures
+
+ This chapter contains the autogenerated documentation of the structures which are
+ used in the generic IRQ layer.
+
+!Iinclude/linux/irq.h
+
+
+
+ Public Functions Provided
+
+ This chapter contains the autogenerated documentation of the kernel API functions
+ which are exported.
+
+!Ekernel/irq/manage.c
+
+
+
+ Internal Functions Provided
+
+ This chapter contains the autogenerated documentation of the internal functions.
+
+!Ikernel/irq/handle.c
+
+
+
+ Credits
+
+ The following people have contributed to this document:
+
+ Thomas Gleixnertglx@linutronix.de
+ Ingo Molnarmingo@elte.hu
+
+
+
+
Index: linux/Documentation/DocBook/kernel-api.tmpl
===================================================================
--- linux.orig/Documentation/DocBook/kernel-api.tmpl
+++ linux/Documentation/DocBook/kernel-api.tmpl
@@ -54,6 +54,11 @@
!Ekernel/sched.c
!Ekernel/timer.c
+ High-precision timers
+!Iinclude/linux/ktime.h
+!Iinclude/linux/ktimer.h
+!Ekernel/ktimers.c
+
Internal Functions
!Ikernel/exit.c
!Ikernel/signal.c
Index: linux/Documentation/RCU/proc.txt
===================================================================
--- /dev/null
+++ linux/Documentation/RCU/proc.txt
@@ -0,0 +1,119 @@
+/proc Filesystem Entries for RCU
+
+
+CONFIG_RCU_STATS
+
+The CONFIG_RCU_STATS config option is available only in conjunction with
+CONFIG_PREEMPT_RCU. It makes four /proc entries available, namely: rcuctrs,
+rcuptrs, rcugp, and rcustats.
+
+/proc/rcuctrs
+
+ CPU last cur
+ 0 1 1
+ 1 1 1
+ 2 1 1
+ 3 0 2
+ ggp = 230725
+
+This displays the number of processes that started RCU read-side critical
+sections on each CPU. In absence of preemption, the "last" and "cur"
+counts for a given CPU will always sum to one. Therefore, in the example
+output above, each CPU has started one RCU read-side critical section
+that was later preempted. The "last" column counts RCU read-side critical
+sections that started prior to the last counter flip, while the "cur"
+column counts critical sections that started after the last counter flip.
+
+The "ggp" count is a count of the number of counter flips since boot.
+Since this is shown as an odd number, the "cur" counts are stored in
+the zero-th element of each of the per-CPU arrays, and the "last" counts
+are stored in the first element of each of the per-CPU arrays.
+
+
+/proc/rcuptrs
+
+ nl=c04c7160/c04c7960 nt=c04c72d0
+ wl=c04c7168/c04c794c wt=c04c72bc dl=c04c7170/00000000 dt=c04c7170
+
+This displays the head and tail of each of CONFIG_PREEMPT_RCU's three
+callback lists. This will soon change to display this on a per-CPU
+basis, since each CPU will soon have its own set of callback lists.
+In the example above, the "next" list header is located at hex address
+0xc04c7160, the first element on the list at hex address 0xc04c7960,
+and the last element on the list at hex address 0xc04c72d0. The "wl="
+and "wt=" output is similar for the "wait" list, and the "dl=" and "dt="
+output for the "done" list. The "done" list is normally emptied very
+quickly after being filled, so will usually be empty as shown above.
+Note that the tail pointer points into the list header in this case.
+
+Callbacks are placed in the "next" list by call_rcu(), moved to the
+"wait" list after the next counter flip, and moved to the "done" list
+on the counter flip after that. Once on the "done" list, the callbacks
+are invoked.
+
+
+/proc/rcugp
+
+ oldggp=241419 newggp=241421
+
+This entry invokes synchronize_rcu() and prints out the number of counter
+flips since boot before and after the synchronize_rcu(). These two
+numbers will always differ by at least two. Unless RCU is broken. ;-)
+
+
+/proc/rcustats
+
+ ggp=242416 lgp=242416 sr=0 rcc=396233
+ na=2090938 nl=9 wa=2090929 wl=9 dl=0 dr=2090920 di=2090920
+ rtf1=22230730 rtf2=20139162 rtf3=242416 rtfe1=2085911 rtfe2=5657 rtfe3=19896746
+
+The quantities printed are as follows:
+
+o "ggp=": The number of flips since boot.
+
+o "lgp=": The number of flips sensed by the local structure since
+ boot. This will soon be per-CPU.
+
+o "sr=": The number of explicit calls to synchronize_rcu().
+ Except that this is currently broken, so always reads as zero.
+ It is likely to be removed...
+
+o "rcc=": The number of calls to rcu_check_callbacks().
+
+o "na=": The number of callbacks that call_rcu() has registered
+ since boot.
+
+o "nl=": The number of callbacks currently on the "next" list.
+
+o "wa=": The number of callbacks that have moved to the "wait"
+ list since boot.
+
+o "wl=": The number of callbacks currently on the "wait" list.
+
+o "da=": The number of callbacks that have been moved to the
+ "done" list since boot.
+
+o "dl=": The number of callbacks currently on the "done" list.
+
+o "dr=": The number of callbacks that have been removed from the
+ "done" list since boot.
+
+o "di=": The number of callbacks that have been invoked after being
+ removed from the "done" list.
+
+o "rtf1=": The number of attempts to flip the counters.
+
+o "rtf2=": The number of attempts to flip the counters that successfully
+ acquired the fliplock.
+
+o "rtf3=": The number of successful counter flips.
+
+o "rtfe1=": The number of attempts to flip the counters that failed
+ due to the lock being held by someone else.
+
+o "rtfe2=": The number of attempts to flip the counters that were
+ abandoned due to someone else doing the job for us.
+
+o "rtfe3=": The number of attempts to flip the counters that failed
+ due to some task still being in an RCU read-side critical section
+ starting from before the last successful counter flip.
Index: linux/Documentation/RCU/torture.txt
===================================================================
--- /dev/null
+++ linux/Documentation/RCU/torture.txt
@@ -0,0 +1,127 @@
+RCU Torture Test Operation
+
+
+CONFIG_RCU_TORTURE_TEST
+
+The CONFIG_RCU_TORTURE_TEST config option is available for all RCU
+implementations. It creates an rcutorture kernel module that can
+be loaded to run a torture test. The test periodically outputs
+status messages via printk(), which can be examined via the dmesg
+command (perhaps grepping for "rcutorture"). The test is started
+when the module is loaded, and stops when the module is unloaded.
+
+However, actually setting this config option to "y" results in the system
+running the test immediately upon boot, and ending only when the system
+is taken down. Normally, one will instead want to build the system
+with CONFIG_RCU_TORTURE_TEST=m and to use modprobe and rmmod to control
+the test, perhaps using a script similar to the one shown at the end of
+this document. Note that you will need CONFIG_MODULE_UNLOAD in order
+to be able to end the test.
+
+
+MODULE PARAMETERS
+
+This module has the following parameters:
+
+nreaders This is the number of RCU reading threads supported.
+ The default is twice the number of CPUs. Why twice?
+ To properly exercise RCU implementations with preemptible
+ read-side critical sections.
+
+stat_interval The number of seconds between output of torture
+ statistics (via printk()). Regardless of the interval,
+ statistics are printed when the module is unloaded.
+ Setting the interval to zero causes the statistics to
+ be printed -only- when the module is unloaded, and this
+ is the default.
+
+verbose Enable debug printk()s. Default is disabled.
+
+
+OUTPUT
+
+The statistics output is as follows:
+
+ rcutorture: --- Start of test: nreaders=16 stat_interval=0 verbose=0
+ rcutorture: rtc: 0000000000000000 ver: 1916 tfle: 0 rta: 1916 rtaf: 0 rtf: 1915 rtmbe: 0
+ rcutorture: Reader Pipe: 1466408 9747 0 0 0 0 0 0 0 0 0
+ rcutorture: Reader Batch: 1464477 11678 0 0 0 0 0 0 0 0
+ rcutorture: Free-Block Circulation: 1915 1915 1915 1915 1915 1915 1915 1915 1915 1915 0
+ rcutorture: --- End of test: SUCCESS
+
+The command "dmesg | grep rcutorture:" will extract this information on
+most systems. On more esoteric configurations, it may be necessary to
+use other commands to access the output of the printk()s used by
+the RCU torture test. The printk()s use KERN_ALERT, so they should
+be evident. ;-)
+
+The entries are as follows:
+
+o "ggp": The number of counter flips (or batches) since boot.
+
+o "rtc": The hexadecimal address of the structure currently visible
+ to readers.
+
+o "ver": The number of times since boot that the rcutw writer task
+ has changed the structure visible to readers.
+
+o "tfle": If non-zero, indicates that the "torture freelist"
+ containing structures to be placed into the "rtc" area is empty.
+ This condition is important, since it can fool you into thinking
+ that RCU is working when it is not. :-/
+
+o "rta": Number of structures allocated from the torture freelist.
+
+o "rtaf": Number of allocations from the torture freelist that have
+ failed due to the list being empty.
+
+o "rtf": Number of frees into the torture freelist.
+
+o "rtmbe": Number of memory-barrier failures detected (which would
+ indicate problems with either the test itself or the underlying
+ memory-barrier primitives for the CPU architecture on which the
+ failure occurred).
+
+o "Reader Pipe": Histogram of "ages" of structures seen by readers.
+ If any entries past the first two are non-zero, RCU is broken.
+ And rcutorture prints the error flag string "!!!" to make sure
+ you notice. The age of a newly allocated structure is zero,
+ it becomes one when removed from reader visibility, and is
+ incremented once per grace period subsequently -- and is freed
+ after passing through (RCU_TORTURE_PIPE_LEN-2) grace periods.
+
+ The output displayed above was taken from a correctly working
+ RCU. If you want to see what it looks like when broken, break
+ it yourself. ;-)
+
+o "Reader Batch": Another histogram of "ages" of structures seen
+ by readers, but in terms of counter flips (or batches) rather
+ than in terms of grace periods. The legal number of non-zero
+ entries is again two. The reason for this separate view is
+ that it is easier to get the third entry to show up in the
+ "Reader Batch" list than in the "Reader Pipe" list.
+
+o "Free-Block Circulation": Shows the number of torture structures
+ that have reached a given point in the pipeline. The first element
+ should closely correspond to the number of structures allocated,
+ the second to the number that have been removed from reader view,
+ and all but the last remaining to the corresponding number of
+ passes through a grace period. The last entry should be zero,
+ as it is only incremented if a torture structure's counter
+ somehow gets incremented farther than it should.
+
+
+USAGE
+
+The following script may be used to torture RCU:
+
+ #!/bin/sh
+
+ modprobe rcutorture
+ sleep 100
+ rmmod rcutorture
+ dmesg | grep rcutorture:
+
+The output can be manually inspected for the error flag of "!!!".
+One could of course create a more elaborate script that automatically
+checked for such errors.
Index: linux/Documentation/kernel-parameters.txt
===================================================================
--- linux.orig/Documentation/kernel-parameters.txt
+++ linux/Documentation/kernel-parameters.txt
@@ -52,6 +52,7 @@ restrictions referred to are that the re
MTD MTD support is enabled.
NET Appropriate network support is enabled.
NUMA NUMA support is enabled.
+ GENERIC_TIME The generic timeofday code is enabled.
NFS Appropriate NFS support is enabled.
OSS OSS sound support is enabled.
PARIDE The ParIDE subsystem is enabled.
@@ -329,10 +330,11 @@ running once the system is up.
Value can be changed at runtime via
/selinux/checkreqprot.
- clock= [BUGS=IA-32,HW] gettimeofday timesource override.
- Forces specified timesource (if avaliable) to be used
- when calculating gettimeofday(). If specicified
- timesource is not avalible, it defaults to PIT.
+ clock= [BUGS=IA-32, HW] gettimeofday clocksource override.
+ [Deprecated]
+ Forces specified clocksource (if available) to be used
+ when calculating gettimeofday(). If specified
+ clocksource is not available, it defaults to PIT.
Format: { pit | tsc | cyclone | pmtmr }
hpet= [IA-32,HPET] option to disable HPET and use PIT.
@@ -1479,6 +1481,10 @@ running once the system is up.
time Show timing data prefixed to each printk message line
+ clocksource= [GENERIC_TIME] Override the default clocksource
+ Override the default clocksource and use the clocksource
+ with the name specified.
+
tipar.timeout= [HW,PPT]
Set communications timeout in tenths of a second
(default 15).
Index: linux/Documentation/ktimers.txt
===================================================================
--- /dev/null
+++ linux/Documentation/ktimers.txt
@@ -0,0 +1,239 @@
+
+ktimers - subsystem for high-precision kernel timers
+----------------------------------------------------
+
+This patch introduces a new subsystem for high-precision kernel timers.
+
+Why two timer subsystems? After a lot of back and forth trying to
+integrate high-precision and high-resolution features into the existing
+timer framework, and after testing various such high-resolution timer
+implementations in practice, we came to the conclusion that the timer
+wheel code is fundamentally not suitable for such an approach. We
+initially didn't believe this ('there must be a way to solve this'), and
+we spent a considerable effort trying to integrate things into the timer
+wheel, but we failed. There are several reasons why such integration is
+impossible:
+
+- the forced handling of low-resolution and high-resolution timers in
+ the same way leads to a lot of compromises, macro magic and #ifdef
+ mess. The timers.c code is very "tightly coded" around jiffies and
+ 32-bitness assumptions, and has been honed and micro-optimized for a
+ narrow use case for many years - and thus even small extensions to it
+ frequently break the wheel concept, leading to even worse
+ compromises.
+
+- the unpredictable [O(N)] overhead of cascading leads to delays which
+ necessitate a more complex handling of high resolution timers, which
+ decreases robustness. Such a design still led to rather large timing
+ inaccuracies. Cascading is a fundamental property of the timer wheel
+ concept, it cannot be 'designed out' without inevitably degrading
+ other portions of the timers.c code in an unacceptable way.
+
+- the implementation of the current posix-timer subsystem on top of
+ the timer wheel has already introduced a quite complex handling of
+ the required readjusting of absolute CLOCK_REALTIME timers at
+ settimeofday or NTP time - showing the rigidity of the timer wheel
+ data structure.
+
+- the timer wheel code is most optimal for use cases which can be
+ identified as "timeouts". Such timeouts are usually set up to cover
+ error conditions in various I/O paths, such as networking and block
+ I/O. The vast majority of those timers never expire and are rarely
+ recascaded because the expected correct event arrives in time so they
+ can be removed from the timer wheel before any further processing of
+ them becomes necessary. Thus the users of these timeouts can accept
+ the granularity and precision tradeoffs of the timer wheel, and
+ largely expect the timer subsystem to have near-zero overhead. Timing
+ for them is not a core purpose, it's mostly a necessary evil to
+ guarantee the processing of requests, which should be as cheap and
+ unintrusive as possible.
+
+The primary users of precision timers are user-space applications that
+utilize nanosleep, posix-timers and itimer interfaces. Also, in-kernel
+users like drivers and subsystems with a requirement for precise timed
+events can benefit from the availability of a separate high-precision
+timer subsystem as well.
+
+The ktimer subsystem is easily extended with high-resolution
+capabilities, and patches for that exist and are maturing quickly. The
+increasing demand for realtime and multimedia applications along with
+other potential users for precise timers gives another reason to
+separate the "timeout" and "precise timer" subsystems.
+
+Another potential benefit is that such separation allows for future
+optimizations of the existing timer wheel implementation for the low
+resolution and low precision use cases - once the precision-sensitive
+APIs are separated from the timer wheel and are migrated over to
+ktimers. E.g. we could decrease the frequency of the timeout subsystem
+from 250 Hz to 100 Hz (or even smaller).
+
+ktimer subsystem implementation details
+---------------------------------------
+
+the basic design considerations were:
+
+- simplicity
+- robust, extensible abstractions
+- data structure not bound to jiffies or any other granularity
+- simplification of existing, timing related kernel code
+
+From our previous experience with various approaches of high-resolution
+timers another basic requirement was the immediate enqueueing and
+ordering of timers at activation time. After looking at several possible
+solutions such as radix trees and hashes, the red black tree was chosen
+as the basic data structure. Rbtrees are available as a library in the
+kernel and are used in various performance-critical areas of e.g. memory
+management and file systems. The rbtree is solely used for the time
+sorted ordering, while a separate list is used to give the expiry code
+fast access to the queued timers, without having to walk the rbtree.
+(This separate list is also useful for high-resolution timers where we
+need separate pending and expired queues while keeping the time-order
+intact.)
+
+The time-ordered enqueueing is not purely for the purposes of the
+high-resolution timers extension though, it also simplifies the handling
+of absolute timers based on CLOCK_REALTIME. The existing implementation
+needed to keep an extra list of all armed absolute CLOCK_REALTIME timers
+along with complex locking. In case of settimeofday and NTP, all the
+timers (!) had to be dequeued, the time-changing code had to fix them up
+one by one, and all of them had to be enqueued again. The time-ordered
+enqueueing and the storage of the expiry time in absolute time units
+removes all this complex and poorly scaling code from the posix-timer
+implementation - the clock can simply be set without having to touch the
+rbtree. This also makes the handling of posix-timers simpler in general.
+
+The locking and per-CPU behavior of ktimers was mostly taken from the
+existing timer wheel code, as it is mature and well suited. Sharing code
+was not really a win, due to the different data structures. Also, the
+ktimer functions now have clearer behavior and clearer names - such as
+ktimer_try_to_cancel() and ktimer_cancel() [which are roughly equivalent
+to del_timer() and del_timer_sync()] - and there's no direct 1:1 mapping
+between them on the algorithmic level.
+
+The internal representation of time values (ktime_t) is implemented via
+macros and inline functions, and can be switched between a "hybrid
+union" type and a plain "scalar" 64bit nanoseconds representation (at
+compile time). The hybrid union type exists to optimize time conversions
+on 32bit CPUs. This build-time-selectable ktime_t storage format was
+implemented to avoid the performance impact of 64-bit multiplications
+and divisions on 32bit CPUs. Such operations are frequently necessary to
+convert between the storage formats provided by kernel and userspace
+interfaces and the internal time format. (See include/linux/ktime.h for
+further details.)
+
+ktimers - rounding of timer values
+----------------------------------
+
+Why do we need rounding at all ?
+
+Firstly, the POSIX specification requires rounding to the resolution -
+whatever that means. The POSIX specification is quite imprecise on the
+details of rounding though, so a practical interpretation had to be
+found.
+
+The first question is which resolution value should be returned to the
+user by the clock_getres() interface.
+
+The simplest case is when the hardware is capable of 1 nsec resolution:
+in that case we can fulfill all wishes and there is no rounding :-)
+
+Another simple case is when the clock hardware has a limited resolution
+that the kernel wants to fully offer to user-space: in this case that
+limited resolution is returned to userspace.
+
+The hairy case is when the underlying hardware is capable of finer
+grained resolution, but the kernel is not willing to offer that
+resolution. Why would the kernel want to do that? Because e.g. the
+system could easily be DoS-ed with high-frequency timer interrupts. Or
+the kernel might want to cluster high-res timer interrupts into groups
+for performance reasons, so that extremely high interrupt rates are
+avoided. So the kernel needs some leeway in deciding the 'effective'
+resolution that it is willing to expose to userspace.
+
+In this case, the clock_getres() decision is easy: we want to return the
+'effective' resolution, not the 'theoretical' resolution. Thus an
+application programmer gets correct information about what granularity
+and accuracy to expect from the system.
+
+What is much less obvious in both the 'hardware is low-res' and 'kernel
+wants to offer low-res' cases is the actual behavior of timers, and
+where and how to round time values to the 'effective' resolution of the
+clock.
+
+For this we first need to see what types of expiries there exist for
+ktimers, and how rounding affects them. Ktimers have the following
+variants:
+
+- relative one-shot timers
+- absolute one-shot timers
+- relative interval timers
+- absolute interval timers
+
+Interval timers can be led back to one-shot timers: they are a series of
+one-shot timers with the same interval. Relative one-shot timers can be
+handled identically to absolute one-shot timers after adding the
+relative expiry time to the current time of the respective clock.
+
+We picked to handle two cases of rounding:
+
+- the rounding of the absolute value of the first expiry time
+- the rounding of the timer interval
+
+An alternative implementation would be to not round the interval and to
+implicitly round at every timer event, but it's not clear what the
+advantages would be from doing that. There are a couple of
+disadvantages:
+
+- the technique seems to contradict the standard's requirement that
+ 'time values ... be rounded' (which the interval clearly is).
+
+- other OSs implement the rounding in the way we implemented it.
+
+- also, there is an application surprise factor, the 'do not round
+ intervals' technique can lead to the following sample sequence of
+ events:
+
+ Interval: 1.7ms
+ Resolution: 1ms
+
+ Event timeline:
+
+ 2ms - 4ms - 6ms - 7ms - 9ms - 11ms - 12ms - 14ms - 16ms - 17ms ...
+
+ this 2,2,1,2,2,1...msec 'unpredictable and uneven' relative distance
+ of events could surprise applications.
+
+(as a sidenote, current POSIX APIs could be extended with a method of
+periodic timers to have an 'average' frequency, where there is no
+rounding of the interval. No such API exists at the moment.)
+
+ktimers - testing and verification
+----------------------------------
+
+We used the high-resolution timer subsystem on top of ktimers to verify
+the ktimer implementation details in practice, and we also ran the posix
+timer tests in order to ensure specification compliance.
+
+The ktimer patch converts the following kernel functionality to use
+ktimers:
+
+ - nanosleep
+ - itimers
+ - posix-timers
+
+The conversion of nanosleep and posix-timers enabled the unification of
+nanosleep and clock_nanosleep.
+
+The code was successfully compiled for the following platforms:
+
+ i386, x86_64, ARM, PPC, PPC64, IA64
+
+The code was run-tested on the following platforms:
+
+ i386(UP/SMP), x86_64(UP/SMP), ARM, PPC
+
+ktimers were also integrated into the -rt tree, along with a
+ktimers-based high-resolution timer implementation, so the ktimers code
+got a healthy amount of testing and use in practice.
+
+ Thomas Gleixner, Ingo Molnar
Index: linux/Documentation/timekeeping.txt
===================================================================
--- /dev/null
+++ linux/Documentation/timekeeping.txt
@@ -0,0 +1,246 @@
+How timekeeping works with CONFIG_GENERIC_TIME
+========================================================================
+
+The generic timekeeping code maintains and allows access to the system's understanding of how much time has passed from a certain point. However, in order to measure the passing of time, the generic timekeeping code relies on the clocksource abstraction. A clocksource abstracts a free running counter whose value increases at a known frequency.
+
+In the generic timekeeping code, we use a pointer to a selected clocksource to measure the passing of time.
+
+struct clocksource *clock
+
+The clocksource has some limitations however. Since it's likely of fixed width, it will not increment forever and will overflow. In order to still properly keep time, we must occasionally accumulate an interval of time. In the generic timekeeping code, we accumulate the amount of time since the system booted into the value system_time, which keeps nanosecond resolution in a ktime_t storage.
+
+ktime_t system_time
+
+Since it's likely your system has not been running continually since midnight on the 1st of January in 1970, we must provide an offset from that time in accordance with conventions. This only occasionally changed (via settimeofday()) offset is the wall_time_offset value, which is also stored as a ktime_t.
+
+ktime_t wall_time_offset
+
+
+Since we accumulate time in intervals, we need a base cycle value that we can use to generate an offset from the time value kept in system_time. We store this value in cycle_last.
+
+cycle_t cycle_last;
+
+
+Further since all clocks drift somewhat from each other, we use the adjustment values provided via adjtimex() to correct our clocksource frequency for each interval. This frequency adjustment value is stored in ntp_adj.
+
+long ntp_adj;
+
+Now that we've covered the core global variables for timekeeping, let's look at how we maintain these values.
+
+As stated above, we want to avoid the clocksource from overflowing on us, so we accumulate a time interval periodically. This periodic accumulation function is called timeofday_periodic_hook(). In simplified pseudo code, it logically is presented as:
+
+timeofday_periodic_hook():
+ cycle_now = read_clocksource(clock)
+ cycle_delta = (cycle_now - cycle_last) & clock->mask
+ nsec = cyc2ns(clock, cycle_delta, ntp_adj)
+ system_time += nsec
+ cycle_last = cycle_now
+
+ /* do other stuff */
+
+You can see we read the cycle value from the clocksource, calculate a cycle delta for the interval since we last called timeofday_periodic_hook(), convert that cycle delta to a nanosecond interval (for now ignore ntp_adj), add it to the system time and finally set our cycle_last value to cycle_now for the next interval. Using this simple algorithm we can correctly measure and record the passing of time.
+
+But just storing this info isn't very useful, we also want to make it available to be used elsewhere. So how do we provide a notion of how much time has passed in between calls to timeofday_periodic_hook()?
+
+First, let's create a function that calculates the time since the last call to timeofday_periodic_hook().
+
+get_nsec_offset():
+ cycle_now = read_clocksource(clock)
+ cycle_delta = (cycle_now - cycle_last) & clock->mask
+ nsec = cyc2ns(clock, cycle_delta, ntp_adj)
+ return nsec
+
+Here you can see, we read the clocksource, calculate a cycle interval, and convert that to a nanosecond interval. Just like how it is done in timeofday_periodic_hook!
+
+Now let's use this function to provide the number of nanoseconds that the system has been running:
+
+do_monotonic_clock():
+ return system_time + get_nsec_offset()
+
+Here we trivially add the nanosecond offset since the last timeofday_periodic_hook() to the value of system_time which was stored at the last timeofday_periodic_hook().
+
+Note that since we use the same method to calculate time intervals, assuming each function is atomic and the clocksource functions as it should, time cannot go backward!
+
+Now to get the time of day using the standard convention:
+
+do_gettimeofday():
+ return do_monotonic_clock() + wall_time_offset
+
+We simply add the wall_time_offset, and we have the number of nanoseconds since 1970 began!
+
+
+Of course, in real life, things are not so static. We have to handle a number of dynamic values that may change and affect timekeeping. In order to do these safely, we must only change values in-between intervals. This means the periodic_hook call must handle these changes.
+
+Since clocksources can be changed while the system is running, we need to check for and possibly switch to using new clocksources in the periodic_hook call. Further, clocksources may change their frequency. Since this must be done only at a safe point, we use the update_callback function pointer (for more details, see "How to write a clocksource driver" below), this too must be done in-between intervals in the periodic_hook call. Finally, since the ntp adjustment made in the cyc2ns conversion is not static, we need to update the ntp state machine and calculate a new adjustment value.
+
+This adds some extra pseudo code to the timeofday_periodic_hook function:
+
+timeofday_periodic_hook():
+ cycle_now = read_clocksource(clock)
+ cycle_delta = (cycle_now - cycle_last) & clock->mask
+ nsec = cyc2ns(clock, cycle_delta, ntp_adj)
+ system_time += nsec
+ cycle_last = cycle_now
+
+ next = get_next_clocksource()
+ if (next != clock):
+ cycle_last = read_clocksource(next)
+ clock = next
+
+ if (clock->update_callback):
+ clock->update_callback()
+
+ ntp_advance(nsec)
+ ppm = ntp_get_ppm_adjustment()
+ ntp_adj = ppm_to_mult_adj(clock, ppm)
+
+
+Unfortunately, the actual timeofday_periodic_hook code is not as simple as this pseudo code. For performance concerns, much has been done to pre-calculate values and use them repeatedly. Thus be aware that the code in timeofday.c is more complex, however the functional logic is the same.
+
+
+How to port an architecture to GENERIC_TIME
+========================================================================
+Porting an architecture to the GENERIC_TIME timekeeping code consists of moving a little bit of code around then deleting a fair amount. It is my hope that this will reduce the arch specific maintenance work around timekeeping.
+
+Porting an arch usually requires the following steps.
+
+1. Define CONFIG_GENERIC_TIME in the arch's Kconfig
+2. Implementing the following functions
+ nsec_t read_persistent_clock(void)
+ void sync_persistent_clock(struct timespec ts)
+3. Removing all of the arch specific timekeeping code
+ do_gettimeofday()
+ do_settimeofday()
+ etc
+4. Implementing clocksource drivers
+ See "How to write a clocksource driver" for more details
+
+The exceptions to the above are:
+
+5. If the arch has no continuous clocksource
+ A) Implement 1-3 in the above list.
+ B) Define CONFIG_IS_TICK_BASED in the arch's Kconfig
+ C) Implement the "long arch_getoffset(void)" function
+
+6. If the arch supports vsyscall gettimeofday (see x86_64 for reference)
+ A) Implement 1-4 in the above list
+ B) Define GENERIC_TIME_VSYSCALL
+ C) Implement arch_update_vsyscall_gtod()
+ D) Implement vsyscall gettimeofday (similar to __get_realtime_clock_ts)
+ E) Implement vread functions for supported clocksources
+
+
+
+How to write a clocksource driver.
+========================================================================
+First, a quick summary of what a clocksource driver provides.
+
+Simply put, a clocksource is an abstraction of a free running increasing counter. The abstraction provides the minimal amount of info for that counter to be usable for timekeeping. Those required values are:
+ 1. Its name
+ 2. A rating value for selection priority
+ 3. A read function pointer
+ 4. A mask value for correct twos-complement subtraction
+ 5. A mult and shift pair that approximate the counter frequency
+ mult/(2^shift) ~= nanoseconds per cycle
+
+Additionally, there are other optionally set values that allow for advanced functionality. Those values are:
+ 6. The update_callback function.
+ 7. The is_continuous flag.
+ 8. The vread function pointer
+ 9. The vdata pointer value
+
+
+Now let's go over these values in detail.
+
+1. Name.
+ The clocksource's name should be unique since it is used for both identification as well as for manually overriding the default clocksource selection. The name length must be shorter than 32 characters in order for it to be properly overridden.
+
+2. Rating value
+ This rating value is used as a priority value for clocksource selection. It has no direct connection to quality or physical properties of the clocksource, but is to be set and manipulated to guarantee that the best (by no specific metric) clocksource that will provide correct timekeeping is automatically selected. Rating suggestions can be found in include/linux/clocksource.h
+
+3. Read function pointer
+ This pointer should point to a function that returns an unsigned increasing cycle value from the clocksource. The value should have a coverage from zero to the maximum cycle value the clocksource can provide. This does not have to be direct hardware value and can also be a software counter. An example of a software counter is the jiffies clocksource.
+
+4. The mask value
+ This value should be the bitmask covering the full width of the counter, i.e. (2^N)-1 for an N-bit counter. This allows twos complement subtraction to work on overflow boundary conditions if the max value is less than (cycle_t)-1. So for example, if we have a 16 bit counter (ie: one that loops to zero after 0x0000FFFF), the mask would be 0xFFFF. So then when finding the cycle difference around an overflow, where now = 0x0013 and then = 0xFFEE, we can compute the cycle delta properly using the equation:
+ delta = (now - then)&mask
+ delta = (0x0013 - 0xFFEE) & 0xFFFF
+ delta = 0xFFFF0025 & 0xFFFF /* note the unmasked negative value */
+ delta = 0x25
+
+5. The mult and shift pair
+ These 32bit values approximate the nanosecond per cycle frequency of the clocksource using the equation: mult/(2^shift). If you have a khz or hz frequency value, the mult value for a given shift value can be easily calculated using the clocksource_hz2mult() and clocksource_khz2mult() helper functions. When selecting a shift value, it is important to be careful. Larger shift values give a finer precision in the cycle to nanosecond conversion and allows for more exact NTP adjustments. However if you select too large a shift value, the resulting mult value might overflow a cycle_t * mult computation.
+
+
+So if you have a simple hardware counter that does not change frequency, filling in the above should be sufficient for a functional clocksource. But read on for details on implementing a more complex clocksource.
+
+6. The update_callback function pointer.
+ If this function pointer is non-NULL, it will be called every periodic hook when it is safe for the clocksource to change its state. This would be necessary in the case where the counter frequency changes, for example. One user of this function pointer is the TSC clocksource. When the TSC frequency changes (which may occur if the cpu changes frequency) we need to notify the clocksource at a safe point where that state may change. Thus, if the TSC has changed frequency we set the new mult/shift values in the update_callback function.
+
+7. The is_continuous flag.
+ This flag variable (0 if false, 1 if true) denotes that the clocksource is continuous. This means that it is a purely hardware driven clocksource and is not dependent on any software code to run for it to increment properly. This denotation will be useful in the future when timer ticks may be disabled for long periods of time. Doing so using software clocksources, like the jiffies clocksource, would cause timekeeping problems.
+
+8. The vread function pointer.
+ This function pointer points to a user-space accessible function that reads the clocksource. This is used in userspace gettimeofday implementations to improve performance. See the x86-64 TSC clocksource implementation for an example.
+
+9. The vdata pointer.
+ This pointer is passed to the vread function pointer in a userspace gettimeofday implementation. Its usage is dependent on the vread implementation, but if the pointer points to data, that data must be readable from userspace.
+
+
+Now let's write a quick clocksource for an imaginary bit of hardware. Here are the specs:
+
+ A 32bit counter can be found at the MMIO address 0xFEEDF000. It runs at 100MHz. To enable it, the low bit of the address 0xFEEDF0F0 must be set to one.
+
+So let's start out with an empty cool-counter.c file, and define the clocksource.
+
+#include
+#include
+#include
+
+#define COOL_READ_PTR 0xFEEDF000
+#define COOL_START_PTR 0xFEEDF0F0
+
+static __iomem *cool_ptr = COOL_READ_PTR;
+
+struct clocksource clocksource_cool
+{
+ .name = "cool",
+ .rating = 200, /* its a pretty decent clock */
+ .mask = 0xFFFFFFFF, /* 32 bits */
+ .mult = 0, /*to be computed */
+ .shift = 10,
+}
+
+
+Now let's write the read function:
+
+cycle_t cool_counter_read(void)
+{
+ cycle_t ret = readl(cool_ptr);
+ return ret;
+}
+
+Finally, let's write the init function:
+
+void cool_counter_init(void)
+{
+ __iomem *ptr = COOL_START_PTR;
+ u32 val;
+
+ /* start the counter */
+ val = readl(ptr);
+ val |= 0x1;
+ writel(val, ptr);
+
+ /* finish initializing the clocksource */
+ clocksource_cool.read = cool_counter_read;
+ clocksource_cool.mult = clocksource_khz2mult(100000,
+ clocksource_cool.shift);
+
+ /* register the clocksource */
+ register_clocksource(&clocksource_cool);
+}
+module_init(cool_counter_init);
+
+
+Now wasn't that easy!
Index: linux/Makefile
===================================================================
--- linux.orig/Makefile
+++ linux/Makefile
@@ -1,7 +1,7 @@
VERSION = 2
PATCHLEVEL = 6
SUBLEVEL = 14
-EXTRAVERSION =
+EXTRAVERSION = -rt22
NAME=Affluent Albatross
# *DOCUMENTATION*
@@ -517,10 +517,14 @@ CFLAGS += $(call add-align,CONFIG_CC_AL
CFLAGS += $(call add-align,CONFIG_CC_ALIGN_LOOPS,-loops)
CFLAGS += $(call add-align,CONFIG_CC_ALIGN_JUMPS,-jumps)
-ifdef CONFIG_FRAME_POINTER
-CFLAGS += -fno-omit-frame-pointer $(call cc-option,-fno-optimize-sibling-calls,)
+ifdef CONFIG_MCOUNT
+CFLAGS += -pg -fno-omit-frame-pointer $(call cc-option,-fno-optimize-sibling-calls,)
else
-CFLAGS += -fomit-frame-pointer
+ ifdef CONFIG_FRAME_POINTER
+ CFLAGS += -fno-omit-frame-pointer $(call cc-option,-fno-optimize-sibling-calls,)
+ else
+ CFLAGS += -fomit-frame-pointer
+ endif
endif
ifdef CONFIG_DEBUG_INFO
Index: linux/arch/alpha/kernel/time.c
===================================================================
--- linux.orig/arch/alpha/kernel/time.c
+++ linux/arch/alpha/kernel/time.c
@@ -55,10 +55,6 @@
#include "proto.h"
#include "irq_impl.h"
-u64 jiffies_64 = INITIAL_JIFFIES;
-
-EXPORT_SYMBOL(jiffies_64);
-
extern unsigned long wall_jiffies; /* kernel/timer.c */
static int set_rtc_mmss(unsigned long);
Index: linux/arch/arm/Kconfig
===================================================================
--- linux.orig/arch/arm/Kconfig
+++ linux/arch/arm/Kconfig
@@ -50,6 +50,10 @@ config UID16
bool
default y
+config GENERIC_HARDIRQS
+ bool
+ default y
+
config RWSEM_GENERIC_SPINLOCK
bool
default y
@@ -339,18 +343,7 @@ config NR_CPUS
depends on SMP
default "4"
-config PREEMPT
- bool "Preemptible Kernel (EXPERIMENTAL)"
- depends on EXPERIMENTAL
- help
- This option reduces the latency of the kernel when reacting to
- real-time or interactive events by allowing a low priority process to
- be preempted even if it is in kernel mode executing a system call.
- This allows applications to run more reliably even when the system is
- under load.
-
- Say Y here if you are building a kernel for a desktop, embedded
- or real-time system. Say N if you are unsure.
+source kernel/Kconfig.preempt
config NO_IDLE_HZ
bool "Dynamic tick timer"
Index: linux/arch/arm/boot/compressed/head.S
===================================================================
--- linux.orig/arch/arm/boot/compressed/head.S
+++ linux/arch/arm/boot/compressed/head.S
@@ -718,6 +718,19 @@ memdump: mov r12, r0
mov pc, r10
#endif
+#ifdef CONFIG_MCOUNT
+/* CONFIG_MCOUNT causes boot header to be built with -pg requiring this
+ * trampoline
+ */
+ .text
+ .align 0
+ .type mcount %function
+ .global mcount
+mcount:
+ mov pc, lr @ just return
+#endif
+
+
reloc_end:
.align
Index: linux/arch/arm/boot/compressed/misc.c
===================================================================
--- linux.orig/arch/arm/boot/compressed/misc.c
+++ linux/arch/arm/boot/compressed/misc.c
@@ -199,6 +199,7 @@ static ulg free_mem_ptr_end;
#define HEAP_SIZE 0x2000
+#define ZLIB_INFLATE_NO_INFLATE_LOCK
#include "../../../../lib/inflate.c"
#ifndef STANDALONE_DEBUG
Index: linux/arch/arm/common/dmabounce.c
===================================================================
--- linux.orig/arch/arm/common/dmabounce.c
+++ linux/arch/arm/common/dmabounce.c
@@ -403,11 +403,11 @@ dma_map_single(struct device *dev, void
BUG_ON(dir == DMA_NONE);
- local_irq_save(flags);
+ raw_local_irq_save(flags);
dma_addr = map_single(dev, ptr, size, dir);
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
return dma_addr;
}
@@ -430,11 +430,11 @@ dma_unmap_single(struct device *dev, dma
BUG_ON(dir == DMA_NONE);
- local_irq_save(flags);
+ raw_local_irq_save(flags);
unmap_single(dev, dma_addr, size, dir);
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
}
int
@@ -449,7 +449,7 @@ dma_map_sg(struct device *dev, struct sc
BUG_ON(dir == DMA_NONE);
- local_irq_save(flags);
+ raw_local_irq_save(flags);
for (i = 0; i < nents; i++, sg++) {
struct page *page = sg->page;
@@ -461,7 +461,7 @@ dma_map_sg(struct device *dev, struct sc
map_single(dev, ptr, length, dir);
}
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
return nents;
}
@@ -478,7 +478,7 @@ dma_unmap_sg(struct device *dev, struct
BUG_ON(dir == DMA_NONE);
- local_irq_save(flags);
+ raw_local_irq_save(flags);
for (i = 0; i < nents; i++, sg++) {
dma_addr_t dma_addr = sg->dma_address;
@@ -487,7 +487,7 @@ dma_unmap_sg(struct device *dev, struct
unmap_single(dev, dma_addr, length, dir);
}
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
}
void
@@ -499,11 +499,11 @@ dma_sync_single_for_cpu(struct device *d
dev_dbg(dev, "%s(ptr=%p,size=%d,dir=%x)\n",
__func__, (void *) dma_addr, size, dir);
- local_irq_save(flags);
+ raw_local_irq_save(flags);
sync_single(dev, dma_addr, size, dir);
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
}
void
@@ -515,11 +515,11 @@ dma_sync_single_for_device(struct device
dev_dbg(dev, "%s(ptr=%p,size=%d,dir=%x)\n",
__func__, (void *) dma_addr, size, dir);
- local_irq_save(flags);
+ raw_local_irq_save(flags);
sync_single(dev, dma_addr, size, dir);
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
}
void
@@ -534,7 +534,7 @@ dma_sync_sg_for_cpu(struct device *dev,
BUG_ON(dir == DMA_NONE);
- local_irq_save(flags);
+ raw_local_irq_save(flags);
for (i = 0; i < nents; i++, sg++) {
dma_addr_t dma_addr = sg->dma_address;
@@ -543,7 +543,7 @@ dma_sync_sg_for_cpu(struct device *dev,
sync_single(dev, dma_addr, length, dir);
}
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
}
void
@@ -558,7 +558,7 @@ dma_sync_sg_for_device(struct device *de
BUG_ON(dir == DMA_NONE);
- local_irq_save(flags);
+ raw_local_irq_save(flags);
for (i = 0; i < nents; i++, sg++) {
dma_addr_t dma_addr = sg->dma_address;
@@ -567,7 +567,7 @@ dma_sync_sg_for_device(struct device *de
sync_single(dev, dma_addr, length, dir);
}
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
}
int
Index: linux/arch/arm/common/locomo.c
===================================================================
--- linux.orig/arch/arm/common/locomo.c
+++ linux/arch/arm/common/locomo.c
@@ -425,6 +425,12 @@ static struct irqchip locomo_spi_chip =
.unmask = locomo_spi_unmask_irq,
};
+static DEFINE_IRQ_CHAINED_TYPE(locomo_handler);
+static DEFINE_IRQ_CHAINED_TYPE(locomo_key_handler);
+static DEFINE_IRQ_CHAINED_TYPE(locomo_gpio_handler);
+static DEFINE_IRQ_CHAINED_TYPE(locomo_lt_handler);
+static DEFINE_IRQ_CHAINED_TYPE(locomo_spi_handler);
+
static void locomo_setup_irq(struct locomo *lchip)
{
int irq;
Index: linux/arch/arm/common/sa1111.c
===================================================================
--- linux.orig/arch/arm/common/sa1111.c
+++ linux/arch/arm/common/sa1111.c
@@ -159,11 +159,11 @@ sa1111_irq_handler(unsigned int irq, str
for (i = IRQ_SA1111_START; stat0; i++, stat0 >>= 1)
if (stat0 & 1)
- do_edge_IRQ(i, irq_desc + i, regs);
+ handle_edge_irq(i, irq_desc + i, regs);
for (i = IRQ_SA1111_START + 32; stat1; i++, stat1 >>= 1)
if (stat1 & 1)
- do_edge_IRQ(i, irq_desc + i, regs);
+ handle_edge_irq(i, irq_desc + i, regs);
/* For level-based interrupts */
desc->chip->unmask(irq);
@@ -368,6 +368,8 @@ static struct irqchip sa1111_high_chip =
.set_wake = sa1111_wake_highirq,
};
+static DEFINE_IRQ_CHAINED_TYPE(sa1111_irq_handler);
+
static void sa1111_setup_irq(struct sa1111 *sachip)
{
void __iomem *irqbase = sachip->base + SA1111_INTC;
Index: linux/arch/arm/common/time-acorn.c
===================================================================
--- linux.orig/arch/arm/common/time-acorn.c
+++ linux/arch/arm/common/time-acorn.c
@@ -16,6 +16,7 @@
#include
#include
#include
+#include
#include
#include
@@ -76,7 +77,7 @@ ioc_timer_interrupt(int irq, void *dev_i
static struct irqaction ioc_timer_irq = {
.name = "timer",
- .flags = SA_INTERRUPT,
+ .flags = SA_INTERRUPT | SA_NODELAY,
.handler = ioc_timer_interrupt
};
Index: linux/arch/arm/kernel/calls.S
===================================================================
--- linux.orig/arch/arm/kernel/calls.S
+++ linux/arch/arm/kernel/calls.S
@@ -7,11 +7,8 @@
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
- * This file is included twice in entry-common.S
+ * NR_syscalls now defined in include/asm-arm/unistd.h - tglx
*/
-#ifndef NR_syscalls
-#define NR_syscalls 328
-#else
__syscall_start:
/* 0 */ .long sys_restart_syscall
@@ -341,4 +338,3 @@ __syscall_end:
.rept NR_syscalls - (__syscall_end - __syscall_start) / 4
.long sys_ni_syscall
.endr
-#endif
Index: linux/arch/arm/kernel/dma.c
===================================================================
--- linux.orig/arch/arm/kernel/dma.c
+++ linux/arch/arm/kernel/dma.c
@@ -22,7 +22,7 @@
#include
-DEFINE_SPINLOCK(dma_spin_lock);
+DEFINE_RAW_SPINLOCK(dma_spin_lock);
#if MAX_DMA_CHANNELS > 0
Index: linux/arch/arm/kernel/ecard.c
===================================================================
--- linux.orig/arch/arm/kernel/ecard.c
+++ linux/arch/arm/kernel/ecard.c
@@ -619,7 +619,7 @@ ecard_irqexp_handler(unsigned int irq, s
ecard_t *ec = slot_to_ecard(slot);
if (ec->claimed) {
- struct irqdesc *d = irqdesc + ec->irq;
+ struct irqdesc *d = irq_desc + ec->irq;
/*
* this ugly code is so that we can operate a
* prioritorising system:
@@ -1052,6 +1052,9 @@ ecard_probe(int slot, card_type_t type)
return rc;
}
+static DEFINE_IRQ_CHAINED_TYPE(ecard_irqexp_handler);
+static DEFINE_IRQ_CHAINED_TYPE(ecard_irq_handler);
+
/*
* Initialise the expansion card system.
* Locate all hardware - interrupt management and
@@ -1081,8 +1084,10 @@ static int __init ecard_init(void)
irqhw = ecard_probeirqhw();
- set_irq_chained_handler(IRQ_EXPANSIONCARD,
- irqhw ? ecard_irqexp_handler : ecard_irq_handler);
+ if (irqhw)
+ set_irq_chained_handler(IRQ_EXPANSIONCARD, ecard_irqexp_handler);
+ else
+ set_irq_chained_handler(IRQ_EXPANSIONCARD, ecard_irq_handler);
ecard_proc_init();
Index: linux/arch/arm/kernel/entry-armv.S
===================================================================
--- linux.orig/arch/arm/kernel/entry-armv.S
+++ linux/arch/arm/kernel/entry-armv.S
@@ -184,7 +184,7 @@ __irq_svc:
irq_handler
#ifdef CONFIG_PREEMPT
ldr r0, [tsk, #TI_FLAGS] @ get flags
- tst r0, #_TIF_NEED_RESCHED
+ tst r0, #_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_DELAYED
blne svc_preempt
preempt_return:
ldr r0, [tsk, #TI_PREEMPT] @ read preempt value
@@ -211,7 +211,7 @@ svc_preempt:
str r7, [tsk, #TI_PREEMPT] @ expects preempt_count == 0
1: bl preempt_schedule_irq @ irq en/disable is done inside
ldr r0, [tsk, #TI_FLAGS] @ get new tasks TI_FLAGS
- tst r0, #_TIF_NEED_RESCHED
+ tst r0, #_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_DELAYED
beq preempt_return @ go again
b 1b
#endif
Index: linux/arch/arm/kernel/entry-common.S
===================================================================
--- linux.orig/arch/arm/kernel/entry-common.S
+++ linux/arch/arm/kernel/entry-common.S
@@ -3,6 +3,8 @@
*
* Copyright (C) 2000 Russell King
*
+ * LATENCY_TRACE/mcount support (C) 2005 Timesys john.cooper@timesys.com
+ *
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
@@ -41,7 +43,7 @@ ret_fast_syscall:
fast_work_pending:
str r0, [sp, #S_R0+S_OFF]! @ returned r0
work_pending:
- tst r1, #_TIF_NEED_RESCHED
+ tst r1, #_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_DELAYED
bne work_resched
tst r1, #_TIF_NOTIFY_RESUME | _TIF_SIGPENDING
beq no_work_pending
@@ -52,7 +54,8 @@ work_pending:
b no_work_pending
work_resched:
- bl schedule
+ bl __schedule
+
/*
* "slow" syscall return path. "why" tells us if this was a real syscall.
*/
@@ -88,8 +91,6 @@ ENTRY(ret_from_fork)
b ret_slow_syscall
-#include "calls.S"
-
/*=============================================================================
* SWI handler
*-----------------------------------------------------------------------------
@@ -288,3 +289,110 @@ sys_mmap2:
str r5, [sp, #4]
b do_mmap2
#endif
+
+#ifdef CONFIG_FRAME_POINTER
+
+#ifdef CONFIG_MCOUNT
+/*
+ * At the point where we are in mcount() we maintain the
+ * frame of the prologue code and keep the call to mcount()
+ * out of the stack frame list:
+
+ saved pc <---\ caller of instrumented routine
+ saved lr |
+ ip/prev_sp |
+ fp -----^ |
+ : |
+ |
+ -> saved pc | instrumented routine
+ | saved lr |
+ | ip/prev_sp |
+ | fp ---------/
+ | :
+ |
+ | mcount
+ | saved pc
+ | saved lr
+ | ip/prev sp
+ -- fp
+ r3
+ r2
+ r1
+ sp-> r0
+ :
+ */
+
+ .text
+ .align 0
+ .type mcount %function
+ .global mcount
+
+/* gcc -pg generated FUNCTION_PROLOGUE references mcount()
+ * and has already created the stack frame invocation for
+ * the routine we have been called to instrument. We create
+ * a complete frame nevertheless, as we want to use the same
+ * call to mcount() from c code.
+ */
+mcount:
+
+ ldr ip, =mcount_enabled @ leave early, if disabled
+ ldr ip, [ip] @ ip = current value of mcount_enabled
+ cmp ip, #0
+ moveq pc,lr @ zero: return to the caller without tracing
+
+ mov ip, sp @ ip = sp on entry, saved below as prev sp
+ stmdb sp!, {r0 - r3, fp, ip, lr, pc} @ create stack frame
+
+ ldr r1, [fp, #-4] @ get saved lr (the return address
+ @ into the caller of the
+ @ instrumented function)
+ mov r0, lr @ get lr (the return address
+ @ into the instrumented function)
+
+ sub fp, ip, #4 @ point fp at this frame
+
+ bl __trace @ called with r0/r1 as loaded above
+1:
+ ldmdb fp, {r0 - r3, fp, sp, pc} @ pop entry frame and return
+
+#endif
+
+/* ARM replacement for unsupported gcc __builtin_return_address(n)
+ * where 0 < n. n == 0 is supported here as well.
+ *
+ * Walk up the stack frame until the desired frame is found or a NULL
+ * fp is encountered, return NULL in the latter case.
+ *
+ * Note: it is possible under code optimization for the stack invocation
+ * of an ancestor function (level N) to be removed before calling a
+ * descendant function (level N+1). No easy means is available to deduce
+ * this scenario with the result being [for example] caller_addr(0) when
+ * called from level N+1 returning level N-1 rather than the expected
+ * level N. This optimization issue appears isolated to the case of
+ * a call to a level N+1 routine made at the tail end of a level N
+ * routine -- the level N frame is deleted and a simple branch is made
+ * to the level N+1 routine.
+ */
+
+ .text
+ .align 0
+ .type arm_return_addr %function
+ .global arm_return_addr
+
+arm_return_addr:
+ mov ip, r0 @ ip = n, the number of frames left to walk up
+ mov r0, fp @ r0 = frame pointer of the frame being examined
+3:
+ cmp r0, #0
+ beq 1f @ frame list hit end, bail
+ cmp ip, #0
+ beq 2f @ reached desired frame
+ ldr r0, [r0, #-12] @ else continue, get next fp
+ sub ip, ip, #1 @ one frame fewer to go
+ b 3b
+2:
+ ldr r0, [r0, #-4] @ get target return address
+1:
+ mov pc, lr @ return; r0 is the address found, or 0 (NULL)
+
+#endif
Index: linux/arch/arm/kernel/fiq.c
===================================================================
--- linux.orig/arch/arm/kernel/fiq.c
+++ linux/arch/arm/kernel/fiq.c
@@ -38,6 +38,7 @@
#include
#include
#include
+#include
#include
#include
@@ -88,7 +89,7 @@ void set_fiq_handler(void *start, unsign
* disable irqs for the duration. Note - these functions are almost
* entirely coded in assembly.
*/
-void __attribute__((naked)) set_fiq_regs(struct pt_regs *regs)
+void notrace __attribute__((naked)) set_fiq_regs(struct pt_regs *regs)
{
register unsigned long tmp;
asm volatile (
@@ -106,7 +107,7 @@ void __attribute__((naked)) set_fiq_regs
: "r" (&regs->ARM_r8), "I" (PSR_I_BIT | PSR_F_BIT | FIQ_MODE));
}
-void __attribute__((naked)) get_fiq_regs(struct pt_regs *regs)
+void notrace __attribute__((naked)) get_fiq_regs(struct pt_regs *regs)
{
register unsigned long tmp;
asm volatile (
Index: linux/arch/arm/kernel/init_task.c
===================================================================
--- linux.orig/arch/arm/kernel/init_task.c
+++ linux/arch/arm/kernel/init_task.c
@@ -12,8 +12,8 @@
#include
#include
-static struct fs_struct init_fs = INIT_FS;
-static struct files_struct init_files = INIT_FILES;
+static struct fs_struct init_fs = INIT_FS(init_fs);
+static struct files_struct init_files = INIT_FILES(init_files);
static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
struct mm_struct init_mm = INIT_MM(init_mm);
Index: linux/arch/arm/kernel/irq.c
===================================================================
--- linux.orig/arch/arm/kernel/irq.c
+++ linux/arch/arm/kernel/irq.c
@@ -27,6 +27,7 @@
#include
#include
#include
+#include
#include
#include
#include
@@ -38,193 +39,11 @@
#include
#include
-#include
#include
-#include
#include
-/*
- * Maximum IRQ count. Currently, this is arbitary. However, it should
- * not be set too low to prevent false triggering. Conversely, if it
- * is set too high, then you could miss a stuck IRQ.
- *
- * Maybe we ought to set a timer and re-enable the IRQ at a later time?
- */
-#define MAX_IRQ_CNT 100000
-
-static int noirqdebug;
-static volatile unsigned long irq_err_count;
-static DEFINE_SPINLOCK(irq_controller_lock);
-static LIST_HEAD(irq_pending);
-
-struct irqdesc irq_desc[NR_IRQS];
void (*init_arch_irq)(void) __initdata = NULL;
-/*
- * No architecture-specific irq_finish function defined in arm/arch/irqs.h.
- */
-#ifndef irq_finish
-#define irq_finish(irq) do { } while (0)
-#endif
-
-/*
- * Dummy mask/unmask handler
- */
-void dummy_mask_unmask_irq(unsigned int irq)
-{
-}
-
-irqreturn_t no_action(int irq, void *dev_id, struct pt_regs *regs)
-{
- return IRQ_NONE;
-}
-
-void do_bad_IRQ(unsigned int irq, struct irqdesc *desc, struct pt_regs *regs)
-{
- irq_err_count += 1;
- printk(KERN_ERR "IRQ: spurious interrupt %d\n", irq);
-}
-
-static struct irqchip bad_chip = {
- .ack = dummy_mask_unmask_irq,
- .mask = dummy_mask_unmask_irq,
- .unmask = dummy_mask_unmask_irq,
-};
-
-static struct irqdesc bad_irq_desc = {
- .chip = &bad_chip,
- .handle = do_bad_IRQ,
- .pend = LIST_HEAD_INIT(bad_irq_desc.pend),
- .disable_depth = 1,
-};
-
-#ifdef CONFIG_SMP
-void synchronize_irq(unsigned int irq)
-{
- struct irqdesc *desc = irq_desc + irq;
-
- while (desc->running)
- barrier();
-}
-EXPORT_SYMBOL(synchronize_irq);
-
-#define smp_set_running(desc) do { desc->running = 1; } while (0)
-#define smp_clear_running(desc) do { desc->running = 0; } while (0)
-#else
-#define smp_set_running(desc) do { } while (0)
-#define smp_clear_running(desc) do { } while (0)
-#endif
-
-/**
- * disable_irq_nosync - disable an irq without waiting
- * @irq: Interrupt to disable
- *
- * Disable the selected interrupt line. Enables and disables
- * are nested. We do this lazily.
- *
- * This function may be called from IRQ context.
- */
-void disable_irq_nosync(unsigned int irq)
-{
- struct irqdesc *desc = irq_desc + irq;
- unsigned long flags;
-
- spin_lock_irqsave(&irq_controller_lock, flags);
- desc->disable_depth++;
- list_del_init(&desc->pend);
- spin_unlock_irqrestore(&irq_controller_lock, flags);
-}
-EXPORT_SYMBOL(disable_irq_nosync);
-
-/**
- * disable_irq - disable an irq and wait for completion
- * @irq: Interrupt to disable
- *
- * Disable the selected interrupt line. Enables and disables
- * are nested. This functions waits for any pending IRQ
- * handlers for this interrupt to complete before returning.
- * If you use this function while holding a resource the IRQ
- * handler may need you will deadlock.
- *
- * This function may be called - with care - from IRQ context.
- */
-void disable_irq(unsigned int irq)
-{
- struct irqdesc *desc = irq_desc + irq;
-
- disable_irq_nosync(irq);
- if (desc->action)
- synchronize_irq(irq);
-}
-EXPORT_SYMBOL(disable_irq);
-
-/**
- * enable_irq - enable interrupt handling on an irq
- * @irq: Interrupt to enable
- *
- * Re-enables the processing of interrupts on this IRQ line.
- * Note that this may call the interrupt handler, so you may
- * get unexpected results if you hold IRQs disabled.
- *
- * This function may be called from IRQ context.
- */
-void enable_irq(unsigned int irq)
-{
- struct irqdesc *desc = irq_desc + irq;
- unsigned long flags;
-
- spin_lock_irqsave(&irq_controller_lock, flags);
- if (unlikely(!desc->disable_depth)) {
- printk("enable_irq(%u) unbalanced from %p\n", irq,
- __builtin_return_address(0));
- } else if (!--desc->disable_depth) {
- desc->probing = 0;
- desc->chip->unmask(irq);
-
- /*
- * If the interrupt is waiting to be processed,
- * try to re-run it. We can't directly run it
- * from here since the caller might be in an
- * interrupt-protected region.
- */
- if (desc->pending && list_empty(&desc->pend)) {
- desc->pending = 0;
- if (!desc->chip->retrigger ||
- desc->chip->retrigger(irq))
- list_add(&desc->pend, &irq_pending);
- }
- }
- spin_unlock_irqrestore(&irq_controller_lock, flags);
-}
-EXPORT_SYMBOL(enable_irq);
-
-/*
- * Enable wake on selected irq
- */
-void enable_irq_wake(unsigned int irq)
-{
- struct irqdesc *desc = irq_desc + irq;
- unsigned long flags;
-
- spin_lock_irqsave(&irq_controller_lock, flags);
- if (desc->chip->set_wake)
- desc->chip->set_wake(irq, 1);
- spin_unlock_irqrestore(&irq_controller_lock, flags);
-}
-EXPORT_SYMBOL(enable_irq_wake);
-
-void disable_irq_wake(unsigned int irq)
-{
- struct irqdesc *desc = irq_desc + irq;
- unsigned long flags;
-
- spin_lock_irqsave(&irq_controller_lock, flags);
- if (desc->chip->set_wake)
- desc->chip->set_wake(irq, 0);
- spin_unlock_irqrestore(&irq_controller_lock, flags);
-}
-EXPORT_SYMBOL(disable_irq_wake);
-
int show_interrupts(struct seq_file *p, void *v)
{
int i = *(loff_t *) v, cpu;
@@ -243,7 +62,7 @@ int show_interrupts(struct seq_file *p,
}
if (i < NR_IRQS) {
- spin_lock_irqsave(&irq_controller_lock, flags);
+ spin_lock_irqsave(&irq_desc[i].lock, flags);
action = irq_desc[i].action;
if (!action)
goto unlock;
@@ -257,7 +76,7 @@ int show_interrupts(struct seq_file *p,
seq_putc(p, '\n');
unlock:
- spin_unlock_irqrestore(&irq_controller_lock, flags);
+ spin_unlock_irqrestore(&irq_desc[i].lock, flags);
} else if (i == NR_IRQS) {
#ifdef CONFIG_ARCH_ACORN
show_fiq_list(p, v);
@@ -265,374 +84,83 @@ unlock:
#ifdef CONFIG_SMP
show_ipi_list(p);
#endif
+#ifdef FIXME_TGLX
seq_printf(p, "Err: %10lu\n", irq_err_count);
- }
- return 0;
-}
-
-/*
- * IRQ lock detection.
- *
- * Hopefully, this should get us out of a few locked situations.
- * However, it may take a while for this to happen, since we need
- * a large number if IRQs to appear in the same jiffie with the
- * same instruction pointer (or within 2 instructions).
- */
-static int check_irq_lock(struct irqdesc *desc, int irq, struct pt_regs *regs)
-{
- unsigned long instr_ptr = instruction_pointer(regs);
-
- if (desc->lck_jif == jiffies &&
- desc->lck_pc >= instr_ptr && desc->lck_pc < instr_ptr + 8) {
- desc->lck_cnt += 1;
-
- if (desc->lck_cnt > MAX_IRQ_CNT) {
- printk(KERN_ERR "IRQ LOCK: IRQ%d is locking the system, disabled\n", irq);
- return 1;
- }
- } else {
- desc->lck_cnt = 0;
- desc->lck_pc = instruction_pointer(regs);
- desc->lck_jif = jiffies;
- }
- return 0;
-}
-
-static void
-report_bad_irq(unsigned int irq, struct pt_regs *regs, struct irqdesc *desc, int ret)
-{
- static int count = 100;
- struct irqaction *action;
-
- if (!count || noirqdebug)
- return;
-
- count--;
-
- if (ret != IRQ_HANDLED && ret != IRQ_NONE) {
- printk("irq%u: bogus retval mask %x\n", irq, ret);
- } else {
- printk("irq%u: nobody cared\n", irq);
- }
- show_regs(regs);
- dump_stack();
- printk(KERN_ERR "handlers:");
- action = desc->action;
- do {
- printk("\n" KERN_ERR "[<%p>]", action->handler);
- print_symbol(" (%s)", (unsigned long)action->handler);
- action = action->next;
- } while (action);
- printk("\n");
-}
-
-static int
-__do_irq(unsigned int irq, struct irqaction *action, struct pt_regs *regs)
-{
- unsigned int status;
- int ret, retval = 0;
-
- spin_unlock(&irq_controller_lock);
-
-#ifdef CONFIG_NO_IDLE_HZ
- if (!(action->flags & SA_TIMER) && system_timer->dyn_tick != NULL) {
- write_seqlock(&xtime_lock);
- if (system_timer->dyn_tick->state & DYN_TICK_ENABLED)
- system_timer->dyn_tick->handler(irq, 0, regs);
- write_sequnlock(&xtime_lock);
- }
#endif
-
- if (!(action->flags & SA_INTERRUPT))
- local_irq_enable();
-
- status = 0;
- do {
- ret = action->handler(irq, action->dev_id, regs);
- if (ret == IRQ_HANDLED)
- status |= action->flags;
- retval |= ret;
- action = action->next;
- } while (action);
-
- if (status & SA_SAMPLE_RANDOM)
- add_interrupt_randomness(irq);
-
- spin_lock_irq(&irq_controller_lock);
-
- return retval;
-}
-
-/*
- * This is for software-decoded IRQs. The caller is expected to
- * handle the ack, clear, mask and unmask issues.
- */
-void
-do_simple_IRQ(unsigned int irq, struct irqdesc *desc, struct pt_regs *regs)
-{
- struct irqaction *action;
- const unsigned int cpu = smp_processor_id();
-
- desc->triggered = 1;
-
- kstat_cpu(cpu).irqs[irq]++;
-
- smp_set_running(desc);
-
- action = desc->action;
- if (action) {
- int ret = __do_irq(irq, action, regs);
- if (ret != IRQ_HANDLED)
- report_bad_irq(irq, regs, desc, ret);
- }
-
- smp_clear_running(desc);
-}
-
-/*
- * Most edge-triggered IRQ implementations seem to take a broken
- * approach to this. Hence the complexity.
- */
-void
-do_edge_IRQ(unsigned int irq, struct irqdesc *desc, struct pt_regs *regs)
-{
- const unsigned int cpu = smp_processor_id();
-
- desc->triggered = 1;
-
- /*
- * If we're currently running this IRQ, or its disabled,
- * we shouldn't process the IRQ. Instead, turn on the
- * hardware masks.
- */
- if (unlikely(desc->running || desc->disable_depth))
- goto running;
-
- /*
- * Acknowledge and clear the IRQ, but don't mask it.
- */
- desc->chip->ack(irq);
-
- /*
- * Mark the IRQ currently in progress.
- */
- desc->running = 1;
-
- kstat_cpu(cpu).irqs[irq]++;
-
- do {
- struct irqaction *action;
-
- action = desc->action;
- if (!action)
- break;
-
- if (desc->pending && !desc->disable_depth) {
- desc->pending = 0;
- desc->chip->unmask(irq);
- }
-
- __do_irq(irq, action, regs);
- } while (desc->pending && !desc->disable_depth);
-
- desc->running = 0;
-
- /*
- * If we were disabled or freed, shut down the handler.
- */
- if (likely(desc->action && !check_irq_lock(desc, irq, regs)))
- return;
-
- running:
- /*
- * We got another IRQ while this one was masked or
- * currently running. Delay it.
- */
- desc->pending = 1;
- desc->chip->mask(irq);
- desc->chip->ack(irq);
-}
-
-/*
- * Level-based IRQ handler. Nice and simple.
- */
-void
-do_level_IRQ(unsigned int irq, struct irqdesc *desc, struct pt_regs *regs)
-{
- struct irqaction *action;
- const unsigned int cpu = smp_processor_id();
-
- desc->triggered = 1;
-
- /*
- * Acknowledge, clear _AND_ disable the interrupt.
- */
- desc->chip->ack(irq);
-
- if (likely(!desc->disable_depth)) {
- kstat_cpu(cpu).irqs[irq]++;
-
- smp_set_running(desc);
-
- /*
- * Return with this interrupt masked if no action
- */
- action = desc->action;
- if (action) {
- int ret = __do_irq(irq, desc->action, regs);
-
- if (ret != IRQ_HANDLED)
- report_bad_irq(irq, regs, desc, ret);
-
- if (likely(!desc->disable_depth &&
- !check_irq_lock(desc, irq, regs)))
- desc->chip->unmask(irq);
- }
-
- smp_clear_running(desc);
}
+ return 0;
}
-static void do_pending_irqs(struct pt_regs *regs)
-{
- struct list_head head, *l, *n;
-
- do {
- struct irqdesc *desc;
-
- /*
- * First, take the pending interrupts off the list.
- * The act of calling the handlers may add some IRQs
- * back onto the list.
- */
- head = irq_pending;
- INIT_LIST_HEAD(&irq_pending);
- head.next->prev = &head;
- head.prev->next = &head;
-
- /*
- * Now run each entry. We must delete it from our
- * list before calling the handler.
- */
- list_for_each_safe(l, n, &head) {
- desc = list_entry(l, struct irqdesc, pend);
- list_del_init(&desc->pend);
- desc_handle_irq(desc - irq_desc, desc, regs);
- }
-
- /*
- * The list must be empty.
- */
- BUG_ON(!list_empty(&head));
- } while (!list_empty(&irq_pending));
-}
+/* Handle bad interrupts */
+static struct irq_desc bad_irq = {
+ .handler = &no_irq_type,
+ .lock = RAW_SPIN_LOCK_UNLOCKED
+};
/*
- * do_IRQ handles all hardware IRQ's. Decoded IRQs should not
+ * asm_do_IRQ handles all hardware IRQ's. Decoded IRQs should not
* come via this function. Instead, they should provide their
* own 'handler'
*/
-asmlinkage void asm_do_IRQ(unsigned int irq, struct pt_regs *regs)
+asmlinkage notrace void asm_do_IRQ(unsigned int irq, struct pt_regs *regs)
{
struct irqdesc *desc = irq_desc + irq;
+ trace_special(instruction_pointer(regs), irq, 0);
+
/*
* Some hardware gives randomly wrong interrupts. Rather
* than crashing, do something sensible.
*/
if (irq >= NR_IRQS)
- desc = &bad_irq_desc;
+ desc = &bad_irq;
irq_enter();
- spin_lock(&irq_controller_lock);
- desc_handle_irq(irq, desc, regs);
-
- /*
- * Now re-run any pending interrupts.
- */
- if (!list_empty(&irq_pending))
- do_pending_irqs(regs);
- irq_finish(irq);
+ desc_handle_irq(irq, desc, regs);
- spin_unlock(&irq_controller_lock);
irq_exit();
}
-void __set_irq_handler(unsigned int irq, irq_handler_t handle, int is_chained)
+void __set_irq_handler(unsigned int irq, struct irq_type *type, int is_chained)
{
struct irqdesc *desc;
unsigned long flags;
if (irq >= NR_IRQS) {
- printk(KERN_ERR "Trying to install handler for IRQ%d\n", irq);
+ printk(KERN_ERR "Trying to install type control for IRQ%d\n", irq);
return;
}
- if (handle == NULL)
- handle = do_bad_IRQ;
-
desc = irq_desc + irq;
- if (is_chained && desc->chip == &bad_chip)
- printk(KERN_WARNING "Trying to install chained handler for IRQ%d\n", irq);
-
- spin_lock_irqsave(&irq_controller_lock, flags);
- if (handle == do_bad_IRQ) {
- desc->chip->mask(irq);
- desc->chip->ack(irq);
- desc->disable_depth = 1;
- }
- desc->handle = handle;
- if (handle != do_bad_IRQ && is_chained) {
- desc->valid = 0;
- desc->probe_ok = 0;
- desc->disable_depth = 0;
- desc->chip->unmask(irq);
+ /* Uninstall ? */
+ if (type == NULL || type == &no_irq_type) {
+ spin_lock_irqsave(&desc->lock, flags);
+ if (desc->chip) {
+ desc->chip->mask(irq);
+ desc->chip->ack(irq);
+ }
+ desc->depth = 1;
+ spin_unlock_irqrestore(&desc->lock, flags);
}
- spin_unlock_irqrestore(&irq_controller_lock, flags);
-}
-
-void set_irq_chip(unsigned int irq, struct irqchip *chip)
-{
- struct irqdesc *desc;
- unsigned long flags;
- if (irq >= NR_IRQS) {
- printk(KERN_ERR "Trying to install chip for IRQ%d\n", irq);
+ /* Install the irq_type */
+ if (generic_set_irq_type(irq, type))
return;
- }
-
- if (chip == NULL)
- chip = &bad_chip;
-
- desc = irq_desc + irq;
- spin_lock_irqsave(&irq_controller_lock, flags);
- desc->chip = chip;
- spin_unlock_irqrestore(&irq_controller_lock, flags);
-}
-int set_irq_type(unsigned int irq, unsigned int type)
-{
- struct irqdesc *desc;
- unsigned long flags;
- int ret = -ENXIO;
+ spin_lock_irqsave(&desc->lock, flags);
+ if (is_chained && (desc->handler == &no_irq_type || !desc->chip))
+ printk(KERN_WARNING "Trying to install chained interrupt type for IRQ%d\n", irq);
- if (irq >= NR_IRQS) {
- printk(KERN_ERR "Trying to set irq type for IRQ%d\n", irq);
- return -ENODEV;
- }
-
- desc = irq_desc + irq;
- if (desc->chip->set_type) {
- spin_lock_irqsave(&irq_controller_lock, flags);
- ret = desc->chip->set_type(irq, type);
- spin_unlock_irqrestore(&irq_controller_lock, flags);
+ if (type != NULL && is_chained) {
+ desc->status |= IRQ_NOREQUEST | IRQ_NOPROBE;
+ desc->depth = 0;
+ if (desc->chip)
+ desc->chip->unmask(irq);
}
-
- return ret;
+ spin_unlock_irqrestore(&desc->lock, flags);
}
-EXPORT_SYMBOL(set_irq_type);
void set_irq_flags(unsigned int irq, unsigned int iflags)
{
@@ -645,408 +173,28 @@ void set_irq_flags(unsigned int irq, uns
}
desc = irq_desc + irq;
- spin_lock_irqsave(&irq_controller_lock, flags);
- desc->valid = (iflags & IRQF_VALID) != 0;
- desc->probe_ok = (iflags & IRQF_PROBE) != 0;
- desc->noautoenable = (iflags & IRQF_NOAUTOEN) != 0;
- spin_unlock_irqrestore(&irq_controller_lock, flags);
-}
-
-int setup_irq(unsigned int irq, struct irqaction *new)
-{
- int shared = 0;
- struct irqaction *old, **p;
- unsigned long flags;
- struct irqdesc *desc;
-
- /*
- * Some drivers like serial.c use request_irq() heavily,
- * so we have to be careful not to interfere with a
- * running system.
- */
- if (new->flags & SA_SAMPLE_RANDOM) {
- /*
- * This function might sleep, we want to call it first,
- * outside of the atomic block.
- * Yes, this might clear the entropy pool if the wrong
- * driver is attempted to be loaded, without actually
- * installing a new handler, but is this really a problem,
- * only the sysadmin is able to do this.
- */
- rand_initialize_irq(irq);
- }
-
- /*
- * The following block of code has to be executed atomically
- */
- desc = irq_desc + irq;
- spin_lock_irqsave(&irq_controller_lock, flags);
- p = &desc->action;
- if ((old = *p) != NULL) {
- /* Can't share interrupts unless both agree to */
- if (!(old->flags & new->flags & SA_SHIRQ)) {
- spin_unlock_irqrestore(&irq_controller_lock, flags);
- return -EBUSY;
- }
-
- /* add new interrupt at end of irq queue */
- do {
- p = &old->next;
- old = *p;
- } while (old);
- shared = 1;
- }
-
- *p = new;
-
- if (!shared) {
- desc->probing = 0;
- desc->running = 0;
- desc->pending = 0;
- desc->disable_depth = 1;
- if (!desc->noautoenable) {
- desc->disable_depth = 0;
- desc->chip->unmask(irq);
- }
- }
-
- spin_unlock_irqrestore(&irq_controller_lock, flags);
- return 0;
-}
-
-/**
- * request_irq - allocate an interrupt line
- * @irq: Interrupt line to allocate
- * @handler: Function to be called when the IRQ occurs
- * @irqflags: Interrupt type flags
- * @devname: An ascii name for the claiming device
- * @dev_id: A cookie passed back to the handler function
- *
- * This call allocates interrupt resources and enables the
- * interrupt line and IRQ handling. From the point this
- * call is made your handler function may be invoked. Since
- * your handler function must clear any interrupt the board
- * raises, you must take care both to initialise your hardware
- * and to set up the interrupt handler in the right order.
- *
- * Dev_id must be globally unique. Normally the address of the
- * device data structure is used as the cookie. Since the handler
- * receives this value it makes sense to use it.
- *
- * If your interrupt is shared you must pass a non NULL dev_id
- * as this is required when freeing the interrupt.
- *
- * Flags:
- *
- * SA_SHIRQ Interrupt is shared
- *
- * SA_INTERRUPT Disable local interrupts while processing
- *
- * SA_SAMPLE_RANDOM The interrupt can be used for entropy
- *
- */
-int request_irq(unsigned int irq, irqreturn_t (*handler)(int, void *, struct pt_regs *),
- unsigned long irq_flags, const char * devname, void *dev_id)
-{
- unsigned long retval;
- struct irqaction *action;
-
- if (irq >= NR_IRQS || !irq_desc[irq].valid || !handler ||
- (irq_flags & SA_SHIRQ && !dev_id))
- return -EINVAL;
-
- action = (struct irqaction *)kmalloc(sizeof(struct irqaction), GFP_KERNEL);
- if (!action)
- return -ENOMEM;
-
- action->handler = handler;
- action->flags = irq_flags;
- cpus_clear(action->mask);
- action->name = devname;
- action->next = NULL;
- action->dev_id = dev_id;
-
- retval = setup_irq(irq, action);
-
- if (retval)
- kfree(action);
- return retval;
-}
-
-EXPORT_SYMBOL(request_irq);
-
-/**
- * free_irq - free an interrupt
- * @irq: Interrupt line to free
- * @dev_id: Device identity to free
- *
- * Remove an interrupt handler. The handler is removed and if the
- * interrupt line is no longer in use by any driver it is disabled.
- * On a shared IRQ the caller must ensure the interrupt is disabled
- * on the card it drives before calling this function.
- *
- * This function must not be called from interrupt context.
- */
-void free_irq(unsigned int irq, void *dev_id)
-{
- struct irqaction * action, **p;
- unsigned long flags;
-
- if (irq >= NR_IRQS || !irq_desc[irq].valid) {
- printk(KERN_ERR "Trying to free IRQ%d\n",irq);
- dump_stack();
- return;
- }
-
- spin_lock_irqsave(&irq_controller_lock, flags);
- for (p = &irq_desc[irq].action; (action = *p) != NULL; p = &action->next) {
- if (action->dev_id != dev_id)
- continue;
-
- /* Found it - now free it */
- *p = action->next;
- break;
- }
- spin_unlock_irqrestore(&irq_controller_lock, flags);
-
- if (!action) {
- printk(KERN_ERR "Trying to free free IRQ%d\n",irq);
- dump_stack();
- } else {
- synchronize_irq(irq);
- kfree(action);
- }
-}
-
-EXPORT_SYMBOL(free_irq);
-
-static DECLARE_MUTEX(probe_sem);
-
-/* Start the interrupt probing. Unlike other architectures,
- * we don't return a mask of interrupts from probe_irq_on,
- * but return the number of interrupts enabled for the probe.
- * The interrupts which have been enabled for probing is
- * instead recorded in the irq_desc structure.
- */
-unsigned long probe_irq_on(void)
-{
- unsigned int i, irqs = 0;
- unsigned long delay;
-
- down(&probe_sem);
-
- /*
- * first snaffle up any unassigned but
- * probe-able interrupts
- */
- spin_lock_irq(&irq_controller_lock);
- for (i = 0; i < NR_IRQS; i++) {
- if (!irq_desc[i].probe_ok || irq_desc[i].action)
- continue;
-
- irq_desc[i].probing = 1;
- irq_desc[i].triggered = 0;
- if (irq_desc[i].chip->set_type)
- irq_desc[i].chip->set_type(i, IRQT_PROBE);
- irq_desc[i].chip->unmask(i);
- irqs += 1;
- }
- spin_unlock_irq(&irq_controller_lock);
-
- /*
- * wait for spurious interrupts to mask themselves out again
- */
- for (delay = jiffies + HZ/10; time_before(jiffies, delay); )
- /* min 100ms delay */;
-
- /*
- * now filter out any obviously spurious interrupts
- */
- spin_lock_irq(&irq_controller_lock);
- for (i = 0; i < NR_IRQS; i++) {
- if (irq_desc[i].probing && irq_desc[i].triggered) {
- irq_desc[i].probing = 0;
- irqs -= 1;
- }
- }
- spin_unlock_irq(&irq_controller_lock);
-
- return irqs;
-}
-
-EXPORT_SYMBOL(probe_irq_on);
-
-unsigned int probe_irq_mask(unsigned long irqs)
-{
- unsigned int mask = 0, i;
-
- spin_lock_irq(&irq_controller_lock);
- for (i = 0; i < 16 && i < NR_IRQS; i++)
- if (irq_desc[i].probing && irq_desc[i].triggered)
- mask |= 1 << i;
- spin_unlock_irq(&irq_controller_lock);
-
- up(&probe_sem);
-
- return mask;
-}
-EXPORT_SYMBOL(probe_irq_mask);
-
-/*
- * Possible return values:
- * >= 0 - interrupt number
- * -1 - no interrupt/many interrupts
- */
-int probe_irq_off(unsigned long irqs)
-{
- unsigned int i;
- int irq_found = NO_IRQ;
-
- /*
- * look at the interrupts, and find exactly one
- * that we were probing has been triggered
- */
- spin_lock_irq(&irq_controller_lock);
- for (i = 0; i < NR_IRQS; i++) {
- if (irq_desc[i].probing &&
- irq_desc[i].triggered) {
- if (irq_found != NO_IRQ) {
- irq_found = NO_IRQ;
- goto out;
- }
- irq_found = i;
- }
- }
-
- if (irq_found == -1)
- irq_found = NO_IRQ;
-out:
- spin_unlock_irq(&irq_controller_lock);
-
- up(&probe_sem);
-
- return irq_found;
-}
-
-EXPORT_SYMBOL(probe_irq_off);
-
-#ifdef CONFIG_SMP
-static void route_irq(struct irqdesc *desc, unsigned int irq, unsigned int cpu)
-{
- pr_debug("IRQ%u: moving from cpu%u to cpu%u\n", irq, desc->cpu, cpu);
-
- spin_lock_irq(&irq_controller_lock);
- desc->cpu = cpu;
- desc->chip->set_cpu(desc, irq, cpu);
- spin_unlock_irq(&irq_controller_lock);
-}
-
-#ifdef CONFIG_PROC_FS
-static int
-irq_affinity_read_proc(char *page, char **start, off_t off, int count,
- int *eof, void *data)
-{
- struct irqdesc *desc = irq_desc + ((int)data);
- int len = cpumask_scnprintf(page, count, desc->affinity);
-
- if (count - len < 2)
- return -EINVAL;
- page[len++] = '\n';
- page[len] = '\0';
-
- return len;
-}
-
-static int
-irq_affinity_write_proc(struct file *file, const char __user *buffer,
- unsigned long count, void *data)
-{
- unsigned int irq = (unsigned int)data;
- struct irqdesc *desc = irq_desc + irq;
- cpumask_t affinity, tmp;
- int ret = -EIO;
-
- if (!desc->chip->set_cpu)
- goto out;
-
- ret = cpumask_parse(buffer, count, affinity);
- if (ret)
- goto out;
-
- cpus_and(tmp, affinity, cpu_online_map);
- if (cpus_empty(tmp)) {
- ret = -EINVAL;
- goto out;
- }
-
- desc->affinity = affinity;
- route_irq(desc, irq, first_cpu(tmp));
- ret = count;
-
- out:
- return ret;
-}
-#endif
-#endif
-
-void __init init_irq_proc(void)
-{
-#if defined(CONFIG_SMP) && defined(CONFIG_PROC_FS)
- struct proc_dir_entry *dir;
- int irq;
-
- dir = proc_mkdir("irq", 0);
- if (!dir)
- return;
-
- for (irq = 0; irq < NR_IRQS; irq++) {
- struct proc_dir_entry *entry;
- struct irqdesc *desc;
- char name[16];
-
- desc = irq_desc + irq;
- memset(name, 0, sizeof(name));
- snprintf(name, sizeof(name) - 1, "%u", irq);
-
- desc->procdir = proc_mkdir(name, dir);
- if (!desc->procdir)
- continue;
-
- entry = create_proc_entry("smp_affinity", 0600, desc->procdir);
- if (entry) {
- entry->nlink = 1;
- entry->data = (void *)irq;
- entry->read_proc = irq_affinity_read_proc;
- entry->write_proc = irq_affinity_write_proc;
- }
- }
-#endif
+ spin_lock_irqsave(&desc->lock, flags);
+ desc->status |= IRQ_NOREQUEST | IRQ_NOPROBE;
+ if (iflags & IRQF_VALID)
+ desc->status &= ~IRQ_NOREQUEST;
+ if (iflags & IRQF_PROBE)
+ desc->status &= ~IRQ_NOPROBE;
+ spin_unlock_irqrestore(&desc->lock, flags);
}
void __init init_IRQ(void)
{
- struct irqdesc *desc;
extern void init_dma(void);
int irq;
+ for (irq = 0; irq < NR_IRQS; irq++)
+ irq_desc[irq].status |= IRQ_NOREQUEST;
+
#ifdef CONFIG_SMP
bad_irq_desc.affinity = CPU_MASK_ALL;
bad_irq_desc.cpu = smp_processor_id();
#endif
- for (irq = 0, desc = irq_desc; irq < NR_IRQS; irq++, desc++) {
- *desc = bad_irq_desc;
- INIT_LIST_HEAD(&desc->pend);
- }
-
init_arch_irq();
init_dma();
}
-
-static int __init noirqdebug_setup(char *str)
-{
- noirqdebug = 1;
- return 1;
-}
-
-__setup("noirqdebug", noirqdebug_setup);
Index: linux/arch/arm/kernel/process.c
===================================================================
--- linux.orig/arch/arm/kernel/process.c
+++ linux/arch/arm/kernel/process.c
@@ -85,12 +85,12 @@ EXPORT_SYMBOL(pm_power_off);
*/
void default_idle(void)
{
- local_irq_disable();
+ raw_local_irq_disable();
if (!need_resched() && !hlt_counter) {
timer_dyn_reprogram();
arch_idle();
}
- local_irq_enable();
+ raw_local_irq_enable();
}
/*
@@ -112,8 +112,8 @@ void cpu_idle(void)
while (!need_resched())
idle();
leds_event(led_idle_end);
- preempt_enable();
- schedule();
+ __preempt_enable_no_resched();
+ __schedule();
}
}
Index: linux/arch/arm/kernel/semaphore.c
===================================================================
--- linux.orig/arch/arm/kernel/semaphore.c
+++ linux/arch/arm/kernel/semaphore.c
@@ -49,14 +49,14 @@
* we cannot lose wakeup events.
*/
-void __up(struct semaphore *sem)
+fastcall void __attribute_used__ __compat_up(struct compat_semaphore *sem)
{
wake_up(&sem->wait);
}
static DEFINE_SPINLOCK(semaphore_lock);
-void __sched __down(struct semaphore * sem)
+fastcall void __attribute_used__ __sched __compat_down(struct compat_semaphore * sem)
{
struct task_struct *tsk = current;
DECLARE_WAITQUEUE(wait, tsk);
@@ -89,7 +89,7 @@ void __sched __down(struct semaphore * s
wake_up(&sem->wait);
}
-int __sched __down_interruptible(struct semaphore * sem)
+fastcall int __attribute_used__ __sched __compat_down_interruptible(struct compat_semaphore * sem)
{
int retval = 0;
struct task_struct *tsk = current;
@@ -148,7 +148,7 @@ int __sched __down_interruptible(struct
* single "cmpxchg" without failure cases,
* but then it wouldn't work on a 386.
*/
-int __down_trylock(struct semaphore * sem)
+fastcall int __attribute_used__ __compat_down_trylock(struct compat_semaphore * sem)
{
int sleepers;
unsigned long flags;
@@ -168,6 +168,11 @@ int __down_trylock(struct semaphore * se
return 1;
}
+fastcall int compat_sem_is_locked(struct compat_semaphore *sem)
+{
+ return (int) atomic_read(&sem->count) < 0;
+}
+
/*
* The semaphore operations have a special calling sequence that
* allow us to do a simpler in-line version of them. These routines
@@ -184,7 +189,7 @@ asm(" .section .sched.text,\"ax\",%progb
__down_failed: \n\
stmfd sp!, {r0 - r3, lr} \n\
mov r0, ip \n\
- bl __down \n\
+ bl __compat_down \n\
ldmfd sp!, {r0 - r3, pc} \n\
\n\
.align 5 \n\
@@ -192,7 +197,7 @@ __down_failed: \n\
__down_interruptible_failed: \n\
stmfd sp!, {r0 - r3, lr} \n\
mov r0, ip \n\
- bl __down_interruptible \n\
+ bl __compat_down_interruptible \n\
mov ip, r0 \n\
ldmfd sp!, {r0 - r3, pc} \n\
\n\
@@ -201,7 +206,7 @@ __down_interruptible_failed: \n\
__down_trylock_failed: \n\
stmfd sp!, {r0 - r3, lr} \n\
mov r0, ip \n\
- bl __down_trylock \n\
+ bl __compat_down_trylock \n\
mov ip, r0 \n\
ldmfd sp!, {r0 - r3, pc} \n\
\n\
@@ -210,7 +215,7 @@ __down_trylock_failed: \n\
__up_wakeup: \n\
stmfd sp!, {r0 - r3, lr} \n\
mov r0, ip \n\
- bl __up \n\
+ bl __compat_up \n\
ldmfd sp!, {r0 - r3, pc} \n\
");
Index: linux/arch/arm/kernel/signal.c
===================================================================
--- linux.orig/arch/arm/kernel/signal.c
+++ linux/arch/arm/kernel/signal.c
@@ -689,6 +689,14 @@ static int do_signal(sigset_t *oldset, s
siginfo_t info;
int signr;
+#ifdef CONFIG_PREEMPT_RT
+ /*
+ * Fully-preemptible kernel does not need interrupts disabled:
+ */
+ raw_local_irq_enable();
+ preempt_check_resched();
+#endif
+
/*
* We want the common case to go fast, which
* is why we may in certain cases get here from
Index: linux/arch/arm/kernel/smp.c
===================================================================
--- linux.orig/arch/arm/kernel/smp.c
+++ linux/arch/arm/kernel/smp.c
@@ -56,6 +56,7 @@ struct ipi_data {
unsigned long bits;
};
+/* FIXME */
static DEFINE_PER_CPU(struct ipi_data, ipi_data) = {
.lock = SPIN_LOCK_UNLOCKED,
};
@@ -246,7 +247,7 @@ static void send_ipi_message(cpumask_t c
unsigned long flags;
unsigned int cpu;
- local_irq_save(flags);
+ raw_local_irq_save(flags);
for_each_cpu_mask(cpu, callmap) {
struct ipi_data *ipi = &per_cpu(ipi_data, cpu);
@@ -261,7 +262,7 @@ static void send_ipi_message(cpumask_t c
*/
smp_cross_call(callmap);
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
}
/*
@@ -394,7 +395,7 @@ static void ipi_call_function(unsigned i
cpu_clear(cpu, data->unfinished);
}
-static DEFINE_SPINLOCK(stop_lock);
+static DEFINE_RAW_SPINLOCK(stop_lock);
/*
* ipi_cpu_stop - handle IPI from smp_send_stop()
@@ -409,7 +410,7 @@ static void ipi_cpu_stop(unsigned int cp
cpu_clear(cpu, cpu_online_map);
local_fiq_disable();
- local_irq_disable();
+ raw_local_irq_disable();
while (1)
cpu_relax();
Index: linux/arch/arm/kernel/time.c
===================================================================
--- linux.orig/arch/arm/kernel/time.c
+++ linux/arch/arm/kernel/time.c
@@ -36,10 +36,6 @@
#include
#include
-u64 jiffies_64 = INITIAL_JIFFIES;
-
-EXPORT_SYMBOL(jiffies_64);
-
/*
* Our system timer.
*/
Index: linux/arch/arm/kernel/traps.c
===================================================================
--- linux.orig/arch/arm/kernel/traps.c
+++ linux/arch/arm/kernel/traps.c
@@ -177,6 +177,8 @@ void dump_stack(void)
{
#ifdef CONFIG_DEBUG_ERRORS
__backtrace();
+ print_traces(current);
+ show_held_locks(current);
#endif
}
@@ -198,7 +200,7 @@ void show_stack(struct task_struct *tsk,
barrier();
}
-DEFINE_SPINLOCK(die_lock);
+DEFINE_RAW_SPINLOCK(die_lock);
/*
* This function is protected against re-entrancy.
@@ -244,7 +246,7 @@ void notify_die(const char *str, struct
}
static LIST_HEAD(undef_hook);
-static DEFINE_SPINLOCK(undef_lock);
+static DEFINE_RAW_SPINLOCK(undef_lock);
void register_undef_hook(struct undef_hook *hook)
{
@@ -336,7 +338,7 @@ asmlinkage void bad_mode(struct pt_regs
handler[reason], processor_modes[proc_mode]);
die("Oops - bad mode", regs, 0);
- local_irq_disable();
+ raw_local_irq_disable();
panic("bad mode");
}
Index: linux/arch/arm/mach-clps711x/p720t-leds.c
===================================================================
--- linux.orig/arch/arm/mach-clps711x/p720t-leds.c
+++ linux/arch/arm/mach-clps711x/p720t-leds.c
@@ -36,7 +36,7 @@ static void p720t_leds_event(led_event_t
unsigned long flags;
u32 pddr;
- local_irq_save(flags);
+ raw_local_irq_save(flags);
switch(ledevt) {
case led_idle_start:
break;
@@ -53,7 +53,7 @@ static void p720t_leds_event(led_event_t
break;
}
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
}
static int __init leds_init(void)
Index: linux/arch/arm/mach-clps711x/time.c
===================================================================
--- linux.orig/arch/arm/mach-clps711x/time.c
+++ linux/arch/arm/mach-clps711x/time.c
@@ -19,6 +19,7 @@
#include
#include
#include
+#include
#include
#include
Index: linux/arch/arm/mach-clps7500/core.c
===================================================================
--- linux.orig/arch/arm/mach-clps7500/core.c
+++ linux/arch/arm/mach-clps7500/core.c
@@ -9,6 +9,7 @@
#include
#include
#include
+#include
#include
#include
#include
Index: linux/arch/arm/mach-ebsa110/core.c
===================================================================
--- linux.orig/arch/arm/mach-ebsa110/core.c
+++ linux/arch/arm/mach-ebsa110/core.c
@@ -56,14 +56,14 @@ static void __init ebsa110_init_irq(void
unsigned long flags;
unsigned int irq;
- local_irq_save(flags);
+ raw_local_irq_save(flags);
__raw_writeb(0xff, IRQ_MCLR);
__raw_writeb(0x55, IRQ_MSET);
__raw_writeb(0x00, IRQ_MSET);
if (__raw_readb(IRQ_MASK) != 0x55)
while (1);
__raw_writeb(0xff, IRQ_MCLR); /* clear all interrupt enables */
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
for (irq = 0; irq < NR_IRQS; irq++) {
set_irq_chip(irq, &ebsa110_irq_chip);
Index: linux/arch/arm/mach-footbridge/dc21285-timer.c
===================================================================
--- linux.orig/arch/arm/mach-footbridge/dc21285-timer.c
+++ linux/arch/arm/mach-footbridge/dc21285-timer.c
@@ -6,6 +6,7 @@
*/
#include
#include
+#include
#include
Index: linux/arch/arm/mach-footbridge/isa-irq.c
===================================================================
--- linux.orig/arch/arm/mach-footbridge/isa-irq.c
+++ linux/arch/arm/mach-footbridge/isa-irq.c
@@ -102,6 +102,17 @@ static struct irqaction irq_cascade = {
static struct resource pic1_resource = { "pic1", 0x20, 0x3f };
static struct resource pic2_resource = { "pic2", 0xa0, 0xbf };
+static DEFINE_IRQ_CHAINED_TYPE(isa_irq_handler);
+
+static unsigned int startup_irq_disabled(unsigned int irq)
+{
+ return 0;
+}
+
+/* Interrupt type for irqs which must not be
+ * automatically enabled in request_irq */
+static struct irq_type level_type_nostart;
+
void __init isa_init_irq(unsigned int host_irq)
{
unsigned int irq;
@@ -159,9 +170,11 @@ void __init isa_init_irq(unsigned int ho
* There appears to be a missing pull-up
* resistor on this line.
*/
- if (machine_is_netwinder())
- set_irq_flags(_ISA_IRQ(11), IRQF_VALID |
- IRQF_PROBE | IRQF_NOAUTOEN);
+ if (machine_is_netwinder()) {
+ level_type_nostart = default_level_type;
+ level_type_nostart.startup = startup_irq_disabled;
+ set_irq_handler(_ISA_IRQ(11), &level_type_nostart);
+ }
}
}
Index: linux/arch/arm/mach-footbridge/isa-timer.c
===================================================================
--- linux.orig/arch/arm/mach-footbridge/isa-timer.c
+++ linux/arch/arm/mach-footbridge/isa-timer.c
@@ -6,6 +6,7 @@
*/
#include
#include
+#include
#include
#include
Index: linux/arch/arm/mach-footbridge/netwinder-hw.c
===================================================================
--- linux.orig/arch/arm/mach-footbridge/netwinder-hw.c
+++ linux/arch/arm/mach-footbridge/netwinder-hw.c
@@ -68,7 +68,7 @@ static inline void wb977_ww(int reg, int
/*
* This is a lock for accessing ports GP1_IO_BASE and GP2_IO_BASE
*/
-DEFINE_SPINLOCK(gpio_lock);
+DEFINE_RAW_SPINLOCK(gpio_lock);
static unsigned int current_gpio_op;
static unsigned int current_gpio_io;
Index: linux/arch/arm/mach-footbridge/netwinder-leds.c
===================================================================
--- linux.orig/arch/arm/mach-footbridge/netwinder-leds.c
+++ linux/arch/arm/mach-footbridge/netwinder-leds.c
@@ -33,7 +33,7 @@ static char led_state;
static char hw_led_state;
static DEFINE_SPINLOCK(leds_lock);
-extern spinlock_t gpio_lock;
+extern raw_spinlock_t gpio_lock;
static void netwinder_leds_event(led_event_t evt)
{
Index: linux/arch/arm/mach-h720x/common.c
===================================================================
--- linux.orig/arch/arm/mach-h720x/common.c
+++ linux/arch/arm/mach-h720x/common.c
@@ -163,6 +163,11 @@ h720x_gpiod_demux_handler(unsigned int i
h720x_gpio_handler(mask, irq, desc, regs);
}
+static DEFINE_IRQ_CHAINED_TYPE(h720x_gpioa_demux_handler);
+static DEFINE_IRQ_CHAINED_TYPE(h720x_gpiob_demux_handler);
+static DEFINE_IRQ_CHAINED_TYPE(h720x_gpioc_demux_handler);
+static DEFINE_IRQ_CHAINED_TYPE(h720x_gpiod_demux_handler);
+
#ifdef CONFIG_CPU_H7202
static void
h720x_gpioe_demux_handler(unsigned int irq_unused, struct irqdesc *desc,
@@ -175,6 +180,7 @@ h720x_gpioe_demux_handler(unsigned int i
IRQDBG("%s mask: 0x%08x irq: %d\n",__FUNCTION__,mask,irq);
h720x_gpio_handler(mask, irq, desc, regs);
}
+static DEFINE_IRQ_CHAINED_TYPE(h720x_gpioe_demux_handler);
#endif
static struct irqchip h720x_global_chip = {
Index: linux/arch/arm/mach-h720x/cpu-h7202.c
===================================================================
--- linux.orig/arch/arm/mach-h720x/cpu-h7202.c
+++ linux/arch/arm/mach-h720x/cpu-h7202.c
@@ -175,6 +175,8 @@ static struct irqaction h7202_timer_irq
.handler = h7202_timer_interrupt,
};
+static DEFINE_IRQ_CHAINED_TYPE(h7202_timerx_demux_handler);
+
/*
* Setup TIMER0 as system timer
*/
Index: linux/arch/arm/mach-imx/dma.c
===================================================================
--- linux.orig/arch/arm/mach-imx/dma.c
+++ linux/arch/arm/mach-imx/dma.c
@@ -43,7 +43,7 @@ imx_request_dma(char *name, imx_dma_prio
if (!name || !irq_handler)
return -EINVAL;
- local_irq_save(flags);
+ raw_local_irq_save(flags);
/* try grabbing a DMA channel with the requested priority */
for (i = prio; i < prio + (prio == DMA_PRIO_LOW) ? 8 : 4; i++) {
@@ -75,7 +75,7 @@ imx_request_dma(char *name, imx_dma_prio
i = -ENODEV;
}
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
return i;
}
@@ -91,10 +91,10 @@ imx_free_dma(int dma_ch)
return;
}
- local_irq_save(flags);
+ raw_local_irq_save(flags);
DIMR &= ~(1 << dma_ch);
dma_channels[dma_ch].name = NULL;
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
}
static irqreturn_t
Index: linux/arch/arm/mach-imx/irq.c
===================================================================
--- linux.orig/arch/arm/mach-imx/irq.c
+++ linux/arch/arm/mach-imx/irq.c
@@ -217,6 +217,11 @@ static struct irqchip imx_gpio_chip = {
.set_type = imx_gpio_irq_type,
};
+static DEFINE_IRQ_CHAINED_TYPE(imx_gpioa_demux_handler);
+static DEFINE_IRQ_CHAINED_TYPE(imx_gpiob_demux_handler);
+static DEFINE_IRQ_CHAINED_TYPE(imx_gpioc_demux_handler);
+static DEFINE_IRQ_CHAINED_TYPE(imx_gpiod_demux_handler);
+
void __init
imx_init_irq(void)
{
Index: linux/arch/arm/mach-imx/leds-mx1ads.c
===================================================================
--- linux.orig/arch/arm/mach-imx/leds-mx1ads.c
+++ linux/arch/arm/mach-imx/leds-mx1ads.c
@@ -29,7 +29,7 @@ mx1ads_leds_event(led_event_t ledevt)
{
unsigned long flags;
- local_irq_save(flags);
+ raw_local_irq_save(flags);
switch (ledevt) {
#ifdef CONFIG_LEDS_CPU
@@ -49,5 +49,5 @@ mx1ads_leds_event(led_event_t ledevt)
default:
break;
}
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
}
Index: linux/arch/arm/mach-imx/time.c
===================================================================
--- linux.orig/arch/arm/mach-imx/time.c
+++ linux/arch/arm/mach-imx/time.c
@@ -13,6 +13,7 @@
#include
#include
#include
+#include
#include
#include
Index: linux/arch/arm/mach-integrator/core.c
===================================================================
--- linux.orig/arch/arm/mach-integrator/core.c
+++ linux/arch/arm/mach-integrator/core.c
@@ -13,6 +13,7 @@
#include
#include
#include
+#include
#include
#include
@@ -117,7 +118,7 @@ arch_initcall(integrator_init);
#define CM_CTRL IO_ADDRESS(INTEGRATOR_HDR_BASE) + INTEGRATOR_HDR_CTRL_OFFSET
-static DEFINE_SPINLOCK(cm_lock);
+static DEFINE_RAW_SPINLOCK(cm_lock);
/**
* cm_control - update the CM_CTRL register.
Index: linux/arch/arm/mach-integrator/leds.c
===================================================================
--- linux.orig/arch/arm/mach-integrator/leds.c
+++ linux/arch/arm/mach-integrator/leds.c
@@ -41,7 +41,7 @@ static void integrator_leds_event(led_ev
unsigned int update_alpha_leds;
// yup, change the LEDs
- local_irq_save(flags);
+ raw_local_irq_save(flags);
update_alpha_leds = 0;
switch(ledevt) {
@@ -76,7 +76,7 @@ static void integrator_leds_event(led_ev
while (__raw_readl(dbg_base + INTEGRATOR_DBG_ALPHA_OFFSET) & 1);
__raw_writel(saved_leds, dbg_base + INTEGRATOR_DBG_LEDS_OFFSET);
}
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
}
static int __init leds_init(void)
Index: linux/arch/arm/mach-integrator/pci_v3.c
===================================================================
--- linux.orig/arch/arm/mach-integrator/pci_v3.c
+++ linux/arch/arm/mach-integrator/pci_v3.c
@@ -163,7 +163,7 @@
* 7:2 register number
*
*/
-static DEFINE_SPINLOCK(v3_lock);
+static DEFINE_RAW_SPINLOCK(v3_lock);
#define PCI_BUS_NONMEM_START 0x00000000
#define PCI_BUS_NONMEM_SIZE SZ_256M
Index: linux/arch/arm/mach-integrator/platsmp.c
===================================================================
--- linux.orig/arch/arm/mach-integrator/platsmp.c
+++ linux/arch/arm/mach-integrator/platsmp.c
@@ -31,7 +31,7 @@ extern void integrator_secondary_startup
volatile int __cpuinitdata pen_release = -1;
unsigned long __cpuinitdata phys_pen_release = 0;
-static DEFINE_SPINLOCK(boot_lock);
+static DEFINE_RAW_SPINLOCK(boot_lock);
void __cpuinit platform_secondary_init(unsigned int cpu)
{
Index: linux/arch/arm/mach-ixp2000/core.c
===================================================================
--- linux.orig/arch/arm/mach-ixp2000/core.c
+++ linux/arch/arm/mach-ixp2000/core.c
@@ -20,6 +20,7 @@
#include
#include
#include
+#include
#include
#include
#include
@@ -286,9 +287,9 @@ void gpio_line_config(int line, int dire
{
unsigned long flags;
- local_irq_save(flags);
+ raw_local_irq_save(flags);
if (direction == GPIO_OUT) {
- irq_desc[line + IRQ_IXP2000_GPIO0].valid = 0;
+ set_irq_flags(line + IRQ_IXP2000_GPIO0, 0);
/* if it's an output, it ain't an interrupt anymore */
GPIO_IRQ_falling_edge &= ~(1 << line);
@@ -301,7 +302,7 @@ void gpio_line_config(int line, int dire
} else if (direction == GPIO_IN) {
ixp2000_reg_write(IXP2000_GPIO_PDCR, 1 << line);
}
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
}
@@ -354,8 +355,7 @@ static int ixp2000_GPIO_irq_type(unsigne
/*
* Finally, mark the corresponding IRQ as valid.
*/
- irq_desc[irq].valid = 1;
-
+ set_irq_flags(irq, IRQF_VALID);
return 0;
}
@@ -425,6 +425,8 @@ static struct irqchip ixp2000_irq_chip =
.unmask = ixp2000_irq_unmask
};
+static DEFINE_IRQ_CHAINED_TYPE(ixp2000_GPIO_irq_handler);
+
void __init ixp2000_init_irq(void)
{
int irq;
Index: linux/arch/arm/mach-ixp2000/ixdp2x00.c
===================================================================
--- linux.orig/arch/arm/mach-ixp2000/ixdp2x00.c
+++ linux/arch/arm/mach-ixp2000/ixdp2x00.c
@@ -146,6 +146,8 @@ static struct irqchip ixdp2x00_cpld_irq_
.unmask = ixdp2x00_irq_unmask
};
+static DEFINE_IRQ_CHAINED_TYPE(ixdp2x00_irq_handler);
+
void ixdp2x00_init_irq(volatile unsigned long *stat_reg, volatile unsigned long *mask_reg, unsigned long nr_irqs)
{
unsigned int irq;
@@ -168,7 +170,7 @@ void ixdp2x00_init_irq(volatile unsigned
}
/* Hook into PCI interrupt */
- set_irq_chained_handler(IRQ_IXP2000_PCIB, &ixdp2x00_irq_handler);
+ set_irq_chained_handler(IRQ_IXP2000_PCIB, ixdp2x00_irq_handler);
}
/*************************************************************************
Index: linux/arch/arm/mach-ixp2000/ixdp2x01.c
===================================================================
--- linux.orig/arch/arm/mach-ixp2000/ixdp2x01.c
+++ linux/arch/arm/mach-ixp2000/ixdp2x01.c
@@ -95,6 +95,8 @@ static struct irqchip ixdp2x01_irq_chip
.unmask = ixdp2x01_irq_unmask
};
+static DEFINE_IRQ_CHAINED_TYPE(ixdp2x01_irq_handler);
+
/*
* We only do anything if we are the master NPU on the board.
* The slave NPU only has the ethernet chip going directly to
@@ -127,7 +129,7 @@ void __init ixdp2x01_init_irq(void)
}
/* Hook into PCI interrupts */
- set_irq_chained_handler(IRQ_IXP2000_PCIB, &ixdp2x01_irq_handler);
+ set_irq_chained_handler(IRQ_IXP2000_PCIB, ixdp2x01_irq_handler);
}
Index: linux/arch/arm/mach-ixp2000/pci.c
===================================================================
--- linux.orig/arch/arm/mach-ixp2000/pci.c
+++ linux/arch/arm/mach-ixp2000/pci.c
@@ -145,7 +145,7 @@ int ixp2000_pci_abort_handler(unsigned l
pci_master_aborts = 1;
- local_irq_save(flags);
+ raw_local_irq_save(flags);
temp = *(IXP2000_PCI_CONTROL);
if (temp & ((1 << 8) | (1 << 5))) {
ixp2000_reg_write(IXP2000_PCI_CONTROL, temp);
@@ -158,7 +158,7 @@ int ixp2000_pci_abort_handler(unsigned l
temp = *(IXP2000_PCI_CMDSTAT);
}
}
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
/*
* If it was an imprecise abort, then we need to correct the
@@ -176,7 +176,7 @@ clear_master_aborts(void)
volatile u32 temp;
unsigned long flags;
- local_irq_save(flags);
+ raw_local_irq_save(flags);
temp = *(IXP2000_PCI_CONTROL);
if (temp & ((1 << 8) | (1 << 5))) {
ixp2000_reg_write(IXP2000_PCI_CONTROL, temp);
@@ -189,7 +189,7 @@ clear_master_aborts(void)
temp = *(IXP2000_PCI_CMDSTAT);
}
}
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
return 0;
}
Index: linux/arch/arm/mach-ixp4xx/common-pci.c
===================================================================
--- linux.orig/arch/arm/mach-ixp4xx/common-pci.c
+++ linux/arch/arm/mach-ixp4xx/common-pci.c
@@ -53,7 +53,7 @@ unsigned long ixp4xx_pci_reg_base = 0;
* these transactions are atomic or we will end up
* with corrupt data on the bus or in a driver.
*/
-static DEFINE_SPINLOCK(ixp4xx_pci_lock);
+static DEFINE_RAW_SPINLOCK(ixp4xx_pci_lock);
/*
* Read from PCI config space
Index: linux/arch/arm/mach-ixp4xx/coyote-pci.c
===================================================================
--- linux.orig/arch/arm/mach-ixp4xx/coyote-pci.c
+++ linux/arch/arm/mach-ixp4xx/coyote-pci.c
@@ -17,6 +17,7 @@
#include
#include
#include
+#include
#include
#include
Index: linux/arch/arm/mach-ixp4xx/ixdp425-pci.c
===================================================================
--- linux.orig/arch/arm/mach-ixp4xx/ixdp425-pci.c
+++ linux/arch/arm/mach-ixp4xx/ixdp425-pci.c
@@ -16,6 +16,7 @@
#include
#include
+#include
#include
#include
#include
Index: linux/arch/arm/mach-ixp4xx/ixdpg425-pci.c
===================================================================
--- linux.orig/arch/arm/mach-ixp4xx/ixdpg425-pci.c
+++ linux/arch/arm/mach-ixp4xx/ixdpg425-pci.c
@@ -16,10 +16,10 @@
#include
#include
#include
+#include
#include
#include
-#include
#include
Index: linux/arch/arm/mach-l7200/core.c
===================================================================
--- linux.orig/arch/arm/mach-l7200/core.c
+++ linux/arch/arm/mach-l7200/core.c
@@ -7,6 +7,7 @@
*/
#include
#include
+#include
#include
#include
Index: linux/arch/arm/mach-lh7a40x/arch-kev7a400.c
===================================================================
--- linux.orig/arch/arm/mach-lh7a40x/arch-kev7a400.c
+++ linux/arch/arm/mach-lh7a40x/arch-kev7a400.c
@@ -72,6 +72,8 @@ static void kev7a400_cpld_handler (unsig
}
}
+static DEFINE_IRQ_CHAINED_TYPE(kev7a400_cpld_handler);
+
void __init lh7a40x_init_board_irq (void)
{
int irq;
Index: linux/arch/arm/mach-lh7a40x/arch-lpd7a40x.c
===================================================================
--- linux.orig/arch/arm/mach-lh7a40x/arch-lpd7a40x.c
+++ linux/arch/arm/mach-lh7a40x/arch-lpd7a40x.c
@@ -12,6 +12,7 @@
#include
#include
#include
+#include
#include
#include
@@ -173,6 +174,7 @@ static void lpd7a40x_cpld_handler (unsig
desc->chip->unmask (irq); /* Level-triggered need this */
}
+static DEFINE_IRQ_CHAINED_TYPE(lpd7a40x_cpld_handler);
void __init lh7a40x_init_board_irq (void)
{
Index: linux/arch/arm/mach-lh7a40x/irq-kev7a400.c
===================================================================
--- linux.orig/arch/arm/mach-lh7a40x/irq-kev7a400.c
+++ linux/arch/arm/mach-lh7a40x/irq-kev7a400.c
@@ -60,6 +60,8 @@ lh7a400_cpld_handler (unsigned int irq,
}
}
+static DEFINE_IRQ_CHAINED_TYPE(kev7a400_cpld_handler);
+
/* IRQ initialization */
void __init
Index: linux/arch/arm/mach-lh7a40x/irq-lpd7a40x.c
===================================================================
--- linux.orig/arch/arm/mach-lh7a40x/irq-lpd7a40x.c
+++ linux/arch/arm/mach-lh7a40x/irq-lpd7a40x.c
@@ -71,6 +71,7 @@ static void lh7a40x_cpld_handler (unsign
desc->chip->unmask (irq); /* Level-triggered need this */
}
+static DEFINE_IRQ_CHAINED_TYPE(lh7a40x_cpld_handler);
/* IRQ initialization */
Index: linux/arch/arm/mach-lh7a40x/time.c
===================================================================
--- linux.orig/arch/arm/mach-lh7a40x/time.c
+++ linux/arch/arm/mach-lh7a40x/time.c
@@ -12,6 +12,7 @@
#include
#include
#include
+#include
#include
#include
Index: linux/arch/arm/mach-omap1/board-osk.c
===================================================================
--- linux.orig/arch/arm/mach-omap1/board-osk.c
+++ linux/arch/arm/mach-omap1/board-osk.c
@@ -29,7 +29,7 @@
#include
#include
#include
-#include
+#include
#include
#include
Index: linux/arch/arm/mach-omap1/fpga.c
===================================================================
--- linux.orig/arch/arm/mach-omap1/fpga.c
+++ linux/arch/arm/mach-omap1/fpga.c
@@ -120,6 +120,8 @@ static struct irqchip omap_fpga_irq = {
.unmask = fpga_unmask_irq,
};
+static DEFINE_IRQ_CHAINED_TYPE(innovator_fpga_IRQ_demux);
+
/*
* All of the FPGA interrupt request inputs except for the touchscreen are
* edge-sensitive; the touchscreen is level-sensitive. The edge-sensitive
Index: linux/arch/arm/mach-omap1/leds-h2p2-debug.c
===================================================================
--- linux.orig/arch/arm/mach-omap1/leds-h2p2-debug.c
+++ linux/arch/arm/mach-omap1/leds-h2p2-debug.c
@@ -45,7 +45,7 @@ void h2p2_dbg_leds_event(led_event_t evt
static struct h2p2_dbg_fpga __iomem *fpga;
static u16 led_state, hw_led_state;
- local_irq_save(flags);
+ raw_local_irq_save(flags);
if (!(led_state & LED_STATE_ENABLED) && evt != led_start)
goto done;
@@ -140,5 +140,5 @@ void h2p2_dbg_leds_event(led_event_t evt
__raw_writew(~hw_led_state, &fpga->leds);
done:
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
}
Index: linux/arch/arm/mach-omap1/serial.c
===================================================================
--- linux.orig/arch/arm/mach-omap1/serial.c
+++ linux/arch/arm/mach-omap1/serial.c
@@ -12,6 +12,7 @@
#include
#include
#include
+#include
#include
#include
#include
Index: linux/arch/arm/mach-pxa/dma.c
===================================================================
--- linux.orig/arch/arm/mach-pxa/dma.c
+++ linux/arch/arm/mach-pxa/dma.c
@@ -43,7 +43,7 @@ int pxa_request_dma (char *name, pxa_dma
if (!name || !irq_handler)
return -EINVAL;
- local_irq_save(flags);
+ raw_local_irq_save(flags);
/* try grabbing a DMA channel with the requested priority */
for (i = prio; i < prio + PXA_DMA_NBCH(prio); i++) {
@@ -73,7 +73,7 @@ int pxa_request_dma (char *name, pxa_dma
i = -ENODEV;
}
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
return i;
}
@@ -88,10 +88,10 @@ void pxa_free_dma (int dma_ch)
return;
}
- local_irq_save(flags);
+ raw_local_irq_save(flags);
DCSR(dma_ch) = DCSR_STARTINTR|DCSR_ENDINTR|DCSR_BUSERR;
dma_channels[dma_ch].name = NULL;
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
}
static irqreturn_t dma_irq_handler(int irq, void *dev_id, struct pt_regs *regs)
Index: linux/arch/arm/mach-pxa/generic.c
===================================================================
--- linux.orig/arch/arm/mach-pxa/generic.c
+++ linux/arch/arm/mach-pxa/generic.c
@@ -49,7 +49,7 @@ void pxa_gpio_mode(int gpio_mode)
int fn = (gpio_mode & GPIO_MD_MASK_FN) >> 8;
int gafr;
- local_irq_save(flags);
+ raw_local_irq_save(flags);
if (gpio_mode & GPIO_DFLT_LOW)
GPCR(gpio) = GPIO_bit(gpio);
else if (gpio_mode & GPIO_DFLT_HIGH)
@@ -60,7 +60,7 @@ void pxa_gpio_mode(int gpio_mode)
GPDR(gpio) &= ~GPIO_bit(gpio);
gafr = GAFR(gpio) & ~(0x3 << (((gpio) & 0xf)*2));
GAFR(gpio) = gafr | (fn << (((gpio) & 0xf)*2));
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
}
EXPORT_SYMBOL(pxa_gpio_mode);
@@ -71,14 +71,14 @@ EXPORT_SYMBOL(pxa_gpio_mode);
void pxa_set_cken(int clock, int enable)
{
unsigned long flags;
- local_irq_save(flags);
+ raw_local_irq_save(flags);
if (enable)
CKEN |= clock;
else
CKEN &= ~clock;
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
}
EXPORT_SYMBOL(pxa_set_cken);
Index: linux/arch/arm/mach-pxa/idp.c
===================================================================
--- linux.orig/arch/arm/mach-pxa/idp.c
+++ linux/arch/arm/mach-pxa/idp.c
@@ -18,6 +18,7 @@
#include
#include
+#include
#include
#include
Index: linux/arch/arm/mach-pxa/irq.c
===================================================================
--- linux.orig/arch/arm/mach-pxa/irq.c
+++ linux/arch/arm/mach-pxa/irq.c
@@ -244,6 +244,7 @@ static struct irqchip pxa_muxed_gpio_chi
.set_type = pxa_gpio_irq_type,
};
+static DEFINE_IRQ_CHAINED_TYPE(pxa_gpio_demux_handler);
void __init pxa_init_irq(void)
{
Index: linux/arch/arm/mach-pxa/leds-idp.c
===================================================================
--- linux.orig/arch/arm/mach-pxa/leds-idp.c
+++ linux/arch/arm/mach-pxa/leds-idp.c
@@ -34,7 +34,7 @@ void idp_leds_event(led_event_t evt)
{
unsigned long flags;
- local_irq_save(flags);
+ raw_local_irq_save(flags);
switch (evt) {
case led_start:
@@ -113,5 +113,5 @@ void idp_leds_event(led_event_t evt)
else
IDP_CPLD_LED_CONTROL |= IDP_LEDS_MASK;
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
}
Index: linux/arch/arm/mach-pxa/leds-lubbock.c
===================================================================
--- linux.orig/arch/arm/mach-pxa/leds-lubbock.c
+++ linux/arch/arm/mach-pxa/leds-lubbock.c
@@ -48,7 +48,7 @@ void lubbock_leds_event(led_event_t evt)
{
unsigned long flags;
- local_irq_save(flags);
+ raw_local_irq_save(flags);
switch (evt) {
case led_start:
@@ -122,5 +122,5 @@ void lubbock_leds_event(led_event_t evt)
else
LUB_DISC_BLNK_LED |= 0xff;
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
}
Index: linux/arch/arm/mach-pxa/leds-mainstone.c
===================================================================
--- linux.orig/arch/arm/mach-pxa/leds-mainstone.c
+++ linux/arch/arm/mach-pxa/leds-mainstone.c
@@ -43,7 +43,7 @@ void mainstone_leds_event(led_event_t ev
{
unsigned long flags;
- local_irq_save(flags);
+ raw_local_irq_save(flags);
switch (evt) {
case led_start:
@@ -117,5 +117,5 @@ void mainstone_leds_event(led_event_t ev
else
MST_LEDCTRL |= 0xff;
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
}
Index: linux/arch/arm/mach-pxa/lubbock.c
===================================================================
--- linux.orig/arch/arm/mach-pxa/lubbock.c
+++ linux/arch/arm/mach-pxa/lubbock.c
@@ -47,9 +47,9 @@ void lubbock_set_misc_wr(unsigned int ma
{
unsigned long flags;
- local_irq_save(flags);
+ raw_local_irq_save(flags);
LUB_MISC_WR = (LUB_MISC_WR & ~mask) | (set & mask);
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
}
EXPORT_SYMBOL(lubbock_set_misc_wr);
@@ -90,6 +90,8 @@ static void lubbock_irq_handler(unsigned
} while (pending);
}
+static DEFINE_IRQ_CHAINED_TYPE(lubbock_irq_handler);
+
static void __init lubbock_init_irq(void)
{
int irq;
Index: linux/arch/arm/mach-pxa/mainstone.c
===================================================================
--- linux.orig/arch/arm/mach-pxa/mainstone.c
+++ linux/arch/arm/mach-pxa/mainstone.c
@@ -78,6 +78,8 @@ static void mainstone_irq_handler(unsign
} while (pending);
}
+static DEFINE_IRQ_CHAINED_TYPE(mainstone_irq_handler);
+
static void __init mainstone_init_irq(void)
{
int irq;
Index: linux/arch/arm/mach-rpc/dma.c
===================================================================
--- linux.orig/arch/arm/mach-rpc/dma.c
+++ linux/arch/arm/mach-rpc/dma.c
@@ -171,11 +171,11 @@ static void iomd_disable_dma(dmach_t cha
unsigned long dma_base = dma->dma_base;
unsigned long flags;
- local_irq_save(flags);
+ raw_local_irq_save(flags);
if (dma->state != ~DMA_ST_AB)
disable_irq(dma->dma_irq);
iomd_writeb(0, dma_base + CR);
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
}
static int iomd_set_dma_speed(dmach_t channel, dma_t *dma, int cycle)
Index: linux/arch/arm/mach-rpc/irq.c
===================================================================
--- linux.orig/arch/arm/mach-rpc/irq.c
+++ linux/arch/arm/mach-rpc/irq.c
@@ -112,6 +112,15 @@ static struct irqchip iomd_fiq_chip = {
.unmask = iomd_unmask_irq_fiq,
};
+static unsigned int startup_irq_disabled(unsigned int irq)
+{
+ return 0;
+}
+
+/* Interrupt type for irqs which must not be
+ * automatically enabled in request_irq */
+static struct irq_type level_type_nostart;
+
void __init rpc_init_irq(void)
{
unsigned int irq, flags;
@@ -121,16 +130,15 @@ void __init rpc_init_irq(void)
iomd_writeb(0, IOMD_FIQMASK);
iomd_writeb(0, IOMD_DMAMASK);
+ level_type_nostart = default_level_type;
+ level_type_nostart.startup = startup_irq_disabled;
+
for (irq = 0; irq < NR_IRQS; irq++) {
flags = IRQF_VALID;
if (irq <= 6 || (irq >= 9 && irq <= 15))
flags |= IRQF_PROBE;
- if (irq == 21 || (irq >= 16 && irq <= 19) ||
- irq == IRQ_KEYBOARDTX)
- flags |= IRQF_NOAUTOEN;
-
switch (irq) {
case 0 ... 7:
set_irq_chip(irq, &iomd_a_chip);
@@ -155,6 +163,10 @@ void __init rpc_init_irq(void)
set_irq_flags(irq, IRQF_VALID);
break;
}
+
+ if (irq == 21 || (irq >= 16 && irq <= 19) ||
+ irq == IRQ_KEYBOARDTX)
+ set_irq_handler(irq, &level_type_nostart);
}
init_FIQ();
Index: linux/arch/arm/mach-s3c2410/bast-irq.c
===================================================================
--- linux.orig/arch/arm/mach-s3c2410/bast-irq.c
+++ linux/arch/arm/mach-s3c2410/bast-irq.c
@@ -136,13 +136,15 @@ bast_irq_pc104_demux(unsigned int irq,
for (i = 0; stat != 0; i++, stat >>= 1) {
if (stat & 1) {
irqno = bast_pc104_irqs[i];
-
- desc_handle_irq(irqno, irq_desc + irqno, regs);
+ desc = irq_desc + irqno;
+ desc_handle_irq(irqno, desc, regs);
}
}
}
}
+DEFINE_IRQ_CHAINED_TYPE(bast_irq_pc104_demux);
+
static __init int bast_irq_init(void)
{
unsigned int i;
@@ -156,7 +158,7 @@ static __init int bast_irq_init(void)
set_irq_chained_handler(IRQ_ISA, bast_irq_pc104_demux);
- /* reigster our IRQs */
+ /* register our IRQs */
for (i = 0; i < 4; i++) {
unsigned int irqno = bast_pc104_irqs[i];
Index: linux/arch/arm/mach-s3c2410/clock.c
===================================================================
--- linux.orig/arch/arm/mach-s3c2410/clock.c
+++ linux/arch/arm/mach-s3c2410/clock.c
@@ -61,7 +61,7 @@ void inline s3c24xx_clk_enable(unsigned
unsigned long clkcon;
unsigned long flags;
- local_irq_save(flags);
+ raw_local_irq_save(flags);
clkcon = __raw_readl(S3C2410_CLKCON);
clkcon &= ~clocks;
@@ -74,7 +74,7 @@ void inline s3c24xx_clk_enable(unsigned
__raw_writel(clkcon, S3C2410_CLKCON);
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
}
/* enable and disable calls for use with the clk struct */
Index: linux/arch/arm/mach-s3c2410/dma.c
===================================================================
--- linux.orig/arch/arm/mach-s3c2410/dma.c
+++ linux/arch/arm/mach-s3c2410/dma.c
@@ -329,11 +329,11 @@ static int s3c2410_dma_start(s3c2410_dma
pr_debug("s3c2410_start_dma: channel=%d\n", chan->number);
- local_irq_save(flags);
+ raw_local_irq_save(flags);
if (chan->state == S3C2410_DMA_RUNNING) {
pr_debug("s3c2410_start_dma: already running (%d)\n", chan->state);
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
return 0;
}
@@ -348,7 +348,7 @@ static int s3c2410_dma_start(s3c2410_dma
printk(KERN_ERR "dma%d: channel has nothing loaded\n",
chan->number);
chan->state = S3C2410_DMA_IDLE;
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
return -EINVAL;
}
@@ -385,7 +385,7 @@ static int s3c2410_dma_start(s3c2410_dma
dbg_showchan(chan);
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
return 0;
}
@@ -451,7 +451,7 @@ int s3c2410_dma_enqueue(unsigned int cha
buf->id = id;
buf->magic = BUF_MAGIC;
- local_irq_save(flags);
+ raw_local_irq_save(flags);
if (chan->curr == NULL) {
/* we've got nothing loaded... */
@@ -485,7 +485,7 @@ int s3c2410_dma_enqueue(unsigned int cha
"timeout loading buffer\n",
chan->number);
dbg_showchan(chan);
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
return -EINVAL;
}
}
@@ -499,7 +499,7 @@ int s3c2410_dma_enqueue(unsigned int cha
}
}
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
return 0;
}
@@ -661,9 +661,9 @@ s3c2410_dma_irq(int irq, void *devpw, st
return IRQ_HANDLED;
}
- local_irq_save(flags);
+ raw_local_irq_save(flags);
s3c2410_dma_loadbuffer(chan, chan->next);
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
} else {
s3c2410_dma_lastxfer(chan);
@@ -698,14 +698,14 @@ int s3c2410_dma_request(unsigned int cha
check_channel(channel);
- local_irq_save(flags);
+ raw_local_irq_save(flags);
dbg_showchan(chan);
if (chan->in_use) {
if (client != chan->client) {
printk(KERN_ERR "dma%d: already in use\n", channel);
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
return -EBUSY;
} else {
printk(KERN_ERR "dma%d: client already has channel\n", channel);
@@ -724,7 +724,7 @@ int s3c2410_dma_request(unsigned int cha
if (err) {
chan->in_use = 0;
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
printk(KERN_ERR "%s: cannot get IRQ %d for DMA %d\n",
client->name, chan->irq, chan->number);
@@ -735,7 +735,7 @@ int s3c2410_dma_request(unsigned int cha
chan->irq_enabled = 1;
}
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
/* need to setup */
@@ -764,7 +764,7 @@ int s3c2410_dma_free(dmach_t channel, s3
check_channel(channel);
- local_irq_save(flags);
+ raw_local_irq_save(flags);
if (chan->client != client) {
@@ -789,7 +789,7 @@ int s3c2410_dma_free(dmach_t channel, s3
free_irq(chan->irq, (void *)chan);
chan->irq_claimed = 0;
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
return 0;
}
@@ -805,7 +805,7 @@ static int s3c2410_dma_dostop(s3c2410_dm
dbg_showchan(chan);
- local_irq_save(flags);
+ raw_local_irq_save(flags);
s3c2410_dma_call_op(chan, S3C2410_DMAOP_STOP);
@@ -823,7 +823,7 @@ static int s3c2410_dma_dostop(s3c2410_dm
chan->state = S3C2410_DMA_IDLE;
chan->load_state = S3C2410_DMALOAD_NONE;
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
return 0;
}
@@ -840,7 +840,7 @@ static int s3c2410_dma_flush(s3c2410_dma
pr_debug("%s:\n", __FUNCTION__);
- local_irq_save(flags);
+ raw_local_irq_save(flags);
if (chan->state != S3C2410_DMA_IDLE) {
pr_debug("%s: stopping channel...\n", __FUNCTION__ );
@@ -865,7 +865,7 @@ static int s3c2410_dma_flush(s3c2410_dma
}
}
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
return 0;
}
Index: linux/arch/arm/mach-s3c2410/gpio.c
===================================================================
--- linux.orig/arch/arm/mach-s3c2410/gpio.c
+++ linux/arch/arm/mach-s3c2410/gpio.c
@@ -58,7 +58,7 @@ void s3c2410_gpio_cfgpin(unsigned int pi
mask = 3 << S3C2410_GPIO_OFFSET(pin)*2;
}
- local_irq_save(flags);
+ raw_local_irq_save(flags);
con = __raw_readl(base + 0x00);
con &= ~mask;
@@ -66,7 +66,7 @@ void s3c2410_gpio_cfgpin(unsigned int pi
__raw_writel(con, base + 0x00);
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
}
EXPORT_SYMBOL(s3c2410_gpio_cfgpin);
@@ -97,14 +97,14 @@ void s3c2410_gpio_pullup(unsigned int pi
if (pin < S3C2410_GPIO_BANKB)
return;
- local_irq_save(flags);
+ raw_local_irq_save(flags);
up = __raw_readl(base + 0x08);
up &= ~(1L << offs);
up |= to << offs;
__raw_writel(up, base + 0x08);
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
}
EXPORT_SYMBOL(s3c2410_gpio_pullup);
@@ -116,14 +116,14 @@ void s3c2410_gpio_setpin(unsigned int pi
unsigned long flags;
unsigned long dat;
- local_irq_save(flags);
+ raw_local_irq_save(flags);
dat = __raw_readl(base + 0x04);
dat &= ~(1 << offs);
dat |= to << offs;
__raw_writel(dat, base + 0x04);
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
}
EXPORT_SYMBOL(s3c2410_gpio_setpin);
@@ -143,12 +143,12 @@ unsigned int s3c2410_modify_misccr(unsig
unsigned long flags;
unsigned long misccr;
- local_irq_save(flags);
+ raw_local_irq_save(flags);
misccr = __raw_readl(S3C2410_MISCCR);
misccr &= ~clear;
misccr ^= change;
__raw_writel(misccr, S3C2410_MISCCR);
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
return misccr;
}
@@ -189,7 +189,7 @@ int s3c2410_gpio_irqfilter(unsigned int
pin -= S3C2410_GPG8_EINT16;
reg += pin & ~3;
- local_irq_save(flags);
+ raw_local_irq_save(flags);
/* update filter width and clock source */
@@ -205,7 +205,7 @@ int s3c2410_gpio_irqfilter(unsigned int
val |= on << ((pin * 4) + 3);
__raw_writel(val, S3C2410_EXTINT2);
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
return 0;
}
Index: linux/arch/arm/mach-s3c2410/irq.c
===================================================================
--- linux.orig/arch/arm/mach-s3c2410/irq.c
+++ linux/arch/arm/mach-s3c2410/irq.c
@@ -573,6 +573,11 @@ s3c_irq_demux_uart2(unsigned int irq,
}
+static DEFINE_IRQ_CHAINED_TYPE(s3c_irq_demux_uart0);
+static DEFINE_IRQ_CHAINED_TYPE(s3c_irq_demux_uart1);
+static DEFINE_IRQ_CHAINED_TYPE(s3c_irq_demux_uart2);
+static DEFINE_IRQ_CHAINED_TYPE(s3c_irq_demux_adc);
+
/* s3c24xx_init_irq
*
* Initialise S3C2410 IRQ system
Index: linux/arch/arm/mach-s3c2410/s3c2440-dsc.c
===================================================================
--- linux.orig/arch/arm/mach-s3c2410/s3c2440-dsc.c
+++ linux/arch/arm/mach-s3c2410/s3c2440-dsc.c
@@ -45,14 +45,14 @@ int s3c2440_set_dsc(unsigned int pin, un
base = (pin & S3C2440_SELECT_DSC1) ? S3C2440_DSC1 : S3C2440_DSC0;
mask = 3 << S3C2440_DSC_GETSHIFT(pin);
- local_irq_save(flags);
+ raw_local_irq_save(flags);
val = __raw_readl(base);
val &= ~mask;
val |= value & mask;
__raw_writel(val, base);
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
return 0;
}
Index: linux/arch/arm/mach-s3c2410/s3c2440-irq.c
===================================================================
--- linux.orig/arch/arm/mach-s3c2410/s3c2440-irq.c
+++ linux/arch/arm/mach-s3c2410/s3c2440-irq.c
@@ -157,6 +157,9 @@ static struct irqchip s3c_irq_cam = {
.ack = s3c_irq_cam_ack,
};
+static DEFINE_IRQ_CHAINED_TYPE(s3c_irq_demux_wdtac97);
+static DEFINE_IRQ_CHAINED_TYPE(s3c_irq_demux_cam);
+
static int s3c2440_irq_add(struct sys_device *sysdev)
{
unsigned int irqno;
Index: linux/arch/arm/mach-s3c2410/time.c
===================================================================
--- linux.orig/arch/arm/mach-s3c2410/time.c
+++ linux/arch/arm/mach-s3c2410/time.c
@@ -23,6 +23,7 @@
#include
#include
#include
+#include
#include
#include
Index: linux/arch/arm/mach-sa1100/assabet.c
===================================================================
--- linux.orig/arch/arm/mach-sa1100/assabet.c
+++ linux/arch/arm/mach-sa1100/assabet.c
@@ -61,10 +61,10 @@ void ASSABET_BCR_frob(unsigned int mask,
{
unsigned long flags;
- local_irq_save(flags);
+ raw_local_irq_save(flags);
BCR_value = (BCR_value & ~mask) | val;
ASSABET_BCR = BCR_value;
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
}
EXPORT_SYMBOL(ASSABET_BCR_frob);
Index: linux/arch/arm/mach-sa1100/badge4.c
===================================================================
--- linux.orig/arch/arm/mach-sa1100/badge4.c
+++ linux/arch/arm/mach-sa1100/badge4.c
@@ -227,7 +227,7 @@ void badge4_set_5V(unsigned subsystem, i
unsigned long flags;
unsigned old_5V_bitmap;
- local_irq_save(flags);
+ raw_local_irq_save(flags);
old_5V_bitmap = badge4_5V_bitmap;
@@ -240,15 +240,22 @@ void badge4_set_5V(unsigned subsystem, i
/* detect on->off and off->on transitions */
if ((!old_5V_bitmap) && (badge4_5V_bitmap)) {
/* was off, now on */
- printk(KERN_INFO "%s: enabling 5V supply rail\n", __FUNCTION__);
GPSR = BADGE4_GPIO_PCMEN5V;
} else if ((old_5V_bitmap) && (!badge4_5V_bitmap)) {
/* was on, now off */
- printk(KERN_INFO "%s: disabling 5V supply rail\n", __FUNCTION__);
GPCR = BADGE4_GPIO_PCMEN5V;
}
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
+
+ /* report on->off and off->on transitions, now that irqs are restored */
+ if ((!old_5V_bitmap) && (badge4_5V_bitmap)) {
+ /* was off, now on */
+ printk(KERN_INFO "%s: enabling 5V supply rail\n", __FUNCTION__);
+ } else if ((old_5V_bitmap) && (!badge4_5V_bitmap)) {
+ /* was on, now off */
+ printk(KERN_INFO "%s: disabling 5V supply rail\n", __FUNCTION__);
+ }
}
EXPORT_SYMBOL(badge4_set_5V);
Index: linux/arch/arm/mach-sa1100/cerf.c
===================================================================
--- linux.orig/arch/arm/mach-sa1100/cerf.c
+++ linux/arch/arm/mach-sa1100/cerf.c
@@ -15,6 +15,7 @@
#include
#include
#include
+#include
#include
#include
Index: linux/arch/arm/mach-sa1100/cpu-sa1110.c
===================================================================
--- linux.orig/arch/arm/mach-sa1100/cpu-sa1110.c
+++ linux/arch/arm/mach-sa1100/cpu-sa1110.c
@@ -282,7 +282,7 @@ static int sa1110_target(struct cpufreq_
* This means that we won't access SDRAM for the duration of
* the programming.
*/
- local_irq_save(flags);
+ raw_local_irq_save(flags);
asm("mcr p15, 0, %0, c7, c10, 4" : : "r" (0));
udelay(10);
__asm__ __volatile__(" \n\
@@ -303,7 +303,7 @@ static int sa1110_target(struct cpufreq_
: "r" (&MDCNFG), "r" (&PPCR), "0" (sd.mdcnfg),
"r" (sd.mdrefr), "r" (sd.mdcas[0]),
"r" (sd.mdcas[1]), "r" (sd.mdcas[2]), "r" (ppcr));
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
/*
* Now, return the SDRAM refresh back to normal.
Index: linux/arch/arm/mach-sa1100/dma.c
===================================================================
--- linux.orig/arch/arm/mach-sa1100/dma.c
+++ linux/arch/arm/mach-sa1100/dma.c
@@ -227,7 +227,7 @@ int sa1100_start_dma(dma_regs_t *regs, d
if (size > MAX_DMA_SIZE)
return -EOVERFLOW;
- local_irq_save(flags);
+ raw_local_irq_save(flags);
status = regs->RdDCSR;
/* If both DMA buffers are started, there's nothing else we can do. */
@@ -262,7 +262,7 @@ int sa1100_start_dma(dma_regs_t *regs, d
ret = 0;
out:
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
return ret;
}
Index: linux/arch/arm/mach-sa1100/generic.c
===================================================================
--- linux.orig/arch/arm/mach-sa1100/generic.c
+++ linux/arch/arm/mach-sa1100/generic.c
@@ -135,7 +135,7 @@ unsigned long long sched_clock(void)
static void sa1100_power_off(void)
{
mdelay(100);
- local_irq_disable();
+ raw_local_irq_disable();
/* disable internal oscillator, float CS lines */
PCFR = (PCFR_OPDE | PCFR_FP | PCFR_FS);
/* enable wake-up on GPIO0 (Assabet...) */
@@ -391,7 +391,7 @@ void __init sa1110_mb_disable(void)
{
unsigned long flags;
- local_irq_save(flags);
+ raw_local_irq_save(flags);
PGSR &= ~GPIO_MBGNT;
GPCR = GPIO_MBGNT;
@@ -399,7 +399,7 @@ void __init sa1110_mb_disable(void)
GAFR &= ~(GPIO_MBGNT | GPIO_MBREQ);
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
}
/*
@@ -410,7 +410,7 @@ void __init sa1110_mb_enable(void)
{
unsigned long flags;
- local_irq_save(flags);
+ raw_local_irq_save(flags);
PGSR &= ~GPIO_MBGNT;
GPCR = GPIO_MBGNT;
@@ -419,6 +419,6 @@ void __init sa1110_mb_enable(void)
GAFR |= (GPIO_MBGNT | GPIO_MBREQ);
TUCR |= TUCR_MR;
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
}
Index: linux/arch/arm/mach-sa1100/h3600.c
===================================================================
--- linux.orig/arch/arm/mach-sa1100/h3600.c
+++ linux/arch/arm/mach-sa1100/h3600.c
@@ -319,7 +319,7 @@ static void h3100_control_egpio(enum ipa
}
if (egpio || gpio) {
- local_irq_save(flags);
+ raw_local_irq_save(flags);
if (setp) {
h3100_egpio |= egpio;
GPSR = gpio;
@@ -328,7 +328,7 @@ static void h3100_control_egpio(enum ipa
GPCR = gpio;
}
H3100_EGPIO = h3100_egpio;
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
}
}
@@ -451,13 +451,13 @@ static void h3600_control_egpio(enum ipa
}
if (egpio) {
- local_irq_save(flags);
+ raw_local_irq_save(flags);
if (setp)
h3600_egpio |= egpio;
else
h3600_egpio &= ~egpio;
H3600_EGPIO = h3600_egpio;
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
}
}
@@ -788,6 +788,8 @@ static void h3800_unmask_gpio_irq(unsign
H3800_ASIC2_GPIINTSTAT |= mask;
}
+static DEFINE_IRQ_CHAINED_TYPE(h3800_IRQ_demux);
+
static void __init h3800_init_irq(void)
{
int i;
@@ -826,7 +828,7 @@ static void __init h3800_init_irq(void)
}
#endif
set_irq_type(IRQ_GPIO_H3800_ASIC, IRQT_RISING);
- set_irq_chained_handler(IRQ_GPIO_H3800_ASIC, &h3800_IRQ_demux);
+ set_irq_chained_handler(IRQ_GPIO_H3800_ASIC, h3800_IRQ_demux);
}
Index: linux/arch/arm/mach-sa1100/irq.c
===================================================================
--- linux.orig/arch/arm/mach-sa1100/irq.c
+++ linux/arch/arm/mach-sa1100/irq.c
@@ -11,12 +11,13 @@
*/
#include
#include
+#include
+#include
#include
#include
#include
#include
-#include
#include
#include "generic.h"
@@ -281,6 +282,8 @@ static int __init sa1100irq_init_devicef
return sysdev_register(&sa1100irq_device);
}
+static DEFINE_IRQ_CHAINED_TYPE(sa1100_high_gpio_handler);
+
device_initcall(sa1100irq_init_devicefs);
void __init sa1100_init_irq(void)
Index: linux/arch/arm/mach-sa1100/leds-assabet.c
===================================================================
--- linux.orig/arch/arm/mach-sa1100/leds-assabet.c
+++ linux/arch/arm/mach-sa1100/leds-assabet.c
@@ -32,7 +32,7 @@ void assabet_leds_event(led_event_t evt)
{
unsigned long flags;
- local_irq_save(flags);
+ raw_local_irq_save(flags);
switch (evt) {
case led_start:
@@ -111,5 +111,5 @@ void assabet_leds_event(led_event_t evt)
if (led_state & LED_STATE_ENABLED)
ASSABET_BCR_frob(ASSABET_BCR_LED_MASK, hw_led_state);
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
}
Index: linux/arch/arm/mach-sa1100/leds-badge4.c
===================================================================
--- linux.orig/arch/arm/mach-sa1100/leds-badge4.c
+++ linux/arch/arm/mach-sa1100/leds-badge4.c
@@ -36,7 +36,7 @@ void badge4_leds_event(led_event_t evt)
{
unsigned long flags;
- local_irq_save(flags);
+ raw_local_irq_save(flags);
switch (evt) {
case led_start:
@@ -108,5 +108,5 @@ void badge4_leds_event(led_event_t evt)
GPCR = hw_led_state ^ LED_MASK;
}
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
}
Index: linux/arch/arm/mach-sa1100/leds-cerf.c
===================================================================
--- linux.orig/arch/arm/mach-sa1100/leds-cerf.c
+++ linux/arch/arm/mach-sa1100/leds-cerf.c
@@ -29,7 +29,7 @@ void cerf_leds_event(led_event_t evt)
{
unsigned long flags;
- local_irq_save(flags);
+ raw_local_irq_save(flags);
switch (evt) {
case led_start:
@@ -107,5 +107,5 @@ void cerf_leds_event(led_event_t evt)
GPCR = hw_led_state ^ LED_MASK;
}
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
}
Index: linux/arch/arm/mach-sa1100/leds-hackkit.c
===================================================================
--- linux.orig/arch/arm/mach-sa1100/leds-hackkit.c
+++ linux/arch/arm/mach-sa1100/leds-hackkit.c
@@ -33,7 +33,7 @@ void hackkit_leds_event(led_event_t evt)
{
unsigned long flags;
- local_irq_save(flags);
+ raw_local_irq_save(flags);
switch(evt) {
case led_start:
@@ -109,5 +109,5 @@ void hackkit_leds_event(led_event_t evt)
GPCR = hw_led_state ^ LED_MASK;
}
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
}
Index: linux/arch/arm/mach-sa1100/leds-lart.c
===================================================================
--- linux.orig/arch/arm/mach-sa1100/leds-lart.c
+++ linux/arch/arm/mach-sa1100/leds-lart.c
@@ -32,7 +32,7 @@ void lart_leds_event(led_event_t evt)
{
unsigned long flags;
- local_irq_save(flags);
+ raw_local_irq_save(flags);
switch(evt) {
case led_start:
@@ -98,5 +98,5 @@ void lart_leds_event(led_event_t evt)
GPCR = hw_led_state ^ LED_MASK;
}
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
}
Index: linux/arch/arm/mach-sa1100/neponset.c
===================================================================
--- linux.orig/arch/arm/mach-sa1100/neponset.c
+++ linux/arch/arm/mach-sa1100/neponset.c
@@ -137,6 +137,8 @@ static struct sa1100_port_fns neponset_p
.get_mctrl = neponset_get_mctrl,
};
+static DEFINE_IRQ_CHAINED_TYPE(neponset_irq_handler);
+
static int neponset_probe(struct device *dev)
{
sa1100_register_uart_fns(&neponset_port_fns);
Index: linux/arch/arm/mach-sa1100/pleb.c
===================================================================
--- linux.orig/arch/arm/mach-sa1100/pleb.c
+++ linux/arch/arm/mach-sa1100/pleb.c
@@ -7,6 +7,7 @@
#include
#include
#include
+#include
#include
Index: linux/arch/arm/mach-sa1100/simpad.c
===================================================================
--- linux.orig/arch/arm/mach-sa1100/simpad.c
+++ linux/arch/arm/mach-sa1100/simpad.c
@@ -168,7 +168,7 @@ static void __init simpad_map_io(void)
static void simpad_power_off(void)
{
- local_irq_disable(); // was cli
+ raw_local_irq_disable(); // was cli
set_cs3(0x800); /* only SD_MEDIAQ */
/* disable internal oscillator, float CS lines */
@@ -185,7 +185,7 @@ static void simpad_power_off(void)
PMCR = PMCR_SF;
while(1);
- local_irq_enable(); /* we won't ever call it */
+ raw_local_irq_enable(); /* we won't ever call it */
}
Index: linux/arch/arm/mach-sa1100/time.c
===================================================================
--- linux.orig/arch/arm/mach-sa1100/time.c
+++ linux/arch/arm/mach-sa1100/time.c
@@ -11,6 +11,7 @@
#include
#include
#include
+#include
#include
#include
Index: linux/arch/arm/mach-shark/core.c
===================================================================
--- linux.orig/arch/arm/mach-shark/core.c
+++ linux/arch/arm/mach-shark/core.c
@@ -6,6 +6,7 @@
#include
#include
#include
+#include
#include
#include
Index: linux/arch/arm/mach-shark/leds.c
===================================================================
--- linux.orig/arch/arm/mach-shark/leds.c
+++ linux/arch/arm/mach-shark/leds.c
@@ -33,7 +33,7 @@ static char led_state;
static short hw_led_state;
static short saved_state;
-static DEFINE_SPINLOCK(leds_lock);
+static DEFINE_RAW_SPINLOCK(leds_lock);
short sequoia_read(int addr) {
outw(addr,0x24);
Index: linux/arch/arm/mach-versatile/core.c
===================================================================
--- linux.orig/arch/arm/mach-versatile/core.c
+++ linux/arch/arm/mach-versatile/core.c
@@ -112,6 +112,8 @@ sic_handle_irq(unsigned int irq, struct
} while (status);
}
+static DEFINE_IRQ_CHAINED_TYPE(sic_handle_irq);
+
#if 1
#define IRQ_MMCI0A IRQ_VICSOURCE22
#define IRQ_AACI IRQ_VICSOURCE24
@@ -161,7 +163,7 @@ void __init versatile_init_irq(void)
}
}
- set_irq_handler(IRQ_VICSOURCE31, sic_handle_irq);
+ set_irq_chained_handler(IRQ_VICSOURCE31, sic_handle_irq);
vic_unmask_irq(IRQ_VICSOURCE31);
/* Do second interrupt controller */
@@ -727,7 +729,7 @@ static void versatile_leds_event(led_eve
unsigned long flags;
u32 val;
- local_irq_save(flags);
+ raw_local_irq_save(flags);
val = readl(VA_LEDS_BASE);
switch (ledevt) {
@@ -752,7 +754,7 @@ static void versatile_leds_event(led_eve
}
writel(val, VA_LEDS_BASE);
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
}
#endif /* CONFIG_LEDS */
Index: linux/arch/arm/mm/blockops.c
===================================================================
--- linux.orig/arch/arm/mm/blockops.c
+++ linux/arch/arm/mm/blockops.c
@@ -20,7 +20,7 @@ extern struct cpu_cache_fns blk_cache_fn
*
* - kaddr - kernel address (guaranteed to be page aligned)
*/
-static void __attribute__((naked))
+static void notrace __attribute__((naked))
blk_flush_kern_dcache_page(void *kaddr)
{
asm(
@@ -45,7 +45,7 @@ blk_flush_kern_dcache_page(void *kaddr)
* - start - virtual start address of region
* - end - virtual end address of region
*/
-static void __attribute__((naked))
+static void notrace __attribute__((naked))
blk_dma_inv_range_unified(unsigned long start, unsigned long end)
{
asm(
@@ -61,7 +61,7 @@ blk_dma_inv_range_unified(unsigned long
: "I" (L1_CACHE_BYTES - 1));
}
-static void __attribute__((naked))
+static void notrace __attribute__((naked))
blk_dma_inv_range_harvard(unsigned long start, unsigned long end)
{
asm(
@@ -82,7 +82,7 @@ blk_dma_inv_range_harvard(unsigned long
* - start - virtual start address of region
* - end - virtual end address of region
*/
-static void __attribute__((naked))
+static void notrace __attribute__((naked))
blk_dma_clean_range(unsigned long start, unsigned long end)
{
asm(
@@ -97,7 +97,7 @@ blk_dma_clean_range(unsigned long start,
* - start - virtual start address of region
* - end - virtual end address of region
*/
-static void __attribute__((naked))
+static void notrace __attribute__((naked))
blk_dma_flush_range(unsigned long start, unsigned long end)
{
asm(
Index: linux/arch/arm/mm/consistent.c
===================================================================
--- linux.orig/arch/arm/mm/consistent.c
+++ linux/arch/arm/mm/consistent.c
@@ -30,7 +30,7 @@
* This is the page table (2MB) covering uncached, DMA consistent allocations
*/
static pte_t *consistent_pte;
-static DEFINE_SPINLOCK(consistent_lock);
+static DEFINE_RAW_SPINLOCK(consistent_lock);
/*
* VM region handling support.
Index: linux/arch/arm/mm/copypage-v4mc.c
===================================================================
--- linux.orig/arch/arm/mm/copypage-v4mc.c
+++ linux/arch/arm/mm/copypage-v4mc.c
@@ -29,7 +29,7 @@
#define TOP_PTE(x) pte_offset_kernel(top_pmd, x)
-static DEFINE_SPINLOCK(minicache_lock);
+static DEFINE_RAW_SPINLOCK(minicache_lock);
/*
* ARMv4 mini-dcache optimised copy_user_page
@@ -43,7 +43,7 @@ static DEFINE_SPINLOCK(minicache_lock);
* instruction. If your processor does not supply this, you have to write your
* own copy_user_page that does the right thing.
*/
-static void __attribute__((naked))
+static void notrace __attribute__((naked))
mc_copy_user_page(void *from, void *to)
{
asm volatile(
@@ -82,7 +82,7 @@ void v4_mc_copy_user_page(void *kto, con
/*
* ARMv4 optimised clear_user_page
*/
-void __attribute__((naked))
+void notrace __attribute__((naked))
v4_mc_clear_user_page(void *kaddr, unsigned long vaddr)
{
asm volatile(
Index: linux/arch/arm/mm/copypage-v6.c
===================================================================
--- linux.orig/arch/arm/mm/copypage-v6.c
+++ linux/arch/arm/mm/copypage-v6.c
@@ -28,7 +28,7 @@
#define TOP_PTE(x) pte_offset_kernel(top_pmd, x)
-static DEFINE_SPINLOCK(v6_lock);
+static DEFINE_RAW_SPINLOCK(v6_lock);
/*
* Copy the user page. No aliasing to deal with so we can just
Index: linux/arch/arm/mm/copypage-xscale.c
===================================================================
--- linux.orig/arch/arm/mm/copypage-xscale.c
+++ linux/arch/arm/mm/copypage-xscale.c
@@ -31,7 +31,7 @@
#define TOP_PTE(x) pte_offset_kernel(top_pmd, x)
-static DEFINE_SPINLOCK(minicache_lock);
+static DEFINE_RAW_SPINLOCK(minicache_lock);
/*
* XScale mini-dcache optimised copy_user_page
@@ -41,7 +41,7 @@ static DEFINE_SPINLOCK(minicache_lock);
* Dcache aliasing issue. The writes will be forwarded to the write buffer,
* and merged as appropriate.
*/
-static void __attribute__((naked))
+static void notrace __attribute__((naked))
mc_copy_user_page(void *from, void *to)
{
/*
@@ -104,7 +104,7 @@ void xscale_mc_copy_user_page(void *kto,
/*
* XScale optimised clear_user_page
*/
-void __attribute__((naked))
+void notrace __attribute__((naked))
xscale_mc_clear_user_page(void *kaddr, unsigned long vaddr)
{
asm volatile(
Index: linux/arch/arm/mm/fault-armv.c
===================================================================
--- linux.orig/arch/arm/mm/fault-armv.c
+++ linux/arch/arm/mm/fault-armv.c
@@ -161,7 +161,7 @@ static int __init check_writebuffer(unsi
{
register unsigned long zero = 0, one = 1, val;
- local_irq_disable();
+ raw_local_irq_disable();
mb();
*p1 = one;
mb();
@@ -169,7 +169,7 @@ static int __init check_writebuffer(unsi
mb();
val = *p1;
mb();
- local_irq_enable();
+ raw_local_irq_enable();
return val != zero;
}
Index: linux/arch/arm/mm/fault.c
===================================================================
--- linux.orig/arch/arm/mm/fault.c
+++ linux/arch/arm/mm/fault.c
@@ -216,7 +216,7 @@ out:
return fault;
}
-static int
+static notrace int
do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
{
struct task_struct *tsk;
@@ -316,7 +316,7 @@ no_context:
* interrupt or a critical region, and should only copy the information
* from the master page table, nothing more.
*/
-static int
+static notrace int
do_translation_fault(unsigned long addr, unsigned int fsr,
struct pt_regs *regs)
{
@@ -362,7 +362,7 @@ bad_area:
* Some section permission faults need to be handled gracefully.
* They can happen due to a __{get,put}_user during an oops.
*/
-static int
+static notrace int
do_sect_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
{
struct task_struct *tsk = current;
@@ -373,7 +373,7 @@ do_sect_fault(unsigned long addr, unsign
/*
* This abort handler always returns "fault".
*/
-static int
+static notrace int
do_bad(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
{
return 1;
@@ -428,7 +428,7 @@ static struct fsr_info {
{ do_bad, SIGBUS, 0, "unknown 31" }
};
-void __init
+void __init notrace
hook_fault_code(int nr, int (*fn)(unsigned long, unsigned int, struct pt_regs *),
int sig, const char *name)
{
@@ -442,7 +442,7 @@ hook_fault_code(int nr, int (*fn)(unsign
/*
* Dispatch a data abort to the relevant handler.
*/
-asmlinkage void
+asmlinkage notrace void
do_DataAbort(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
{
const struct fsr_info *inf = fsr_info + (fsr & 15) + ((fsr & (1 << 10)) >> 6);
@@ -461,7 +461,7 @@ do_DataAbort(unsigned long addr, unsigne
notify_die("", regs, &info, fsr, 0);
}
-asmlinkage void
+asmlinkage notrace void
do_PrefetchAbort(unsigned long addr, struct pt_regs *regs)
{
do_translation_fault(addr, 0, regs);
Index: linux/arch/arm/mm/init.c
===================================================================
--- linux.orig/arch/arm/mm/init.c
+++ linux/arch/arm/mm/init.c
@@ -28,7 +28,7 @@
#define TABLE_SIZE (2 * PTRS_PER_PTE * sizeof(pte_t))
-DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
+DEFINE_PER_CPU_LOCKED(struct mmu_gather, mmu_gathers);
extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
extern void _stext, _text, _etext, __data_start, _end, __init_begin, __init_end;
Index: linux/arch/arm/plat-omap/clock.c
===================================================================
--- linux.orig/arch/arm/plat-omap/clock.c
+++ linux/arch/arm/plat-omap/clock.c
@@ -25,7 +25,7 @@
static LIST_HEAD(clocks);
static DECLARE_MUTEX(clocks_sem);
-static DEFINE_SPINLOCK(clockfw_lock);
+static DEFINE_RAW_SPINLOCK(clockfw_lock);
static void propagate_rate(struct clk * clk);
/* UART clock function */
static int set_uart_rate(struct clk * clk, unsigned long rate);
Index: linux/arch/arm/plat-omap/dma.c
===================================================================
--- linux.orig/arch/arm/plat-omap/dma.c
+++ linux/arch/arm/plat-omap/dma.c
@@ -586,7 +586,7 @@ void omap_dma_unlink_lch (int lch_head,
static struct lcd_dma_info {
- spinlock_t lock;
+ raw_spinlock_t lock;
int reserved;
void (* callback)(u16 status, void *data);
void *cb_data;
@@ -948,11 +948,11 @@ void omap_clear_dma(int lch)
unsigned long flags;
int status;
- local_irq_save(flags);
+ raw_local_irq_save(flags);
omap_writew(omap_readw(OMAP_DMA_CCR(lch)) & ~OMAP_DMA_CCR_EN,
OMAP_DMA_CCR(lch));
status = OMAP_DMA_CSR(lch); /* clear pending interrupts */
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
}
/*
Index: linux/arch/arm/plat-omap/gpio.c
===================================================================
--- linux.orig/arch/arm/plat-omap/gpio.c
+++ linux/arch/arm/plat-omap/gpio.c
@@ -121,7 +121,7 @@ struct gpio_bank {
u32 reserved_map;
u32 suspend_wakeup;
u32 saved_wakeup;
- spinlock_t lock;
+ raw_spinlock_t lock;
};
#define METHOD_MPUIO 0
@@ -736,7 +736,7 @@ static void gpio_irq_handler(unsigned in
desc->chip->ack(irq);
- bank = (struct gpio_bank *) desc->data;
+ bank = (struct gpio_bank *) desc->handler_data;
if (bank->method == METHOD_MPUIO)
isr_reg = bank->base + OMAP_MPUIO_GPIO_INT;
#ifdef CONFIG_ARCH_OMAP1510
@@ -837,6 +837,8 @@ static struct irqchip mpuio_irq_chip = {
.unmask = mpuio_unmask_irq
};
+static DEFINE_IRQ_CHAINED_TYPE(gpio_irq_handler);
+
static int initialized = 0;
static struct clk * gpio_ck = NULL;
Index: linux/arch/arm/plat-omap/mux.c
===================================================================
--- linux.orig/arch/arm/plat-omap/mux.c
+++ linux/arch/arm/plat-omap/mux.c
@@ -40,7 +40,7 @@
int __init_or_module
omap_cfg_reg(const reg_cfg_t reg_cfg)
{
- static DEFINE_SPINLOCK(mux_spin_lock);
+ static DEFINE_RAW_SPINLOCK(mux_spin_lock);
unsigned long flags;
reg_cfg_set *cfg;
Index: linux/arch/arm/plat-omap/pm.c
===================================================================
--- linux.orig/arch/arm/plat-omap/pm.c
+++ linux/arch/arm/plat-omap/pm.c
@@ -81,11 +81,11 @@ void omap_pm_idle(void)
* seconds for wait for interrupt.
*/
- local_irq_disable();
+ raw_local_irq_disable();
local_fiq_disable();
if (need_resched()) {
local_fiq_enable();
- local_irq_enable();
+ raw_local_irq_enable();
return;
}
mask32 = omap_readl(ARM_SYSST);
@@ -110,7 +110,7 @@ void omap_pm_idle(void)
omap_sram_idle();
local_fiq_enable();
- local_irq_enable();
+ raw_local_irq_enable();
}
/*
@@ -171,7 +171,7 @@ void omap_pm_suspend(void)
* Step 1: turn off interrupts (FIXME: NOTE: already disabled)
*/
- local_irq_disable();
+ raw_local_irq_disable();
local_fiq_disable();
/*
@@ -308,7 +308,7 @@ void omap_pm_suspend(void)
* Reenable interrupts
*/
- local_irq_enable();
+ raw_local_irq_enable();
local_fiq_enable();
omap_serial_wake_trigger(0);
Index: linux/arch/arm26/boot/compressed/misc.c
===================================================================
--- linux.orig/arch/arm26/boot/compressed/misc.c
+++ linux/arch/arm26/boot/compressed/misc.c
@@ -184,6 +184,7 @@ static ulg free_mem_ptr_end;
#define HEAP_SIZE 0x2000
+#define ZLIB_INFLATE_NO_INFLATE_LOCK
#include "../../../../lib/inflate.c"
#ifndef STANDALONE_DEBUG
Index: linux/arch/arm26/kernel/time.c
===================================================================
--- linux.orig/arch/arm26/kernel/time.c
+++ linux/arch/arm26/kernel/time.c
@@ -34,10 +34,6 @@
#include
#include
-u64 jiffies_64 = INITIAL_JIFFIES;
-
-EXPORT_SYMBOL(jiffies_64);
-
extern unsigned long wall_jiffies;
/* this needs a better home */
Index: linux/arch/cris/kernel/time.c
===================================================================
--- linux.orig/arch/cris/kernel/time.c
+++ linux/arch/cris/kernel/time.c
@@ -32,10 +32,6 @@
#include
#include
-u64 jiffies_64 = INITIAL_JIFFIES;
-
-EXPORT_SYMBOL(jiffies_64);
-
int have_rtc; /* used to remember if we have an RTC or not */;
#define TICK_SIZE tick
Index: linux/arch/frv/kernel/time.c
===================================================================
--- linux.orig/arch/frv/kernel/time.c
+++ linux/arch/frv/kernel/time.c
@@ -34,9 +34,6 @@
extern unsigned long wall_jiffies;
-u64 jiffies_64 = INITIAL_JIFFIES;
-EXPORT_SYMBOL(jiffies_64);
-
unsigned long __nongprelbss __clkin_clock_speed_HZ;
unsigned long __nongprelbss __ext_bus_clock_speed_HZ;
unsigned long __nongprelbss __res_bus_clock_speed_HZ;
Index: linux/arch/h8300/kernel/time.c
===================================================================
--- linux.orig/arch/h8300/kernel/time.c
+++ linux/arch/h8300/kernel/time.c
@@ -32,10 +32,6 @@
#define TICK_SIZE (tick_nsec / 1000)
-u64 jiffies_64;
-
-EXPORT_SYMBOL(jiffies_64);
-
/*
* timer_interrupt() needs to keep up the real-time clock,
* as well as call the "do_timer()" routine every clocktick
Index: linux/arch/i386/Kconfig
===================================================================
--- linux.orig/arch/i386/Kconfig
+++ linux/arch/i386/Kconfig
@@ -14,6 +14,10 @@ config X86
486, 586, Pentiums, and various instruction-set-compatible chips by
AMD, Cyrix, and others.
+config GENERIC_TIME
+ bool
+ default y
+
config SEMAPHORE_SLEEPERS
bool
default y
@@ -376,16 +380,6 @@ config X86_L1_CACHE_SHIFT
default "5" if MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODEGX1
default "6" if MK7 || MK8 || MPENTIUMM
-config RWSEM_GENERIC_SPINLOCK
- bool
- depends on M386
- default y
-
-config RWSEM_XCHGADD_ALGORITHM
- bool
- depends on !M386
- default y
-
config GENERIC_CALIBRATE_DELAY
bool
default y
@@ -442,7 +436,7 @@ config X86_USE_PPRO_CHECKSUM
config X86_USE_3DNOW
bool
- depends on MCYRIXIII || MK7
+ depends on (MCYRIXIII || MK7) && !PREEMPT_RT
default y
config X86_OOSTORE
@@ -466,6 +460,8 @@ config HPET_EMULATE_RTC
depends on HPET_TIMER && RTC=y
default y
+source "kernel/time/Kconfig"
+
config SMP
bool "Symmetric multi-processing support"
---help---
@@ -521,6 +517,20 @@ config SCHED_SMT
source "kernel/Kconfig.preempt"
+config RWSEM_GENERIC_SPINLOCK
+ bool
+ depends on M386 || PREEMPT_RT
+ default y
+
+config ASM_SEMAPHORES
+ bool
+ default y
+
+config RWSEM_XCHGADD_ALGORITHM
+ bool
+ depends on !RWSEM_GENERIC_SPINLOCK && !PREEMPT_RT
+ default y
+
config X86_UP_APIC
bool "Local APIC support on uniprocessors"
depends on !SMP && !(X86_VISWS || X86_VOYAGER)
@@ -556,6 +566,16 @@ config X86_IO_APIC
depends on X86_UP_IOAPIC || (SMP && !(X86_VISWS || X86_VOYAGER))
default y
+config X86_IOAPIC_FAST
+ bool "enhanced IO-APIC support"
+ depends on X86_IO_APIC
+ default y
+ help
+ This option activates further optimizations in the IO-APIC
+ code. NOTE: this is experimental code, yet it is enabled by default.
+ Symptoms of non-working systems are boot-time lockups, stray or
+ screaming interrupts and other interrupt-related weirdness.
+
config X86_VISWS_APIC
bool
depends on X86_VISWS
@@ -917,7 +937,7 @@ config BOOT_IOREMAP
config REGPARM
bool "Use register arguments (EXPERIMENTAL)"
- depends on EXPERIMENTAL
+ depends on EXPERIMENTAL && !MCOUNT
default n
help
Compile the kernel with -mregparm=3. This uses a different ABI
Index: linux/arch/i386/Kconfig.debug
===================================================================
--- linux.orig/arch/i386/Kconfig.debug
+++ linux/arch/i386/Kconfig.debug
@@ -18,6 +18,7 @@ config EARLY_PRINTK
config DEBUG_STACKOVERFLOW
bool "Check for stack overflows"
depends on DEBUG_KERNEL
+ default y
help
This option will cause messages to be printed if free stack space
drops below a certain limit.
@@ -35,6 +36,7 @@ config KPROBES
config DEBUG_STACK_USAGE
bool "Stack utilization instrumentation"
depends on DEBUG_KERNEL
+ default y
help
Enables the display of the minimum amount of free stack which each
task has ever had available in the sysrq-T and sysrq-P debug output.
@@ -69,7 +71,7 @@ config X86_FIND_SMP_CONFIG
config X86_MPPARSE
bool
- depends on X86_LOCAL_APIC && !X86_VISWS
+ depends on X86_LOCAL_APIC && X86_IO_APIC && !X86_VISWS
default y
endmenu
Index: linux/arch/i386/boot/compressed/misc.c
===================================================================
--- linux.orig/arch/i386/boot/compressed/misc.c
+++ linux/arch/i386/boot/compressed/misc.c
@@ -15,6 +15,12 @@
#include
#include
+#ifdef CONFIG_MCOUNT
+void notrace mcount(void)
+{
+}
+#endif
+
/*
* gzip declarations
*/
@@ -112,7 +118,7 @@ static long free_mem_end_ptr;
#define INPLACE_MOVE_ROUTINE 0x1000
#define LOW_BUFFER_START 0x2000
#define LOW_BUFFER_MAX 0x90000
-#define HEAP_SIZE 0x3000
+#define HEAP_SIZE 0x4000
static unsigned int low_buffer_end, low_buffer_size;
static int high_loaded =0;
static uch *high_buffer_start /* = (uch *)(((ulg)&end) + HEAP_SIZE)*/;
@@ -125,6 +131,7 @@ static int lines, cols;
static void * xquad_portio = NULL;
#endif
+#define ZLIB_INFLATE_NO_INFLATE_LOCK
#include "../../../../lib/inflate.c"
static void *malloc(int size)
Index: linux/arch/i386/kernel/Makefile
===================================================================
--- linux.orig/arch/i386/kernel/Makefile
+++ linux/arch/i386/kernel/Makefile
@@ -4,13 +4,13 @@
extra-y := head.o init_task.o vmlinux.lds
-obj-y := process.o semaphore.o signal.o entry.o traps.o irq.o vm86.o \
+obj-y := process.o signal.o entry.o traps.o irq.o vm86.o \
ptrace.o time.o ioport.o ldt.o setup.o i8259.o sys_i386.o \
pci-dma.o i386_ksyms.o i387.o dmi_scan.o bootflag.o \
- doublefault.o quirks.o i8237.o
+ doublefault.o quirks.o i8237.o i8253.o tsc.o
+obj-$(CONFIG_ASM_SEMAPHORES) += semaphore.o
obj-y += cpu/
-obj-y += timers/
obj-$(CONFIG_ACPI) += acpi/
obj-$(CONFIG_X86_BIOS_REBOOT) += reboot.o
obj-$(CONFIG_MCA) += mca.o
@@ -20,6 +20,7 @@ obj-$(CONFIG_MICROCODE) += microcode.o
obj-$(CONFIG_APM) += apm.o
obj-$(CONFIG_X86_SMP) += smp.o smpboot.o
obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o
+obj-$(CONFIG_MCOUNT) += mcount-wrapper.o
obj-$(CONFIG_X86_MPPARSE) += mpparse.o
obj-$(CONFIG_X86_LOCAL_APIC) += apic.o nmi.o
obj-$(CONFIG_X86_IO_APIC) += io_apic.o
@@ -34,6 +35,8 @@ obj-$(CONFIG_ACPI_SRAT) += srat.o
obj-$(CONFIG_HPET_TIMER) += time_hpet.o
obj-$(CONFIG_EFI) += efi.o efi_stub.o
obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
+obj-$(CONFIG_SYSFS) += switch2poll.o
+obj-$(CONFIG_HPET_TIMER) += hpet.o
EXTRA_AFLAGS := -traditional
Index: linux/arch/i386/kernel/acpi/boot.c
===================================================================
--- linux.orig/arch/i386/kernel/acpi/boot.c
+++ linux/arch/i386/kernel/acpi/boot.c
@@ -570,7 +570,7 @@ static int __init acpi_parse_sbf(unsigne
}
#ifdef CONFIG_HPET_TIMER
-
+#include
static int __init acpi_parse_hpet(unsigned long phys, unsigned long size)
{
struct acpi_table_hpet *hpet_tbl;
@@ -592,6 +592,7 @@ static int __init acpi_parse_hpet(unsign
#ifdef CONFIG_X86_64
vxtime.hpet_address = hpet_tbl->addr.addrl |
((long)hpet_tbl->addr.addrh << 32);
+ hpet_address = vxtime.hpet_address;
printk(KERN_INFO PREFIX "HPET id: %#x base: %#lx\n",
hpet_tbl->id, vxtime.hpet_address);
@@ -600,10 +601,10 @@ static int __init acpi_parse_hpet(unsign
extern unsigned long hpet_address;
hpet_address = hpet_tbl->addr.addrl;
- printk(KERN_INFO PREFIX "HPET id: %#x base: %#lx\n",
- hpet_tbl->id, hpet_address);
}
-#endif /* X86 */
+#endif /* X86 */
+ printk(KERN_INFO PREFIX "HPET id: %#x base: %#lx\n",
+ hpet_tbl->id, hpet_address);
return 0;
}
@@ -612,7 +613,8 @@ static int __init acpi_parse_hpet(unsign
#endif
#ifdef CONFIG_X86_PM_TIMER
-extern u32 pmtmr_ioport;
+u32 acpi_pmtmr_ioport;
+int acpi_pmtmr_buggy;
#endif
static int __init acpi_parse_fadt(unsigned long phys, unsigned long size)
@@ -640,14 +642,22 @@ static int __init acpi_parse_fadt(unsign
ACPI_ADR_SPACE_SYSTEM_IO)
return 0;
- pmtmr_ioport = fadt->xpm_tmr_blk.address;
+ acpi_pmtmr_ioport = fadt->xpm_tmr_blk.address;
+ /*
+ * "X" fields are optional extensions to the original V1.0
+ * fields, so we must selectively expand V1.0 fields if the
+ * corresponding X field is zero.
+ */
+ if (!acpi_pmtmr_ioport)
+ acpi_pmtmr_ioport = fadt->V1_pm_tmr_blk;
} else {
/* FADT rev. 1 */
- pmtmr_ioport = fadt->V1_pm_tmr_blk;
+ acpi_pmtmr_ioport = fadt->V1_pm_tmr_blk;
}
- if (pmtmr_ioport)
- printk(KERN_INFO PREFIX "PM-Timer IO Port: %#x\n",
- pmtmr_ioport);
+
+ if (acpi_pmtmr_ioport)
+ printk(KERN_INFO PREFIX "PM-Timer IO Port: %#x\n", acpi_pmtmr_ioport);
+
#endif
return 0;
}
Index: linux/arch/i386/kernel/apic.c
===================================================================
--- linux.orig/arch/i386/kernel/apic.c
+++ linux/arch/i386/kernel/apic.c
@@ -26,6 +26,7 @@
#include
#include
#include
+#include
#include
#include
@@ -50,6 +51,23 @@ int enable_local_apic __initdata = 0; /*
*/
int apic_verbosity;
+static unsigned int calibration_result;
+
+static void lapic_next_event(unsigned long evt);
+static void lapic_timer_setup(int mode);
+
+static struct clock_event lapic_clockevent = {
+ .name = "lapic",
+ .capabilities = CLOCK_CAP_NEXTEVT | CLOCK_CAP_PROFILE |
+ CLOCK_HAS_IRQHANDLER
+#ifdef CONFIG_SMP
+ | CLOCK_CAP_UPDATE
+#endif
+ ,
+ .shift = 32,
+ .set_mode = lapic_timer_setup,
+ .set_next_event = lapic_next_event,
+};
static void apic_pm_activate(void);
@@ -92,10 +110,6 @@ void __init apic_intr_init(void)
/* Using APIC to generate smp_local_timer_interrupt? */
int using_apic_timer = 0;
-static DEFINE_PER_CPU(int, prof_multiplier) = 1;
-static DEFINE_PER_CPU(int, prof_old_multiplier) = 1;
-static DEFINE_PER_CPU(int, prof_counter) = 1;
-
static int enabled_via_apicbase;
void enable_NMI_through_LVT0 (void * dummy)
@@ -559,15 +573,21 @@ void __devinit setup_local_APIC(void)
* If Linux enabled the LAPIC against the BIOS default
* disable it down before re-entering the BIOS on shutdown.
* Otherwise the BIOS may get confused and not power-off.
+ * Additionally clear all LVT entries before disable_local_APIC
+ * for the case where Linux didn't enable the LAPIC.
*/
void lapic_shutdown(void)
{
- if (!cpu_has_apic || !enabled_via_apicbase)
+ if (!cpu_has_apic)
return;
- local_irq_disable();
- disable_local_APIC();
- local_irq_enable();
+ raw_local_irq_disable();
+ clear_local_APIC();
+
+ if (enabled_via_apicbase)
+ disable_local_APIC();
+
+ raw_local_irq_enable();
}
#ifdef CONFIG_PM
@@ -611,9 +631,9 @@ static int lapic_suspend(struct sys_devi
apic_pm_state.apic_tdcr = apic_read(APIC_TDCR);
apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR);
- local_irq_save(flags);
+ raw_local_irq_save(flags);
disable_local_APIC();
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
return 0;
}
@@ -625,7 +645,7 @@ static int lapic_resume(struct sys_devic
if (!apic_pm_state.active)
return 0;
- local_irq_save(flags);
+ raw_local_irq_save(flags);
/*
* Make sure the APICBASE points to the right address
@@ -656,7 +676,7 @@ static int lapic_resume(struct sys_devic
apic_write(APIC_LVTERR, apic_pm_state.apic_lvterr);
apic_write(APIC_ESR, 0);
apic_read(APIC_ESR);
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
return 0;
}
@@ -849,10 +869,10 @@ fake_ioapic_page:
ioapic_phys = (unsigned long)
alloc_bootmem_pages(PAGE_SIZE);
ioapic_phys = __pa(ioapic_phys);
+ set_fixmap_nocache(idx, ioapic_phys);
+ printk(KERN_DEBUG "faked IOAPIC to %08lx (%08lx)\n",
+ __fix_to_virt(idx), ioapic_phys);
}
- set_fixmap_nocache(idx, ioapic_phys);
- printk(KERN_DEBUG "mapped IOAPIC to %08lx (%08lx)\n",
- __fix_to_virt(idx), ioapic_phys);
idx++;
}
}
@@ -869,6 +889,11 @@ fake_ioapic_page:
*/
/*
+ * FIXME: Move this to i8253.h. There is no need to keep the access to
+ * the PIT scattered all around the place -tglx
+ */
+
+/*
* The timer chip is already set up at HZ interrupts per second here,
* but we do not accept timer interrupts yet. We only allow the BP
* to calibrate.
@@ -926,12 +951,16 @@ void (*wait_timer_tick)(void) __devinitd
#define APIC_DIVISOR 16
-static void __setup_APIC_LVTT(unsigned int clocks)
+static void __setup_APIC_LVTT(unsigned int clocks, int oneshot)
{
unsigned int lvtt_value, tmp_value, ver;
ver = GET_APIC_VERSION(apic_read(APIC_LVR));
- lvtt_value = APIC_LVT_TIMER_PERIODIC | LOCAL_TIMER_VECTOR;
+
+ lvtt_value = LOCAL_TIMER_VECTOR;
+ if (!oneshot)
+ lvtt_value |= APIC_LVT_TIMER_PERIODIC;
+
if (!APIC_INTEGRATED(ver))
lvtt_value |= SET_APIC_TIMER_BASE(APIC_TIMER_BASE_DIV);
apic_write_around(APIC_LVTT, lvtt_value);
@@ -944,23 +973,27 @@ static void __setup_APIC_LVTT(unsigned i
& ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE))
| APIC_TDR_DIV_16);
- apic_write_around(APIC_TMICT, clocks/APIC_DIVISOR);
+ if (!oneshot)
+ apic_write_around(APIC_TMICT, clocks/APIC_DIVISOR);
}
-static void __devinit setup_APIC_timer(unsigned int clocks)
+static void lapic_next_event(unsigned long evt)
{
- unsigned long flags;
-
- local_irq_save(flags);
+ apic_write_around(APIC_TMICT, evt);
+}
- /*
- * Wait for IRQ0's slice:
- */
- wait_timer_tick();
+static void lapic_timer_setup(int mode)
+{
+ unsigned long flags;
- __setup_APIC_LVTT(clocks);
+ raw_local_irq_save(flags);
+ __setup_APIC_LVTT(calibration_result, mode == CLOCK_EVT_ONESHOT);
+ raw_local_irq_restore(flags);
+}
- local_irq_restore(flags);
+static void __devinit setup_APIC_timer(void)
+{
+ setup_local_clockevent(&lapic_clockevent, CPU_MASK_NONE);
}
/*
@@ -969,6 +1002,8 @@ static void __devinit setup_APIC_timer(u
* to calibrate, since some later bootup code depends on getting
* the first irq? Ugh.
*
+ * TODO: Fix this rather than saying "Ugh" -tglx
+ *
* We want to do the calibration only once since we
* want to have local timer irqs syncron. CPUs connected
* by the same APIC bus have the very same bus frequency.
@@ -991,7 +1026,7 @@ static int __init calibrate_APIC_clock(v
* value into the APIC clock, we just want to get the
* counter running for calibration.
*/
- __setup_APIC_LVTT(1000000000);
+ __setup_APIC_LVTT(1000000000, 0);
/*
* The timer chip counts down to zero. Let's wait
@@ -1028,6 +1063,13 @@ static int __init calibrate_APIC_clock(v
result = (tt1-tt2)*APIC_DIVISOR/LOOPS;
+ /* Calculate the scaled math multiplication factor */
+ lapic_clockevent.mult = div_sc32(tt1-tt2, TICK_NSEC * LOOPS);
+ lapic_clockevent.max_delta_ns =
+ clockevent_delta2ns(0x7FFFFF, &lapic_clockevent);
+ lapic_clockevent.min_delta_ns =
+ clockevent_delta2ns(0xF, &lapic_clockevent);
+
if (cpu_has_tsc)
apic_printk(APIC_VERBOSE, "..... CPU clock speed is "
"%ld.%04ld MHz.\n",
@@ -1042,27 +1084,26 @@ static int __init calibrate_APIC_clock(v
return result;
}
-static unsigned int calibration_result;
-
void __init setup_boot_APIC_clock(void)
{
+ unsigned long flags;
apic_printk(APIC_VERBOSE, "Using local APIC timer interrupts.\n");
using_apic_timer = 1;
- local_irq_disable();
+ raw_local_irq_save(flags);
calibration_result = calibrate_APIC_clock();
/*
* Now set up the timer for real.
*/
- setup_APIC_timer(calibration_result);
+ setup_APIC_timer();
- local_irq_enable();
+ raw_local_irq_restore(flags);
}
void __devinit setup_secondary_APIC_clock(void)
{
- setup_APIC_timer(calibration_result);
+ setup_APIC_timer();
}
void __devinit disable_APIC_timer(void)
@@ -1085,6 +1126,8 @@ void enable_APIC_timer(void)
}
}
+static DEFINE_PER_CPU(int, prof_multiplier) = 1;
+
/*
* the frequency of the profiling timer can be changed
* by writing a multiplier value into /proc/profile.
@@ -1112,60 +1155,6 @@ int setup_profiling_timer(unsigned int m
return 0;
}
-
-#undef APIC_DIVISOR
-
-/*
- * Local timer interrupt handler. It does both profiling and
- * process statistics/rescheduling.
- *
- * We do profiling in every local tick, statistics/rescheduling
- * happen only every 'profiling multiplier' ticks. The default
- * multiplier is 1 and it can be changed by writing the new multiplier
- * value into /proc/profile.
- */
-
-inline void smp_local_timer_interrupt(struct pt_regs * regs)
-{
- int cpu = smp_processor_id();
-
- profile_tick(CPU_PROFILING, regs);
- if (--per_cpu(prof_counter, cpu) <= 0) {
- /*
- * The multiplier may have changed since the last time we got
- * to this point as a result of the user writing to
- * /proc/profile. In this case we need to adjust the APIC
- * timer accordingly.
- *
- * Interrupts are already masked off at this point.
- */
- per_cpu(prof_counter, cpu) = per_cpu(prof_multiplier, cpu);
- if (per_cpu(prof_counter, cpu) !=
- per_cpu(prof_old_multiplier, cpu)) {
- __setup_APIC_LVTT(
- calibration_result/
- per_cpu(prof_counter, cpu));
- per_cpu(prof_old_multiplier, cpu) =
- per_cpu(prof_counter, cpu);
- }
-
-#ifdef CONFIG_SMP
- update_process_times(user_mode_vm(regs));
-#endif
- }
-
- /*
- * We take the 'long' return path, and there every subsystem
- * grabs the apropriate locks (kernel lock/ irq lock).
- *
- * we might want to decouple profiling from the 'long path',
- * and do the profiling totally in assembly.
- *
- * Currently this isn't too much of an issue (performance wise),
- * we can take more than 100K local irqs per second on a 100 MHz P5.
- */
-}
-
/*
* Local APIC timer interrupt. This is the most natural way for doing
* local interrupts, but local timer interrupts can be emulated by
@@ -1175,7 +1164,7 @@ inline void smp_local_timer_interrupt(st
* interrupt as well. Thus we cannot inline the local irq ... ]
*/
-fastcall void smp_apic_timer_interrupt(struct pt_regs *regs)
+fastcall notrace void smp_apic_timer_interrupt(struct pt_regs *regs)
{
int cpu = smp_processor_id();
@@ -1184,6 +1173,8 @@ fastcall void smp_apic_timer_interrupt(s
*/
per_cpu(irq_stat, cpu).apic_timer_irqs++;
+ trace_special(regs->eip, 0, 0);
+
/*
* NOTE! We'd better ACK the irq immediately,
* because timer handling can be slow.
@@ -1195,7 +1186,17 @@ fastcall void smp_apic_timer_interrupt(s
* interrupt lock, which is the WrongThing (tm) to do.
*/
irq_enter();
- smp_local_timer_interrupt(regs);
+ /*
+ * If the task is currently running in user mode, don't
+ * detect soft lockups. If CONFIG_DETECT_SOFTLOCKUP is not
+ * configured, this should be optimized out.
+ */
+ if (user_mode(regs))
+ touch_light_softlockup_watchdog();
+
+ if (lapic_clockevent.event_handler)
+ lapic_clockevent.event_handler(regs);
+
irq_exit();
}
@@ -1250,6 +1251,7 @@ fastcall void smp_error_interrupt(struct
*/
printk (KERN_DEBUG "APIC error on CPU%d: %02lx(%02lx)\n",
smp_processor_id(), v , v1);
+ dump_stack();
irq_exit();
}
Index: linux/arch/i386/kernel/apm.c
===================================================================
--- linux.orig/arch/i386/kernel/apm.c
+++ linux/arch/i386/kernel/apm.c
@@ -552,9 +552,9 @@ static inline void apm_restore_cpus(cpum
*/
#define APM_DO_CLI \
if (apm_info.allow_ints) \
- local_irq_enable(); \
+ raw_local_irq_enable(); \
else \
- local_irq_disable();
+ raw_local_irq_disable();
#ifdef APM_ZERO_SEGS
# define APM_DECL_SEGS \
@@ -604,12 +604,12 @@ static u8 apm_bios_call(u32 func, u32 eb
save_desc_40 = per_cpu(cpu_gdt_table, cpu)[0x40 / 8];
per_cpu(cpu_gdt_table, cpu)[0x40 / 8] = bad_bios_desc;
- local_save_flags(flags);
+ raw_local_save_flags(flags);
APM_DO_CLI;
APM_DO_SAVE_SEGS;
apm_bios_call_asm(func, ebx_in, ecx_in, eax, ebx, ecx, edx, esi);
APM_DO_RESTORE_SEGS;
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
per_cpu(cpu_gdt_table, cpu)[0x40 / 8] = save_desc_40;
put_cpu();
apm_restore_cpus(cpus);
@@ -647,12 +647,12 @@ static u8 apm_bios_call_simple(u32 func,
save_desc_40 = per_cpu(cpu_gdt_table, cpu)[0x40 / 8];
per_cpu(cpu_gdt_table, cpu)[0x40 / 8] = bad_bios_desc;
- local_save_flags(flags);
+ raw_local_save_flags(flags);
APM_DO_CLI;
APM_DO_SAVE_SEGS;
error = apm_bios_call_simple_asm(func, ebx_in, ecx_in, eax);
APM_DO_RESTORE_SEGS;
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
__get_cpu_var(cpu_gdt_table)[0x40 / 8] = save_desc_40;
put_cpu();
apm_restore_cpus(cpus);
@@ -1194,7 +1194,7 @@ static int suspend(int vetoable)
}
device_suspend(PMSG_SUSPEND);
- local_irq_disable();
+ raw_local_irq_disable();
device_power_down(PMSG_SUSPEND);
/* serialize with the timer interrupt */
@@ -1210,14 +1210,14 @@ static int suspend(int vetoable)
*/
spin_unlock(&i8253_lock);
write_sequnlock(&xtime_lock);
- local_irq_enable();
+ raw_local_irq_enable();
save_processor_state();
err = set_system_power_state(APM_STATE_SUSPEND);
ignore_normal_resume = 1;
restore_processor_state();
- local_irq_disable();
+ raw_local_irq_disable();
write_seqlock(&xtime_lock);
spin_lock(&i8253_lock);
reinit_timer();
@@ -1232,7 +1232,7 @@ static int suspend(int vetoable)
apm_error("suspend", err);
err = (err == APM_SUCCESS) ? 0 : -EIO;
device_power_up();
- local_irq_enable();
+ raw_local_irq_enable();
device_resume();
pm_send_all(PM_RESUME, (void *)0);
queue_event(APM_NORMAL_RESUME, NULL);
@@ -1251,22 +1251,22 @@ static void standby(void)
{
int err;
- local_irq_disable();
+ raw_local_irq_disable();
device_power_down(PMSG_SUSPEND);
/* serialize with the timer interrupt */
write_seqlock(&xtime_lock);
/* If needed, notify drivers here */
get_time_diff();
write_sequnlock(&xtime_lock);
- local_irq_enable();
+ raw_local_irq_enable();
err = set_system_power_state(APM_STATE_STANDBY);
if ((err != APM_SUCCESS) && (err != APM_NO_ERROR))
apm_error("standby", err);
- local_irq_disable();
+ raw_local_irq_disable();
device_power_up();
- local_irq_enable();
+ raw_local_irq_enable();
}
static apm_event_t get_event(void)
Index: linux/arch/i386/kernel/cpu/cpufreq/longhaul.c
===================================================================
--- linux.orig/arch/i386/kernel/cpu/cpufreq/longhaul.c
+++ linux/arch/i386/kernel/cpu/cpufreq/longhaul.c
@@ -144,7 +144,7 @@ static void do_powersaver(union msr_long
longhaul->bits.RevisionKey = 0;
preempt_disable();
- local_irq_save(flags);
+ raw_local_irq_save(flags);
/*
* get current pci bus master state for all devices
@@ -166,11 +166,11 @@ static void do_powersaver(union msr_long
outb(0xFE,0x21); /* TMR0 only */
outb(0xFF,0x80); /* delay */
- safe_halt();
+ raw_safe_halt();
wrmsrl(MSR_VIA_LONGHAUL, longhaul->val);
halt();
- local_irq_disable();
+ raw_local_irq_disable();
outb(tmp_mask,0x21); /* restore mask */
@@ -184,7 +184,7 @@ static void do_powersaver(union msr_long
pci_write_config_byte(dev, PCI_COMMAND, pci_cmd);
}
} while (dev != NULL);
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
preempt_enable();
/* disable bus ratio bit */
@@ -245,16 +245,16 @@ static void longhaul_setstate(unsigned i
/* Enable software clock multiplier */
bcr2.bits.ESOFTBF = 1;
bcr2.bits.CLOCKMUL = clock_ratio_index;
- local_irq_disable();
+ raw_local_irq_disable();
wrmsrl (MSR_VIA_BCR2, bcr2.val);
- safe_halt();
+ raw_safe_halt();
/* Disable software clock multiplier */
rdmsrl (MSR_VIA_BCR2, bcr2.val);
bcr2.bits.ESOFTBF = 0;
- local_irq_disable();
+ raw_local_irq_disable();
wrmsrl (MSR_VIA_BCR2, bcr2.val);
- local_irq_enable();
+ raw_local_irq_enable();
break;
/*
Index: linux/arch/i386/kernel/cpu/mtrr/cyrix.c
===================================================================
--- linux.orig/arch/i386/kernel/cpu/mtrr/cyrix.c
+++ linux/arch/i386/kernel/cpu/mtrr/cyrix.c
@@ -17,7 +17,7 @@ cyrix_get_arr(unsigned int reg, unsigned
arr = CX86_ARR_BASE + (reg << 1) + reg; /* avoid multiplication by 3 */
/* Save flags and disable interrupts */
- local_irq_save(flags);
+ raw_local_irq_save(flags);
ccr3 = getCx86(CX86_CCR3);
setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */
@@ -28,7 +28,7 @@ cyrix_get_arr(unsigned int reg, unsigned
setCx86(CX86_CCR3, ccr3); /* disable MAPEN */
/* Enable interrupts if it was enabled previously */
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
shift = ((unsigned char *) base)[1] & 0x0f;
*base >>= PAGE_SHIFT;
Index: linux/arch/i386/kernel/cpu/mtrr/generic.c
===================================================================
--- linux.orig/arch/i386/kernel/cpu/mtrr/generic.c
+++ linux/arch/i386/kernel/cpu/mtrr/generic.c
@@ -234,7 +234,7 @@ static unsigned long set_mtrr_state(u32
static unsigned long cr4 = 0;
static u32 deftype_lo, deftype_hi;
-static DEFINE_SPINLOCK(set_atomicity_lock);
+static DEFINE_RAW_SPINLOCK(set_atomicity_lock);
/*
* Since we are disabling the cache don't allow any interrupts - they
@@ -296,14 +296,14 @@ static void generic_set_all(void)
unsigned long mask, count;
unsigned long flags;
- local_irq_save(flags);
+ raw_local_irq_save(flags);
prepare_set();
/* Actually set the state */
mask = set_mtrr_state(deftype_lo,deftype_hi);
post_set();
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
/* Use the atomic bitops to update the global mask */
for (count = 0; count < sizeof mask * 8; ++count) {
@@ -331,7 +331,7 @@ static void generic_set_mtrr(unsigned in
vr = &mtrr_state.var_ranges[reg];
- local_irq_save(flags);
+ raw_local_irq_save(flags);
prepare_set();
if (size == 0) {
@@ -350,7 +350,7 @@ static void generic_set_mtrr(unsigned in
}
post_set();
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
}
int generic_validate_add_page(unsigned long base, unsigned long size, unsigned int type)
Index: linux/arch/i386/kernel/cpu/mtrr/main.c
===================================================================
--- linux.orig/arch/i386/kernel/cpu/mtrr/main.c
+++ linux/arch/i386/kernel/cpu/mtrr/main.c
@@ -146,7 +146,7 @@ static void ipi_handler(void *info)
struct set_mtrr_data *data = info;
unsigned long flags;
- local_irq_save(flags);
+ raw_local_irq_save(flags);
atomic_dec(&data->count);
while(!atomic_read(&data->gate))
@@ -164,7 +164,7 @@ static void ipi_handler(void *info)
cpu_relax();
atomic_dec(&data->count);
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
}
#endif
@@ -225,7 +225,7 @@ static void set_mtrr(unsigned int reg, u
if (smp_call_function(ipi_handler, &data, 1, 0) != 0)
panic("mtrr: timed out waiting for other CPUs\n");
- local_irq_save(flags);
+ raw_local_irq_save(flags);
while(atomic_read(&data.count))
cpu_relax();
@@ -259,7 +259,7 @@ static void set_mtrr(unsigned int reg, u
while(atomic_read(&data.count))
cpu_relax();
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
}
/**
@@ -687,11 +687,11 @@ void mtrr_ap_init(void)
* 2.cpu hotadd time. We let mtrr_add/del_page hold cpuhotplug lock to
* prevent mtrr entry changes
*/
- local_irq_save(flags);
+ raw_local_irq_save(flags);
mtrr_if->set_all();
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
}
static int __init mtrr_init_finialize(void)
Index: linux/arch/i386/kernel/cpu/mtrr/state.c
===================================================================
--- linux.orig/arch/i386/kernel/cpu/mtrr/state.c
+++ linux/arch/i386/kernel/cpu/mtrr/state.c
@@ -12,7 +12,7 @@ void set_mtrr_prepare_save(struct set_mt
unsigned int cr0;
/* Disable interrupts locally */
- local_irq_save(ctxt->flags);
+ raw_local_irq_save(ctxt->flags);
if (use_intel() || is_cpu(CYRIX)) {
@@ -73,6 +73,6 @@ void set_mtrr_done(struct set_mtrr_conte
write_cr4(ctxt->cr4val);
}
/* Re-enable interrupts locally (if enabled previously) */
- local_irq_restore(ctxt->flags);
+ raw_local_irq_restore(ctxt->flags);
}
Index: linux/arch/i386/kernel/entry.S
===================================================================
--- linux.orig/arch/i386/kernel/entry.S
+++ linux/arch/i386/kernel/entry.S
@@ -76,10 +76,10 @@ NT_MASK = 0x00004000
VM_MASK = 0x00020000
#ifdef CONFIG_PREEMPT
-#define preempt_stop cli
+# define preempt_stop cli
#else
-#define preempt_stop
-#define resume_kernel restore_nocheck
+# define preempt_stop
+# define resume_kernel restore_nocheck
#endif
#define SAVE_ALL \
@@ -160,14 +160,17 @@ ENTRY(resume_userspace)
#ifdef CONFIG_PREEMPT
ENTRY(resume_kernel)
cli
+ cmpl $0, kernel_preemption
+ jz restore_nocheck
cmpl $0,TI_preempt_count(%ebp) # non-zero preempt_count ?
jnz restore_nocheck
need_resched:
movl TI_flags(%ebp), %ecx # need_resched set ?
testb $_TIF_NEED_RESCHED, %cl
- jz restore_all
+ jz restore_nocheck
testl $IF_MASK,EFLAGS(%esp) # interrupts off (exception path) ?
- jz restore_all
+ jz restore_nocheck
+ cli
call preempt_schedule_irq
jmp need_resched
#endif
@@ -200,6 +203,11 @@ sysenter_past_esp:
pushl %eax
SAVE_ALL
+#ifdef CONFIG_LATENCY_TRACE
+ pushl %edx; pushl %ecx; pushl %ebx; pushl %eax
+ call sys_call
+ popl %eax; popl %ebx; popl %ecx; popl %edx
+#endif
GET_THREAD_INFO(%ebp)
/* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */
@@ -213,6 +221,11 @@ sysenter_past_esp:
movl TI_flags(%ebp), %ecx
testw $_TIF_ALLWORK_MASK, %cx
jne syscall_exit_work
+#ifdef CONFIG_LATENCY_TRACE
+ pushl %eax
+ call sys_ret
+ popl %eax
+#endif
/* if something modifies registers it must also disable sysexit */
movl EIP(%esp), %edx
movl OLDESP(%esp), %ecx
@@ -225,6 +238,11 @@ sysenter_past_esp:
ENTRY(system_call)
pushl %eax # save orig_eax
SAVE_ALL
+#ifdef CONFIG_LATENCY_TRACE
+ pushl %edx; pushl %ecx; pushl %ebx; pushl %eax
+ call sys_call
+ popl %eax; popl %ebx; popl %ecx; popl %edx
+#endif
GET_THREAD_INFO(%ebp)
# system call tracing in operation / emulation
/* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */
@@ -254,6 +272,17 @@ restore_all:
cmpl $((4 << 8) | 3), %eax
je ldt_ss # returning to user-space with LDT SS
restore_nocheck:
+#if defined(CONFIG_CRITICAL_IRQSOFF_TIMING) || defined(CONFIG_LATENCY_TRACE)
+ pushl %eax
+#ifdef CONFIG_CRITICAL_IRQSOFF_TIMING
+ call trace_irqs_on
+#endif
+#ifdef CONFIG_LATENCY_TRACE
+ call sys_ret
+#endif
+ popl %eax
+#endif
+restore_nocheck_nmi:
RESTORE_REGS
addl $4, %esp
1: iret
@@ -297,18 +326,19 @@ ldt_ss:
# perform work that needs to be done immediately before resumption
ALIGN
work_pending:
- testb $_TIF_NEED_RESCHED, %cl
+ testl $(_TIF_NEED_RESCHED|_TIF_NEED_RESCHED_DELAYED), %ecx
jz work_notifysig
work_resched:
- call schedule
- cli # make sure we don't miss an interrupt
+ cli
+ call __schedule
+ # make sure we don't miss an interrupt
# setting need_resched or sigpending
# between sampling and the iret
movl TI_flags(%ebp), %ecx
andl $_TIF_WORK_MASK, %ecx # is there any work to be done other
# than syscall tracing?
jz restore_all
- testb $_TIF_NEED_RESCHED, %cl
+ testl $(_TIF_NEED_RESCHED|_TIF_NEED_RESCHED_DELAYED), %ecx
jnz work_resched
work_notifysig: # deal with pending signals and
@@ -351,6 +381,11 @@ syscall_trace_entry:
syscall_exit_work:
testb $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP), %cl
jz work_pending
+#ifdef CONFIG_CRITICAL_IRQSOFF_TIMING
+ pushl %eax
+ call trace_irqs_on
+ popl %eax
+#endif
sti # could let do_syscall_trace() call
# schedule() instead
movl %esp, %eax
@@ -412,9 +447,16 @@ ENTRY(irq_entries_start)
vector=vector+1
.endr
+/*
+ * TRACE_IRQS_OFF expands to a call to trace_irqs_off_lowlevel when
+ * CONFIG_CRITICAL_IRQSOFF_TIMING is enabled and to nothing otherwise.
+ * The interrupt entry stubs below invoke it right after SAVE_ALL.
+ */
+#ifdef CONFIG_CRITICAL_IRQSOFF_TIMING
+# define TRACE_IRQS_OFF call trace_irqs_off_lowlevel;
+#else
+# define TRACE_IRQS_OFF
+#endif
+
ALIGN
common_interrupt:
SAVE_ALL
+ TRACE_IRQS_OFF
movl %esp,%eax
call do_IRQ
jmp ret_from_intr
@@ -423,6 +465,7 @@ common_interrupt:
ENTRY(name) \
pushl $nr-256; \
SAVE_ALL \
+ TRACE_IRQS_OFF \
movl %esp,%eax; \
call smp_/**/name; \
jmp ret_from_intr;
@@ -552,7 +595,7 @@ nmi_stack_correct:
xorl %edx,%edx # zero error code
movl %esp,%eax # pt_regs pointer
call do_nmi
- jmp restore_all
+ jmp restore_nocheck_nmi
nmi_stack_fixup:
FIX_STACK(12,nmi_stack_correct, 1)
Index: linux/arch/i386/kernel/hpet.c
===================================================================
--- /dev/null
+++ linux/arch/i386/kernel/hpet.c
@@ -0,0 +1,69 @@
+#include
+#include
+#include
+#include
+
+#include
+#include
+
+#define HPET_MASK 0xFFFFFFFF
+#define HPET_SHIFT 22
+
+/* FSEC = 10^-15 NSEC = 10^-9 */
+#define FSEC_PER_NSEC 1000000
+
+/*
+ * Address of the HPET counter register (hpet_base + HPET_COUNTER), set up
+ * by init_hpet_clocksource().  It points into an ioremapped region and is
+ * only ever handed to readl(), hence the __iomem annotation.
+ */
+static void __iomem *hpet_ptr;
+
+/* Clocksource read callback: sample the HPET counter through hpet_ptr. */
+static cycle_t read_hpet(void)
+{
+	u32 now = readl(hpet_ptr);
+
+	return (cycle_t)now;
+}
+
+/*
+ * Clocksource descriptor for the HPET counter.  Values are masked with
+ * HPET_MASK (32 bits).  .mult is left at 0 here; init_hpet_clocksource()
+ * computes it from the period read out of the hardware before the
+ * descriptor is registered.
+ */
+struct clocksource clocksource_hpet = {
+ .name = "hpet",
+ .rating = 250,
+ .read = read_hpet,
+ .mask = (cycle_t)HPET_MASK,
+ .mult = 0, /* set below */
+ .shift = HPET_SHIFT,
+ .is_continuous = 1,
+};
+
+/*
+ * Map the HPET registers, derive the cycle->nanosecond multiplier from the
+ * period reported by the hardware and register the HPET as a clocksource.
+ *
+ * Returns 0 on success, -ENODEV when no HPET address is known and -ENOMEM
+ * when the register block cannot be mapped.
+ */
+static int __init init_hpet_clocksource(void)
+{
+	unsigned long hpet_period;
+	void __iomem *hpet_base;
+	u64 tmp;
+
+	if (!hpet_address)
+		return -ENODEV;
+
+	/* calculate the hpet address: */
+	hpet_base = ioremap_nocache(hpet_address, HPET_MMAP_SIZE);
+	if (!hpet_base)
+		return -ENOMEM;	/* never let read_hpet() use an unmapped pointer */
+	hpet_ptr = hpet_base + HPET_COUNTER;
+
+	/* calculate the frequency: */
+	hpet_period = readl(hpet_base + HPET_PERIOD);
+
+	/*
+	 * hpet period is in femto seconds per cycle
+	 * so we need to convert this to ns/cyc units
+	 * approximated by mult/2^shift
+	 *
+	 *  fsec/cyc * 1nsec/1000000fsec = nsec/cyc = mult/2^shift
+	 *  fsec/cyc * 1ns/1000000fsec * 2^shift = mult
+	 *  fsec/cyc * 2^shift * 1nsec/1000000fsec = mult
+	 *  (fsec/cyc << shift)/1000000 = mult
+	 *  (hpet_period << shift)/FSEC_PER_NSEC = mult
+	 */
+	tmp = (u64)hpet_period << HPET_SHIFT;
+	do_div(tmp, FSEC_PER_NSEC);
+	clocksource_hpet.mult = (u32)tmp;
+
+	register_clocksource(&clocksource_hpet);
+
+	return 0;
+}
+
+module_init(init_hpet_clocksource);
Index: linux/arch/i386/kernel/i386_ksyms.c
===================================================================
--- linux.orig/arch/i386/kernel/i386_ksyms.c
+++ linux/arch/i386/kernel/i386_ksyms.c
@@ -6,10 +6,12 @@
/* This is definitely a GPL-only symbol */
EXPORT_SYMBOL_GPL(cpu_gdt_table);
-EXPORT_SYMBOL(__down_failed);
-EXPORT_SYMBOL(__down_failed_interruptible);
-EXPORT_SYMBOL(__down_failed_trylock);
-EXPORT_SYMBOL(__up_wakeup);
+#ifdef CONFIG_ASM_SEMAPHORES
+EXPORT_SYMBOL(__compat_down_failed);
+EXPORT_SYMBOL(__compat_down_failed_interruptible);
+EXPORT_SYMBOL(__compat_down_failed_trylock);
+EXPORT_SYMBOL(__compat_up_wakeup);
+#endif
/* Networking helper routines. */
EXPORT_SYMBOL(csum_partial_copy_generic);
@@ -25,7 +27,7 @@ EXPORT_SYMBOL(__put_user_8);
EXPORT_SYMBOL(strpbrk);
EXPORT_SYMBOL(strstr);
-#ifdef CONFIG_SMP
+#if defined(CONFIG_SMP) && defined(CONFIG_ASM_SEMAPHORES)
extern void FASTCALL( __write_lock_failed(rwlock_t *rw));
extern void FASTCALL( __read_lock_failed(rwlock_t *rw));
EXPORT_SYMBOL(__write_lock_failed);
Index: linux/arch/i386/kernel/i8253.c
===================================================================
--- /dev/null
+++ linux/arch/i386/kernel/i8253.c
@@ -0,0 +1,138 @@
+/*
+ * i8253.c 8253/PIT functions
+ *
+ */
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+
+#include "io_ports.h"
+
+DEFINE_RAW_SPINLOCK(i8253_lock);
+EXPORT_SYMBOL(i8253_lock);
+
+/*
+ * Clock-event set_mode callback for the PIT.  CLOCK_EVT_ONESHOT only writes
+ * the one-shot mode byte; every other mode programs channel 0 with the
+ * periodic setup and loads LATCH as the count.  All port accesses are done
+ * under i8253_lock.
+ */
+static void init_pit_timer(int mode)
+{
+	unsigned long irqflags;
+
+	spin_lock_irqsave(&i8253_lock, irqflags);
+
+	if (mode == CLOCK_EVT_ONESHOT) {
+		/* One shot setup */
+		outb_p(0x38, PIT_MODE);
+		udelay(10);
+	} else {
+		/* binary, mode 2, LSB/MSB, ch 0 */
+		outb_p(0x34, PIT_MODE);
+		udelay(10);
+		outb_p(LATCH & 0xff , PIT_CH0);	/* LSB */
+		outb(LATCH >> 8 , PIT_CH0);	/* MSB */
+	}
+
+	spin_unlock_irqrestore(&i8253_lock, irqflags);
+}
+
+/*
+ * Clock-event set_next_event callback: write @evt to PIT channel 0, low
+ * byte first and high byte second, while holding i8253_lock.
+ */
+static void pit_next_event(unsigned long evt)
+{
+	const unsigned long lsb = evt & 0xff;
+	const unsigned long msb = evt >> 8;
+	unsigned long irqflags;
+
+	spin_lock_irqsave(&i8253_lock, irqflags);
+	outb_p(lsb, PIT_CH0);
+	outb(msb, PIT_CH0);
+	spin_unlock_irqrestore(&i8253_lock, irqflags);
+}
+
+/*
+ * Clock-event descriptor for the PIT.  CLOCK_CAP_TICK is always advertised;
+ * the next-event, profile and update capabilities are only added when
+ * CONFIG_SMP is not set.  .mult and the min/max deltas are not set here,
+ * they are filled in by setup_pit_timer().
+ */
+static struct clock_event pit_clockevent = {
+ .name = "pit",
+ .capabilities = CLOCK_CAP_TICK
+#ifndef CONFIG_SMP
+ | CLOCK_CAP_NEXTEVT | CLOCK_CAP_PROFILE |
+ CLOCK_CAP_UPDATE
+#endif
+ ,
+ .set_mode = init_pit_timer,
+ .set_next_event = pit_next_event,
+ .start_event = io_apic_timer_ack,
+ .end_event = mca_timer_ack,
+ .shift = 32,
+ .irq = 0,
+};
+
+/*
+ * Finish initialising pit_clockevent and register it as the global clock
+ * event.  The delta limits are derived from counter values 0xF (minimum)
+ * and 0x7FFF (maximum).
+ */
+void setup_pit_timer(void)
+{
+ /* NOTE(review): mult is set first; clockevent_delta2ns() presumably reads it - confirm */
+ pit_clockevent.mult = div_sc32(CLOCK_TICK_RATE, NSEC_PER_SEC);
+ pit_clockevent.max_delta_ns =
+ clockevent_delta2ns(0x7FFF, &pit_clockevent);
+ pit_clockevent.min_delta_ns =
+ clockevent_delta2ns(0xF, &pit_clockevent);
+ setup_global_clockevent(&pit_clockevent, CPU_MASK_NONE);
+}
+
+/*
+ * Since the PIT overflows every tick, it's not very useful
+ * to just read by itself. So use jiffies to emulate a free
+ * running counter:
+ */
+static cycle_t pit_read(void)
+{
+ unsigned long flags, seq;
+ int count;
+ u64 jifs;
+
+ /*
+ * Sample the latched PIT count and jiffies_64 together; the whole
+ * sample is retried when read_seqretry() reports that xtime_lock
+ * changed in the meantime.
+ */
+ do {
+ seq = read_seqbegin(&xtime_lock);
+
+ spin_lock_irqsave(&i8253_lock, flags);
+ outb_p(0x00, PIT_MODE); /* latch the count ASAP */
+ count = inb_p(PIT_CH0); /* read the latched count */
+ count |= inb_p(PIT_CH0) << 8;
+
+ /* VIA686a test code... reset the latch if count > max + 1 */
+ if (count > LATCH) {
+ outb_p(0x34, PIT_MODE);
+ outb_p(LATCH & 0xff, PIT_CH0);
+ outb(LATCH >> 8, PIT_CH0);
+ count = LATCH - 1;
+ }
+ spin_unlock_irqrestore(&i8253_lock, flags);
+
+ jifs = jiffies_64;
+ } while (read_seqretry(&xtime_lock, seq));
+
+ /* Rebase jiffies to zero and turn the raw count into (LATCH-1) - count. */
+ jifs -= INITIAL_JIFFIES;
+ count = (LATCH-1) - count;
+
+ /* LATCH cycles per elapsed jiffy plus the offset inside the current one. */
+ return (cycle_t)(jifs * LATCH) + count;
+}
+
+/*
+ * Clocksource descriptor for the jiffies-extended PIT counter returned by
+ * pit_read().  .mult is left at 0 here and is filled in by
+ * init_pit_clocksource() before registration.
+ */
+static struct clocksource clocksource_pit = {
+ .name = "pit",
+ .rating = 110,
+ .read = pit_read,
+ .mask = (cycle_t)-1,
+ .mult = 0,
+ .shift = 20,
+};
+
+/*
+ * Register the PIT clocksource unless more than four CPUs are possible,
+ * in which case nothing is registered.  Always returns 0.
+ */
+static int __init init_pit_clocksource(void)
+{
+	/* TODO: bogus limit of 4 CPUs? --mingo */
+	if (num_possible_cpus() > 4) /* PIT does not scale! */
+		return 0;
+
+	/*
+	 * Compute mult from the descriptor's own shift instead of repeating
+	 * the literal, so mult and shift cannot drift apart.
+	 */
+	clocksource_pit.mult = clocksource_hz2mult(CLOCK_TICK_RATE,
+						   clocksource_pit.shift);
+	register_clocksource(&clocksource_pit);
+
+	return 0;
+}
+module_init(init_pit_clocksource);
Index: linux/arch/i386/kernel/i8259.c
===================================================================
--- linux.orig/arch/i386/kernel/i8259.c
+++ linux/arch/i386/kernel/i8259.c
@@ -35,7 +35,7 @@
* moves to arch independent land
*/
-DEFINE_SPINLOCK(i8259A_lock);
+DEFINE_RAW_SPINLOCK(i8259A_lock);
static void end_8259A_irq (unsigned int irq)
{
@@ -366,7 +366,7 @@ static irqreturn_t math_error_irq(int cp
* New motherboards sometimes make IRQ 13 be a PCI interrupt,
* so allow interrupt sharing.
*/
-static struct irqaction fpu_irq = { math_error_irq, 0, CPU_MASK_NONE, "fpu", NULL, NULL };
+static struct irqaction fpu_irq = { math_error_irq, SA_NODELAY, CPU_MASK_NONE, "fpu", NULL, NULL };
void __init init_ISA_irqs (void)
{
@@ -422,12 +422,6 @@ void __init init_IRQ(void)
intr_init_hook();
/*
- * Set the clock to HZ Hz, we already have a valid
- * vector now:
- */
- setup_pit_timer();
-
- /*
* External FPU? Set up irq13 if so, for
* original braindamaged IBM FERR coupling.
*/
Index: linux/arch/i386/kernel/init_task.c
===================================================================
--- linux.orig/arch/i386/kernel/init_task.c
+++ linux/arch/i386/kernel/init_task.c
@@ -10,8 +10,8 @@
#include
#include
-static struct fs_struct init_fs = INIT_FS;
-static struct files_struct init_files = INIT_FILES;
+static struct fs_struct init_fs = INIT_FS(init_fs);
+static struct files_struct init_files = INIT_FILES(init_files);
static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
struct mm_struct init_mm = INIT_MM(init_mm);
Index: linux/arch/i386/kernel/io_apic.c
===================================================================
--- linux.orig/arch/i386/kernel/io_apic.c
+++ linux/arch/i386/kernel/io_apic.c
@@ -30,6 +30,7 @@
#include
#include
#include
+#include
#include
#include
@@ -46,7 +47,7 @@
int (*ioapic_renumber_irq)(int ioapic, int irq);
atomic_t irq_mis_count;
-static DEFINE_SPINLOCK(ioapic_lock);
+static DEFINE_RAW_SPINLOCK(ioapic_lock);
/*
* Is the SiS APIC rmw bug present ?
@@ -54,11 +55,6 @@ static DEFINE_SPINLOCK(ioapic_lock);
*/
int sis_apic_bug = -1;
-/*
- * # of IRQ routing registers
- */
-int nr_ioapic_registers[MAX_IO_APICS];
-
int disable_timer_pin_1 __initdata;
/*
@@ -87,6 +83,27 @@ int vector_irq[NR_VECTORS] __read_mostly
#define vector_to_irq(vector) (vector)
#endif
+/* Non-zero when the timer IRQ must be acknowledged by hand; set in check_timer(). */
+static int timer_ack;
+
+/*
+ * Acknowledge the timer interrupt at the master 8259A when timer_ack is
+ * set; does nothing otherwise.  @priv is not used.
+ */
+void io_apic_timer_ack(void *priv)
+{
+ unsigned long flags;
+
+ if (timer_ack) {
+ /*
+ * Subtle, when I/O APICs are used we have to ack timer IRQ
+ * manually to reset the IRR bit for do_slow_gettimeoffset().
+ * This will also deassert NMI lines for the watchdog if run
+ * on an 82489DX-based system.
+ */
+ spin_lock_irqsave(&i8259A_lock, flags);
+ outb(0x0c, PIC_MASTER_OCW3);
+ /* Ack the IRQ; AEOI will end it automatically. */
+ inb(PIC_MASTER_POLL);
+ spin_unlock_irqrestore(&i8259A_lock, flags);
+ }
+}
+
/*
* The common case is 1:1 IRQ<->pin mappings. Sometimes there are
* shared ISA-space IRQs, so we have to support them. We are super
@@ -130,19 +147,133 @@ static void __init replace_pin_at_irq(un
}
}
+/* CONFIG_X86_IOAPIC_FAST enables the software shadow of the IO-APIC registers. */
+#ifdef CONFIG_X86_IOAPIC_FAST
+# define IOAPIC_CACHE
+#endif
+
+/*
+ * Per-IO-APIC state, allocated by setup_IO_APIC_early().
+ *
+ * dev:          sysdev handle registered by ioapic_init_sysfs()
+ * nr_registers: number of IRQ routing registers
+ * base:         register window; base[0] is written with the register
+ *               index and base[4] is the data word
+ * entry:        routing-entry buffer allocated by ioapic_init_sysfs() and
+ *               used by the suspend/resume handlers
+ * reg_set:      index last written to base[0], or -1 when not known
+ * cached_val:   shadow copy of the register values, indexed by register
+ */
+struct ioapic_data_struct {
+ struct sys_device dev;
+ int nr_registers; /* # of IRQ routing registers */
+ volatile unsigned int *base;
+ struct IO_APIC_route_entry *entry;
+#ifdef IOAPIC_CACHE
+ unsigned int reg_set;
+ u32 cached_val[0];
+#endif
+};
+
+/* One slot per IO-APIC, indexed by IO-APIC number. */
+static struct ioapic_data_struct *ioapic_data[MAX_IO_APICS];
+
+/* Return the number of IRQ routing registers of IO-APIC number @apic. */
+int nr_ioapic_registers(int apic)
+{
+	const struct ioapic_data_struct *ioapic = ioapic_data[apic];
+
+	return ioapic->nr_registers;
+}
+
+/*
+ * Read register @reg from the hardware: write the index to base[0], then
+ * return the data word at base[4].  With IOAPIC_CACHE the selected index is
+ * remembered in reg_set; the shadow value itself is not updated here.
+ */
+static inline unsigned int __raw_io_apic_read(struct ioapic_data_struct *ioapic, unsigned int reg)
+{
+# ifdef IOAPIC_CACHE
+ ioapic->reg_set = reg;
+# endif
+ ioapic->base[0] = reg;
+ return ioapic->base[4];
+}
+
+
+# ifdef IOAPIC_CACHE
+/*
+ * Prime cached_val[] by reading every register from index 0 up to (but not
+ * including) 0x10 + 2 * nr_registers from the hardware.
+ */
+static void __init ioapic_cache_init(struct ioapic_data_struct *ioapic)
+{
+ int reg;
+ for (reg = 0; reg < (0x10 + 2 * ioapic->nr_registers); reg++)
+ ioapic->cached_val[reg] = __raw_io_apic_read(ioapic, reg);
+}
+# endif
+
+
+/*
+ * Read register @reg from the hardware and, with IOAPIC_CACHE, refresh its
+ * shadow copy with the value just read.  Returns that value.
+ */
+static unsigned int raw_io_apic_read(struct ioapic_data_struct *ioapic, unsigned int reg)
+{
+ unsigned int val = __raw_io_apic_read(ioapic, reg);
+
+# ifdef IOAPIC_CACHE
+ ioapic->cached_val[reg] = val;
+# endif
+ return val;
+}
+
+/*
+ * Return the value of register @reg.  With IOAPIC_CACHE and sis_apic_bug
+ * equal to 0 the shadow copy is returned without touching the hardware;
+ * in every other case (including sis_apic_bug still at its initial -1)
+ * the register is read through raw_io_apic_read().
+ */
+static unsigned int io_apic_read(struct ioapic_data_struct *ioapic, unsigned int reg)
+{
+# ifdef IOAPIC_CACHE
+ if (likely(!sis_apic_bug)) {
+ /*
+ * No index was written to the hardware, so forget the remembered
+ * one: a following io_apic_modify() then sees reg_set != reg and
+ * rewrites the index register.
+ */
+ ioapic->reg_set = -1;
+ return ioapic->cached_val[reg];
+ }
+# endif
+ return raw_io_apic_read(ioapic, reg);
+}
+
+/*
+ * Write @val to register @reg: the index goes to base[0], the data to
+ * base[4].  With IOAPIC_CACHE the shadow copy and the remembered index are
+ * updated first.
+ */
+static void io_apic_write(struct ioapic_data_struct *ioapic, unsigned int reg, unsigned int val)
+{
+# ifdef IOAPIC_CACHE
+ ioapic->cached_val[reg] = val;
+ ioapic->reg_set = reg;
+# endif
+ ioapic->base[0] = reg;
+ ioapic->base[4] = val;
+}
+
+
+/*
+ * Some systems need a POST flush or else level-triggered interrupts
+ * generate lots of spurious interrupts due to the POST-ed write not
+ * reaching the IOAPIC before the IRQ is ACK-ed in the local APIC.
+ *
+ * It seems most systems need this - disable the optimization for now.
+ */
+#ifndef CONFIG_X86_IOAPIC_FAST
+# define IOAPIC_POSTFLUSH
+#endif
+
+/*
+ * Re-write a value: to be used for read-modify-write
+ * cycles where the read already set up the index register.
+ *
+ * Older SiS APIC requires we rewrite the index register
+ */
+static void io_apic_modify(struct ioapic_data_struct *ioapic, unsigned int reg, unsigned int val)
+{
+ /*
+ * Both preprocessor branches below open the same if-block, which is
+ * closed after the index write.  With IOAPIC_CACHE the index is
+ * rewritten when it is not the remembered one or when sis_apic_bug is
+ * non-zero; without the cache only when sis_apic_bug is non-zero.
+ */
+#ifdef IOAPIC_CACHE
+ ioapic->cached_val[reg] = val;
+ if (ioapic->reg_set != reg || sis_apic_bug) {
+ ioapic->reg_set = reg;
+#else
+ if (unlikely(sis_apic_bug)) {
+#endif
+ ioapic->base[0] = reg;
+ }
+ ioapic->base[4] = val;
+ /*
+ * With IOAPIC_POSTFLUSH defined the read-back below is unconditional;
+ * without it the read-back only happens when sis_apic_bug is non-zero.
+ */
+#ifndef IOAPIC_POSTFLUSH
+ if (unlikely(sis_apic_bug))
+#endif
+ /*
+ * Force POST flush by reading:
+ */
+ val = ioapic->base[4];
+}
+
static void __modify_IO_APIC_irq (unsigned int irq, unsigned long enable, unsigned long disable)
{
struct irq_pin_list *entry = irq_2_pin + irq;
- unsigned int pin, reg;
+ unsigned int pin, val;
+ struct ioapic_data_struct *ioapic;
for (;;) {
pin = entry->pin;
if (pin == -1)
break;
- reg = io_apic_read(entry->apic, 0x10 + pin*2);
- reg &= ~disable;
- reg |= enable;
- io_apic_modify(entry->apic, 0x10 + pin*2, reg);
+ ioapic = ioapic_data[entry->apic];
+ val = io_apic_read(ioapic, 0x10 + pin*2);
+ val &= ~disable;
+ val |= enable;
+ io_apic_modify(ioapic, 0x10 + pin*2, val);
if (!entry->next)
break;
entry = irq_2_pin + entry->next;
@@ -150,29 +281,17 @@ static void __modify_IO_APIC_irq (unsign
}
/* mask = 1 */
-static void __mask_IO_APIC_irq (unsigned int irq)
+static inline void __mask_IO_APIC_irq (unsigned int irq)
{
__modify_IO_APIC_irq(irq, 0x00010000, 0);
}
/* mask = 0 */
-static void __unmask_IO_APIC_irq (unsigned int irq)
+static inline void __unmask_IO_APIC_irq (unsigned int irq)
{
__modify_IO_APIC_irq(irq, 0, 0x00010000);
}
-/* mask = 1, trigger = 0 */
-static void __mask_and_edge_IO_APIC_irq (unsigned int irq)
-{
- __modify_IO_APIC_irq(irq, 0x00010000, 0x00008000);
-}
-
-/* mask = 0, trigger = 1 */
-static void __unmask_and_level_IO_APIC_irq (unsigned int irq)
-{
- __modify_IO_APIC_irq(irq, 0x00008000, 0x00010000);
-}
-
static void mask_IO_APIC_irq (unsigned int irq)
{
unsigned long flags;
@@ -191,15 +310,15 @@ static void unmask_IO_APIC_irq (unsigned
spin_unlock_irqrestore(&ioapic_lock, flags);
}
-static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
+static void clear_IO_APIC_pin(struct ioapic_data_struct *ioapic, unsigned int pin)
{
struct IO_APIC_route_entry entry;
unsigned long flags;
/* Check delivery_mode to be sure we're not clearing an SMI pin */
spin_lock_irqsave(&ioapic_lock, flags);
- *(((int*)&entry) + 0) = io_apic_read(apic, 0x10 + 2 * pin);
- *(((int*)&entry) + 1) = io_apic_read(apic, 0x11 + 2 * pin);
+ *(((int*)&entry) + 0) = io_apic_read(ioapic, 0x10 + 2 * pin);
+ *(((int*)&entry) + 1) = io_apic_read(ioapic, 0x11 + 2 * pin);
spin_unlock_irqrestore(&ioapic_lock, flags);
if (entry.delivery_mode == dest_SMI)
return;
@@ -210,8 +329,8 @@ static void clear_IO_APIC_pin(unsigned i
memset(&entry, 0, sizeof(entry));
entry.mask = 1;
spin_lock_irqsave(&ioapic_lock, flags);
- io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry) + 0));
- io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry) + 1));
+ io_apic_write(ioapic, 0x10 + 2 * pin, *(((int *)&entry) + 0));
+ io_apic_write(ioapic, 0x11 + 2 * pin, *(((int *)&entry) + 1));
spin_unlock_irqrestore(&ioapic_lock, flags);
}
@@ -219,9 +338,14 @@ static void clear_IO_APIC (void)
{
int apic, pin;
- for (apic = 0; apic < nr_ioapics; apic++)
- for (pin = 0; pin < nr_ioapic_registers[apic]; pin++)
- clear_IO_APIC_pin(apic, pin);
+ for (apic = 0; apic < nr_ioapics; apic++) {
+ struct ioapic_data_struct *ioapic = ioapic_data[apic];
+#ifdef IOAPIC_CACHE
+ ioapic->reg_set = -1;
+#endif
+ for (pin = 0; pin < ioapic->nr_registers; pin++)
+ clear_IO_APIC_pin(ioapic, pin);
+ }
}
#ifdef CONFIG_SMP
@@ -247,7 +371,7 @@ static void set_ioapic_affinity_irq(unsi
pin = entry->pin;
if (pin == -1)
break;
- io_apic_write(entry->apic, 0x10 + 1 + pin*2, apicid_value);
+ io_apic_write(ioapic_data[entry->apic], 0x10 + 1 + pin*2, apicid_value);
if (!entry->next)
break;
entry = irq_2_pin + entry->next;
@@ -819,7 +943,7 @@ void __init setup_ioapic_dest(void)
return;
for (ioapic = 0; ioapic < nr_ioapics; ioapic++) {
- for (pin = 0; pin < nr_ioapic_registers[ioapic]; pin++) {
+ for (pin = 0; pin < ioapic_data[ioapic]->nr_registers; pin++) {
irq_entry = find_irq_entry(ioapic, pin, mp_INT);
if (irq_entry == -1)
continue;
@@ -1063,7 +1187,7 @@ static int pin_2_irq(int idx, int apic,
*/
i = irq = 0;
while (i < apic)
- irq += nr_ioapic_registers[i++];
+ irq += ioapic_data[i++]->nr_registers;
irq += pin;
/*
@@ -1106,7 +1230,7 @@ static inline int IO_APIC_irq_trigger(in
int apic, idx, pin;
for (apic = 0; apic < nr_ioapics; apic++) {
- for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
+ for (pin = 0; pin < ioapic_data[apic]->nr_registers; pin++) {
idx = find_irq_entry(apic,pin,mp_INT);
if ((idx != -1) && (irq == pin_2_irq(idx,apic,pin)))
return irq_trigger(idx);
@@ -1178,11 +1302,13 @@ static void __init setup_IO_APIC_irqs(vo
struct IO_APIC_route_entry entry;
int apic, pin, idx, irq, first_notcon = 1, vector;
unsigned long flags;
+ struct ioapic_data_struct *ioapic;
apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
for (apic = 0; apic < nr_ioapics; apic++) {
- for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
+ ioapic = ioapic_data[apic];
+ for (pin = 0; pin < ioapic->nr_registers; pin++) {
/*
* add it to the IO-APIC irq-routing table:
@@ -1239,8 +1365,8 @@ static void __init setup_IO_APIC_irqs(vo
disable_8259A_irq(irq);
}
spin_lock_irqsave(&ioapic_lock, flags);
- io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1));
- io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0));
+ io_apic_write(ioapic, 0x11+2*pin, *(((int *)&entry)+1));
+ io_apic_write(ioapic, 0x10+2*pin, *(((int *)&entry)+0));
set_native_irq_info(irq, TARGET_CPUS);
spin_unlock_irqrestore(&ioapic_lock, flags);
}
@@ -1287,8 +1413,8 @@ static void __init setup_ExtINT_IRQ0_pin
* Add it to the IO-APIC irq-routing table:
*/
spin_lock_irqsave(&ioapic_lock, flags);
- io_apic_write(0, 0x11+2*pin, *(((int *)&entry)+1));
- io_apic_write(0, 0x10+2*pin, *(((int *)&entry)+0));
+ io_apic_write(ioapic_data[0], 0x11+2*pin, *(((int *)&entry)+1));
+ io_apic_write(ioapic_data[0], 0x10+2*pin, *(((int *)&entry)+0));
spin_unlock_irqrestore(&ioapic_lock, flags);
enable_8259A_irq(0);
@@ -1298,7 +1424,7 @@ static inline void UNEXPECTED_IO_APIC(vo
{
}
-void __init print_IO_APIC(void)
+void /*__init*/ print_IO_APIC(void)
{
int apic, i;
union IO_APIC_reg_00 reg_00;
@@ -1306,6 +1432,7 @@ void __init print_IO_APIC(void)
union IO_APIC_reg_02 reg_02;
union IO_APIC_reg_03 reg_03;
unsigned long flags;
+ struct ioapic_data_struct *ioapic;
if (apic_verbosity == APIC_QUIET)
return;
@@ -1313,7 +1440,7 @@ void __init print_IO_APIC(void)
printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries);
for (i = 0; i < nr_ioapics; i++)
printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n",
- mp_ioapics[i].mpc_apicid, nr_ioapic_registers[i]);
+ mp_ioapics[i].mpc_apicid, ioapic_data[i]->nr_registers);
/*
* We are a bit conservative about what we expect. We have to
@@ -1322,14 +1449,14 @@ void __init print_IO_APIC(void)
printk(KERN_INFO "testing the IO APIC.......................\n");
for (apic = 0; apic < nr_ioapics; apic++) {
-
+ ioapic = ioapic_data[apic];
spin_lock_irqsave(&ioapic_lock, flags);
- reg_00.raw = io_apic_read(apic, 0);
- reg_01.raw = io_apic_read(apic, 1);
+ reg_00.raw = io_apic_read(ioapic, 0);
+ reg_01.raw = io_apic_read(ioapic, 1);
if (reg_01.bits.version >= 0x10)
- reg_02.raw = io_apic_read(apic, 2);
+ reg_02.raw = io_apic_read(ioapic, 2);
if (reg_01.bits.version >= 0x20)
- reg_03.raw = io_apic_read(apic, 3);
+ reg_03.raw = io_apic_read(ioapic, 3);
spin_unlock_irqrestore(&ioapic_lock, flags);
printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mpc_apicid);
@@ -1400,8 +1527,8 @@ void __init print_IO_APIC(void)
struct IO_APIC_route_entry entry;
spin_lock_irqsave(&ioapic_lock, flags);
- *(((int *)&entry)+0) = io_apic_read(apic, 0x10+i*2);
- *(((int *)&entry)+1) = io_apic_read(apic, 0x11+i*2);
+ *(((int *)&entry)+0) = raw_io_apic_read(ioapic, 0x10+i*2);
+ *(((int *)&entry)+1) = raw_io_apic_read(ioapic, 0x11+i*2);
spin_unlock_irqrestore(&ioapic_lock, flags);
printk(KERN_DEBUG " %02x %03X %02X ",
@@ -1447,7 +1574,7 @@ void __init print_IO_APIC(void)
return;
}
-#if 0
+#if 1
static void print_APIC_bitfield (int base)
{
@@ -1594,9 +1721,7 @@ void /*__init*/ print_PIC(void)
static void __init enable_IO_APIC(void)
{
- union IO_APIC_reg_01 reg_01;
int i;
- unsigned long flags;
for (i = 0; i < PIN_MAP_SIZE; i++) {
irq_2_pin[i].pin = -1;
@@ -1607,16 +1732,6 @@ static void __init enable_IO_APIC(void)
pirq_entries[i] = -1;
/*
- * The number of IO-APIC IRQ registers (== #pins):
- */
- for (i = 0; i < nr_ioapics; i++) {
- spin_lock_irqsave(&ioapic_lock, flags);
- reg_01.raw = io_apic_read(i, 1);
- spin_unlock_irqrestore(&ioapic_lock, flags);
- nr_ioapic_registers[i] = reg_01.bits.entries+1;
- }
-
- /*
* Do not trust the IO-APIC being empty at bootup
*/
clear_IO_APIC();
@@ -1659,8 +1774,7 @@ void disable_IO_APIC(void)
* Add it to the IO-APIC irq-routing table:
*/
spin_lock_irqsave(&ioapic_lock, flags);
- io_apic_write(0, 0x11+2*pin, *(((int *)&entry)+1));
- io_apic_write(0, 0x10+2*pin, *(((int *)&entry)+0));
+ io_apic_write(ioapic_data[0], 0x11+2*pin, *(((int *)&entry)+1)); io_apic_write(ioapic_data[0], 0x10+2*pin, *(((int *)&entry)+0));
spin_unlock_irqrestore(&ioapic_lock, flags);
}
disconnect_bsp_APIC(pin != -1);
@@ -1682,6 +1796,7 @@ static void __init setup_ioapic_ids_from
int i;
unsigned char old_id;
unsigned long flags;
+ struct ioapic_data_struct *ioapic;
/*
* Don't check I/O APIC IDs for xAPIC systems. They have
@@ -1699,10 +1814,10 @@ static void __init setup_ioapic_ids_from
* Set the IOAPIC ID to the value stored in the MPC table.
*/
for (apic = 0; apic < nr_ioapics; apic++) {
-
+ ioapic = ioapic_data[apic];
/* Read the register 0 value */
spin_lock_irqsave(&ioapic_lock, flags);
- reg_00.raw = io_apic_read(apic, 0);
+ reg_00.raw = io_apic_read(ioapic, 0);
spin_unlock_irqrestore(&ioapic_lock, flags);
old_id = mp_ioapics[apic].mpc_apicid;
@@ -1763,14 +1878,14 @@ static void __init setup_ioapic_ids_from
reg_00.bits.ID = mp_ioapics[apic].mpc_apicid;
spin_lock_irqsave(&ioapic_lock, flags);
- io_apic_write(apic, 0, reg_00.raw);
+ io_apic_write(ioapic, 0, reg_00.raw);
spin_unlock_irqrestore(&ioapic_lock, flags);
/*
* Sanity check
*/
spin_lock_irqsave(&ioapic_lock, flags);
- reg_00.raw = io_apic_read(apic, 0);
+ reg_00.raw = io_apic_read(ioapic, 0);
spin_unlock_irqrestore(&ioapic_lock, flags);
if (reg_00.bits.ID != mp_ioapics[apic].mpc_apicid)
printk("could not set ID!\n");
@@ -1794,7 +1909,7 @@ static int __init timer_irq_works(void)
{
unsigned long t1 = jiffies;
- local_irq_enable();
+ raw_local_irq_enable();
/* Let ten ticks pass... */
mdelay((10 * 1000) / HZ);
@@ -1805,7 +1920,7 @@ static int __init timer_irq_works(void)
* might have cached one ExtINT interrupt. Finally, at
* least one tick may be lost due to delays.
*/
- if (jiffies - t1 > 4)
+ if (jiffies - t1 > 4 && jiffies - t1 < 16)
return 1;
return 0;
@@ -1858,9 +1973,11 @@ static unsigned int startup_edge_ioapic_
static void ack_edge_ioapic_irq(unsigned int irq)
{
move_irq(irq);
+#if 0
if ((irq_desc[irq].status & (IRQ_PENDING | IRQ_DISABLED))
== (IRQ_PENDING | IRQ_DISABLED))
mask_IO_APIC_irq(irq);
+#endif
ack_APIC_irq();
}
@@ -1885,6 +2002,30 @@ static unsigned int startup_level_ioapic
return 0; /* don't check for pending */
}
+#ifdef CONFIG_PREEMPT_HARDIRQS
+
+/*
+ * In the PREEMPT_HARDIRQS case we don't want to keep the local
+ * APIC unacked, because that prevents further interrupts from
+ * being handled - and with IRQ threads being delayed arbitrarily,
+ * that's unacceptable. So we first mask the IRQ, then ack it.
+ * The hardirq thread will then unmask it.
+ */
+static void mask_and_ack_level_ioapic_irq(unsigned int irq)
+{
+ move_irq(irq);
+ mask_IO_APIC_irq(irq);
+ ack_APIC_irq();
+}
+
+#else
+
+/* Without CONFIG_PREEMPT_HARDIRQS this is an empty stub. */
+static void mask_and_ack_level_ioapic_irq(unsigned int irq)
+{
+}
+
+#endif
+
static void end_level_ioapic_irq (unsigned int irq)
{
unsigned long v;
@@ -1919,8 +2060,10 @@ static void end_level_ioapic_irq (unsign
if (!(v & (1 << (i & 0x1f)))) {
atomic_inc(&irq_mis_count);
spin_lock(&ioapic_lock);
- __mask_and_edge_IO_APIC_irq(irq);
- __unmask_and_level_IO_APIC_irq(irq);
+ /* mask = 1, trigger = 0 */
+ __modify_IO_APIC_irq(irq, 0x00010000, 0x00008000);
+ /* mask = 0, trigger = 1 */
+ __modify_IO_APIC_irq(irq, 0x00008000, 0x00010000);
spin_unlock(&ioapic_lock);
}
}
@@ -1948,6 +2091,13 @@ static unsigned int startup_level_ioapic
return startup_level_ioapic_irq (irq);
}
+/* Vector-indexed wrapper: map @vector to its IRQ, then mask and ack that IRQ. */
+static void mask_and_ack_level_ioapic_vector (unsigned int vector)
+{
+	mask_and_ack_level_ioapic_irq(vector_to_irq(vector));
+}
+
static void end_level_ioapic_vector (unsigned int vector)
{
int irq = vector_to_irq(vector);
@@ -2111,22 +2261,23 @@ static void setup_nmi (void)
* cycles as some i82489DX-based boards have glue logic that keeps the
* 8259A interrupt line asserted until INTA. --macro
*/
-static inline void unlock_ExtINT_logic(void)
+static void __init unlock_ExtINT_logic(void)
{
int pin, i;
struct IO_APIC_route_entry entry0, entry1;
unsigned char save_control, save_freq_select;
unsigned long flags;
+ struct ioapic_data_struct *ioapic0 = ioapic_data[0];
pin = find_isa_irq_pin(8, mp_INT);
if (pin == -1)
return;
spin_lock_irqsave(&ioapic_lock, flags);
- *(((int *)&entry0) + 1) = io_apic_read(0, 0x11 + 2 * pin);
- *(((int *)&entry0) + 0) = io_apic_read(0, 0x10 + 2 * pin);
+ *(((int *)&entry0) + 1) = io_apic_read(ioapic0, 0x11 + 2 * pin);
+ *(((int *)&entry0) + 0) = io_apic_read(ioapic0, 0x10 + 2 * pin);
spin_unlock_irqrestore(&ioapic_lock, flags);
- clear_IO_APIC_pin(0, pin);
+ clear_IO_APIC_pin(ioapic0, pin);
memset(&entry1, 0, sizeof(entry1));
@@ -2139,8 +2290,8 @@ static inline void unlock_ExtINT_logic(v
entry1.vector = 0;
spin_lock_irqsave(&ioapic_lock, flags);
- io_apic_write(0, 0x11 + 2 * pin, *(((int *)&entry1) + 1));
- io_apic_write(0, 0x10 + 2 * pin, *(((int *)&entry1) + 0));
+ io_apic_write(ioapic0, 0x11 + 2 * pin, *(((int *)&entry1) + 1));
+ io_apic_write(ioapic0, 0x10 + 2 * pin, *(((int *)&entry1) + 0));
spin_unlock_irqrestore(&ioapic_lock, flags);
save_control = CMOS_READ(RTC_CONTROL);
@@ -2158,11 +2309,11 @@ static inline void unlock_ExtINT_logic(v
CMOS_WRITE(save_control, RTC_CONTROL);
CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT);
- clear_IO_APIC_pin(0, pin);
+ clear_IO_APIC_pin(ioapic0, pin);
spin_lock_irqsave(&ioapic_lock, flags);
- io_apic_write(0, 0x11 + 2 * pin, *(((int *)&entry0) + 1));
- io_apic_write(0, 0x10 + 2 * pin, *(((int *)&entry0) + 0));
+ io_apic_write(ioapic0, 0x11 + 2 * pin, *(((int *)&entry0) + 1));
+ io_apic_write(ioapic0, 0x10 + 2 * pin, *(((int *)&entry0) + 0));
spin_unlock_irqrestore(&ioapic_lock, flags);
}
@@ -2172,10 +2323,11 @@ static inline void unlock_ExtINT_logic(v
* is so screwy. Thanks to Brian Perkins for testing/hacking this beast
* fanatically on his truly buggy board.
*/
-static inline void check_timer(void)
+static void __init check_timer(void)
{
int pin1, pin2;
int vector;
+ struct ioapic_data_struct *ioapic0 = ioapic_data[0];
/*
* get/set the timer IRQ vector:
@@ -2193,7 +2345,10 @@ static inline void check_timer(void)
*/
apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
init_8259A(1);
- timer_ack = 1;
+#ifdef CONFIG_PREEMPT_RT
+ if (nmi_watchdog)
+#endif
+ timer_ack = 1;
enable_8259A_irq(0);
pin1 = find_isa_irq_pin(0, mp_INT);
@@ -2216,7 +2371,7 @@ static inline void check_timer(void)
- clear_IO_APIC_pin(0, pin1);
+ clear_IO_APIC_pin(ioapic0, pin1); /* now takes the ioapic_data_struct pointer; a literal 0 would be dereferenced as NULL */
return;
}
- clear_IO_APIC_pin(0, pin1);
+ clear_IO_APIC_pin(ioapic0, pin1);
printk(KERN_ERR "..MP-BIOS bug: 8254 timer not connected to IO-APIC\n");
}
@@ -2241,7 +2396,7 @@ static inline void check_timer(void)
/*
* Cleanup, just in case ...
*/
- clear_IO_APIC_pin(0, pin2);
+ clear_IO_APIC_pin(ioapic0, pin2);
}
printk(" failed.\n");
@@ -2282,6 +2437,46 @@ static inline void check_timer(void)
"report. Then try booting with the 'noapic' option");
}
+/*
+ * Early setup of IO-APIC number @_ioapic: map its register window through
+ * the matching fixmap slot, read the number of routing registers from
+ * register 1, then allocate and fill in ioapic_data[_ioapic].  With
+ * IOAPIC_CACHE the allocation also holds the register shadow, which is
+ * primed from the hardware.
+ *
+ * A second call for the same index only logs a message and returns.
+ */
+void __init setup_IO_APIC_early(int _ioapic)
+{
+	union IO_APIC_reg_01 reg_01;
+	unsigned long flags;
+	/* nr_regs: named so that it does not shadow nr_ioapic_registers() */
+	int size, nr_regs;
+	/* same type as ioapic_data_struct::base, which it is stored into */
+	volatile unsigned int *ioapic;
+
+	if (ioapic_data[_ioapic]) {
+		printk(KERN_WARNING "been in %s before !!!!!\n", __FUNCTION__);
+		return;
+	}
+
+	set_fixmap_nocache(FIX_IO_APIC_BASE_0 + _ioapic, mp_ioapics[_ioapic].mpc_apicaddr);
+	printk(KERN_DEBUG "mapped IOAPIC to %08lx (%08lx)\n",
+		__fix_to_virt(FIX_IO_APIC_BASE_0 + _ioapic), mp_ioapics[_ioapic].mpc_apicaddr);
+	/*
+	 * The number of IO-APIC IRQ registers (== #pins):
+	 */
+	ioapic = (volatile unsigned int *)IO_APIC_BASE(_ioapic);
+	spin_lock_irqsave(&ioapic_lock, flags);
+	ioapic[0] = 1;
+	reg_01.raw = ioapic[4];
+	spin_unlock_irqrestore(&ioapic_lock, flags);
+	nr_regs = reg_01.bits.entries+1;
+
+	/*
+	 * Initialize ioapic_data struct:
+	 */
+	size = sizeof(struct ioapic_data_struct);
+#ifdef IOAPIC_CACHE
+	/* room for cached_val[]: 0x10 fixed words plus one route entry per pin */
+	size += 0x10 * sizeof(u32) + nr_regs * sizeof(struct IO_APIC_route_entry);
+#endif
+	ioapic_data[_ioapic] = alloc_bootmem(size);
+	memset(ioapic_data[_ioapic], 0, size);
+	ioapic_data[_ioapic]->nr_registers = nr_regs;
+	ioapic_data[_ioapic]->base = ioapic;
+#ifdef IOAPIC_CACHE
+	ioapic_cache_init(ioapic_data[_ioapic]);
+#endif
+}
+
/*
*
* IRQ's that are handled by the PIC in the MPS IOAPIC case.
@@ -2329,25 +2524,22 @@ static int __init io_apic_bug_finalize(v
late_initcall(io_apic_bug_finalize);
-struct sysfs_ioapic_data {
- struct sys_device dev;
- struct IO_APIC_route_entry entry[0];
-};
-static struct sysfs_ioapic_data * mp_ioapic_data[MAX_IO_APICS];
-
static int ioapic_suspend(struct sys_device *dev, pm_message_t state)
{
struct IO_APIC_route_entry *entry;
- struct sysfs_ioapic_data *data;
+ struct ioapic_data_struct *data;
unsigned long flags;
int i;
+ struct ioapic_data_struct *ioapic;
- data = container_of(dev, struct sysfs_ioapic_data, dev);
+ data = container_of(dev, struct ioapic_data_struct, dev);
entry = data->entry;
+
+ ioapic = ioapic_data[dev->id];
spin_lock_irqsave(&ioapic_lock, flags);
- for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ ) {
- *(((int *)entry) + 1) = io_apic_read(dev->id, 0x11 + 2 * i);
- *(((int *)entry) + 0) = io_apic_read(dev->id, 0x10 + 2 * i);
+ for (i = 0; i < ioapic_data[dev->id]->nr_registers; i ++, entry ++) {
+ *(((int *)entry) + 1) = io_apic_read(ioapic, 0x11 + 2 * i);
+ *(((int *)entry) + 0) = io_apic_read(ioapic, 0x10 + 2 * i);
}
spin_unlock_irqrestore(&ioapic_lock, flags);
@@ -2357,23 +2549,25 @@ static int ioapic_suspend(struct sys_dev
static int ioapic_resume(struct sys_device *dev)
{
struct IO_APIC_route_entry *entry;
- struct sysfs_ioapic_data *data;
+ struct ioapic_data_struct *data;
unsigned long flags;
union IO_APIC_reg_00 reg_00;
int i;
-
- data = container_of(dev, struct sysfs_ioapic_data, dev);
+ struct ioapic_data_struct *ioapic;
+
+ data = container_of(dev, struct ioapic_data_struct, dev);
entry = data->entry;
+ ioapic = ioapic_data[dev->id];
spin_lock_irqsave(&ioapic_lock, flags);
- reg_00.raw = io_apic_read(dev->id, 0);
+ reg_00.raw = io_apic_read(ioapic, 0);
if (reg_00.bits.ID != mp_ioapics[dev->id].mpc_apicid) {
reg_00.bits.ID = mp_ioapics[dev->id].mpc_apicid;
- io_apic_write(dev->id, 0, reg_00.raw);
+ io_apic_write(ioapic, 0, reg_00.raw);
}
- for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ ) {
- io_apic_write(dev->id, 0x11+2*i, *(((int *)entry)+1));
- io_apic_write(dev->id, 0x10+2*i, *(((int *)entry)+0));
+ for (i = 0; i < ioapic_data[dev->id]->nr_registers; i ++, entry ++) {
+ io_apic_write(ioapic, 0x11+2*i, *(((int *)entry)+1));
+ io_apic_write(ioapic, 0x10+2*i, *(((int *)entry)+0));
}
spin_unlock_irqrestore(&ioapic_lock, flags);
@@ -2396,21 +2590,20 @@ static int __init ioapic_init_sysfs(void
return error;
for (i = 0; i < nr_ioapics; i++ ) {
- size = sizeof(struct sys_device) + nr_ioapic_registers[i]
- * sizeof(struct IO_APIC_route_entry);
- mp_ioapic_data[i] = kmalloc(size, GFP_KERNEL);
- if (!mp_ioapic_data[i]) {
+ size = ioapic_data[i]->nr_registers * sizeof(struct IO_APIC_route_entry);
+ ioapic_data[i]->entry = kmalloc(size, GFP_KERNEL);
+ if (!ioapic_data[i]->entry) {
printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i);
continue;
}
- memset(mp_ioapic_data[i], 0, size);
- dev = &mp_ioapic_data[i]->dev;
+ memset(ioapic_data[i]->entry, 0, size);
+ dev = &ioapic_data[i]->dev;
dev->id = i;
dev->cls = &ioapic_sysdev_class;
error = sysdev_register(dev);
if (error) {
- kfree(mp_ioapic_data[i]);
- mp_ioapic_data[i] = NULL;
+ kfree(ioapic_data[i]->entry);
+ ioapic_data[i]->entry = NULL;
printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i);
continue;
}
@@ -2427,13 +2620,14 @@ device_initcall(ioapic_init_sysfs);
#ifdef CONFIG_ACPI
-int __init io_apic_get_unique_id (int ioapic, int apic_id)
+int __init io_apic_get_unique_id (int apic, int apic_id)
{
union IO_APIC_reg_00 reg_00;
static physid_mask_t apic_id_map = PHYSID_MASK_NONE;
physid_mask_t tmp;
unsigned long flags;
int i = 0;
+ struct ioapic_data_struct *ioapic = ioapic_data[apic];
/*
* The P4 platform supports up to 256 APIC IDs on two separate APIC
@@ -2453,7 +2647,7 @@ int __init io_apic_get_unique_id (int io
if (apic_id >= get_physical_broadcast()) {
printk(KERN_WARNING "IOAPIC[%d]: Invalid apic_id %d, trying "
- "%d\n", ioapic, apic_id, reg_00.bits.ID);
+ "%d\n", apic, apic_id, reg_00.bits.ID);
apic_id = reg_00.bits.ID;
}
@@ -2472,7 +2666,7 @@ int __init io_apic_get_unique_id (int io
panic("Max apic_id exceeded!\n");
printk(KERN_WARNING "IOAPIC[%d]: apic_id %d already used, "
- "trying %d\n", ioapic, apic_id, i);
+ "trying %d\n", apic, apic_id, i);
apic_id = i;
}
@@ -2490,50 +2684,50 @@ int __init io_apic_get_unique_id (int io
/* Sanity check */
if (reg_00.bits.ID != apic_id)
- panic("IOAPIC[%d]: Unable change apic_id!\n", ioapic);
+ panic("IOAPIC[%d]: Unable change apic_id!\n", apic);
}
apic_printk(APIC_VERBOSE, KERN_INFO
- "IOAPIC[%d]: Assigned apic_id %d\n", ioapic, apic_id);
+ "IOAPIC[%d]: Assigned apic_id %d\n", apic, apic_id);
return apic_id;
}
-int __init io_apic_get_version (int ioapic)
+int __init io_apic_get_version (int apic)
{
union IO_APIC_reg_01 reg_01;
unsigned long flags;
spin_lock_irqsave(&ioapic_lock, flags);
- reg_01.raw = io_apic_read(ioapic, 1);
+ reg_01.raw = io_apic_read(ioapic_data[apic], 1);
spin_unlock_irqrestore(&ioapic_lock, flags);
return reg_01.bits.version;
}
-int __init io_apic_get_redir_entries (int ioapic)
+int __init io_apic_get_redir_entries (int apic)
{
union IO_APIC_reg_01 reg_01;
unsigned long flags;
spin_lock_irqsave(&ioapic_lock, flags);
- reg_01.raw = io_apic_read(ioapic, 1);
+ reg_01.raw = io_apic_read(ioapic_data[apic], 1);
spin_unlock_irqrestore(&ioapic_lock, flags);
return reg_01.bits.entries;
}
-int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int active_high_low)
+int io_apic_set_pci_routing (int apic, int pin, int irq, int edge_level, int active_high_low)
{
struct IO_APIC_route_entry entry;
unsigned long flags;
-
+ struct ioapic_data_struct *ioapic = ioapic_data[apic];
if (!IO_APIC_IRQ(irq)) {
printk(KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n",
- ioapic);
+ apic);
return -EINVAL;
}
@@ -2556,18 +2750,18 @@ int io_apic_set_pci_routing (int ioapic,
* IRQs < 16 are already in the irq_2_pin[] map
*/
if (irq >= 16)
- add_pin_to_irq(irq, ioapic, pin);
+ add_pin_to_irq(irq, apic, pin);
entry.vector = assign_irq_vector(irq);
apic_printk(APIC_DEBUG, KERN_DEBUG "IOAPIC[%d]: Set PCI routing entry "
- "(%d-%d -> 0x%x -> IRQ %d Mode:%i Active:%i)\n", ioapic,
- mp_ioapics[ioapic].mpc_apicid, pin, entry.vector, irq,
+ "(%d-%d -> 0x%x -> IRQ %d Mode:%i Active:%i)\n", apic,
+ mp_ioapics[apic].mpc_apicid, pin, entry.vector, irq,
edge_level, active_high_low);
ioapic_register_intr(irq, entry.vector, edge_level);
- if (!ioapic && (irq < 16))
+ if (!apic && (irq < 16))
disable_8259A_irq(irq);
spin_lock_irqsave(&ioapic_lock, flags);
Index: linux/arch/i386/kernel/irq.c
===================================================================
--- linux.orig/arch/i386/kernel/irq.c
+++ linux/arch/i386/kernel/irq.c
@@ -51,7 +51,7 @@ static union irq_ctx *softirq_ctx[NR_CPU
* SMP cross-CPU interrupts have their own specific
* handlers).
*/
-fastcall unsigned int do_IRQ(struct pt_regs *regs)
+fastcall notrace unsigned int do_IRQ(struct pt_regs *regs)
{
/* high bits used in ret_from_ code */
int irq = regs->orig_eax & 0xff;
@@ -59,8 +59,12 @@ fastcall unsigned int do_IRQ(struct pt_r
union irq_ctx *curctx, *irqctx;
u32 *isp;
#endif
-
irq_enter();
+#ifdef CONFIG_LATENCY_TRACE
+ if (irq == trace_user_trigger_irq)
+ user_trace_start();
+#endif
+ trace_special(regs->eip, irq, 0);
#ifdef CONFIG_DEBUG_STACKOVERFLOW
/* Debugging check for stack overflow: is there less than 1KB free? */
{
@@ -69,7 +73,7 @@ fastcall unsigned int do_IRQ(struct pt_r
__asm__ __volatile__("andl %%esp,%0" :
"=r" (esp) : "0" (THREAD_SIZE - 1));
if (unlikely(esp < (sizeof(struct thread_info) + STACK_WARN))) {
- printk("do_IRQ: stack overflow: %ld\n",
+ printk("BUG: do_IRQ: stack overflow: %ld\n",
esp - sizeof(struct thread_info));
dump_stack();
}
@@ -173,7 +177,7 @@ asmlinkage void do_softirq(void)
if (in_interrupt())
return;
- local_irq_save(flags);
+ raw_local_irq_save(flags);
if (local_softirq_pending()) {
curctx = current_thread_info();
@@ -194,7 +198,7 @@ asmlinkage void do_softirq(void)
);
}
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
}
EXPORT_SYMBOL(do_softirq);
@@ -224,8 +228,10 @@ int show_interrupts(struct seq_file *p,
}
if (i < NR_IRQS) {
- spin_lock_irqsave(&irq_desc[i].lock, flags);
- action = irq_desc[i].action;
+ irq_desc_t *desc = irq_desc + i;
+
+ spin_lock_irqsave(&desc->lock, flags);
+ action = desc->action;
if (!action)
goto skip;
seq_printf(p, "%3d: ",i);
@@ -235,15 +241,27 @@ int show_interrupts(struct seq_file *p,
for_each_cpu(j)
seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
#endif
- seq_printf(p, " %14s", irq_desc[i].handler->typename);
+ seq_printf(p, " %-14s", desc->handler->typename);
+#define F(x,c) ((desc->status & x) ? c : '.')
+ seq_printf(p, " [%c%c%c%c%c%c%c%c%c/",
+ F(IRQ_INPROGRESS, 'I'),
+ F(IRQ_DISABLED, 'D'),
+ F(IRQ_PENDING, 'P'),
+ F(IRQ_REPLAY, 'R'),
+ F(IRQ_AUTODETECT, 'A'),
+ F(IRQ_WAITING, 'W'),
+ F(IRQ_LEVEL, 'L'),
+ F(IRQ_MASKED, 'M'),
+ F(IRQ_NODELAY, 'N'));
+#undef F
+ seq_printf(p, "%3d]", desc->irqs_unhandled);
seq_printf(p, " %s", action->name);
-
for (action=action->next; action; action = action->next)
seq_printf(p, ", %s", action->name);
seq_putc(p, '\n');
skip:
- spin_unlock_irqrestore(&irq_desc[i].lock, flags);
+ spin_unlock_irqrestore(&desc->lock, flags);
} else if (i == NR_IRQS) {
seq_printf(p, "NMI: ");
for_each_cpu(j)
@@ -298,9 +316,9 @@ void fixup_irqs(cpumask_t map)
barrier();
#else
/* That doesn't seem sufficient. Give it 1ms. */
- local_irq_enable();
+ raw_local_irq_enable();
mdelay(1);
- local_irq_disable();
+ raw_local_irq_disable();
#endif
}
#endif
Index: linux/arch/i386/kernel/mca.c
===================================================================
--- linux.orig/arch/i386/kernel/mca.c
+++ linux/arch/i386/kernel/mca.c
@@ -472,3 +472,22 @@ void mca_handle_nmi(void)
mca_nmi_hook();
} /* mca_handle_nmi */
+
+void mca_timer_ack(void *priv)
+{
+ int irq;
+
+ if (MCA_bus) {
+ /* The PS/2 uses level-triggered interrupts. You can't
+ turn them off, nor would you want to (any attempt to
+ enable edge-triggered interrupts usually gets intercepted by a
+ special hardware circuit). Hence we have to acknowledge
+ the timer interrupt. Through some incredibly stupid
+ design idea, the reset for IRQ 0 is done by setting the
+ high bit of the PPI port B (0x61). Note that some PS/2s,
+ notably the 55SX, work fine if this is removed. */
+
+ irq = inb_p( 0x61 ); /* read the current state */
+ outb_p( irq|0x80, 0x61 ); /* reset the IRQ */
+ }
+}
Index: linux/arch/i386/kernel/mcount-wrapper.S
===================================================================
--- /dev/null
+++ linux/arch/i386/kernel/mcount-wrapper.S
@@ -0,0 +1,27 @@
+/*
+ * linux/arch/i386/mcount-wrapper.S
+ *
+ * Copyright (C) 2004 Ingo Molnar
+ */
+
+.globl mcount
+mcount:
+
+ cmpl $0, mcount_enabled
+ jz out
+
+ push %ebp
+ mov %esp, %ebp
+ pushl %eax
+ pushl %ecx
+ pushl %edx
+
+ call __mcount
+
+ popl %edx
+ popl %ecx
+ popl %eax
+ popl %ebp
+out:
+ ret
+
Index: linux/arch/i386/kernel/microcode.c
===================================================================
--- linux.orig/arch/i386/kernel/microcode.c
+++ linux/arch/i386/kernel/microcode.c
@@ -109,7 +109,7 @@ MODULE_LICENSE("GPL");
#define exttable_size(et) ((et)->count * EXT_SIGNATURE_SIZE + EXT_HEADER_SIZE)
/* serialize access to the physical write to MSR 0x79 */
-static DEFINE_SPINLOCK(microcode_update_lock);
+static DEFINE_RAW_SPINLOCK(microcode_update_lock);
/* no concurrent ->write()s are allowed on /dev/cpu/microcode */
static DECLARE_MUTEX(microcode_sem);
Index: linux/arch/i386/kernel/mpparse.c
===================================================================
--- linux.orig/arch/i386/kernel/mpparse.c
+++ linux/arch/i386/kernel/mpparse.c
@@ -271,6 +271,7 @@ static void __init MP_ioapic_info (struc
return;
}
mp_ioapics[nr_ioapics] = *m;
+ setup_IO_APIC_early(nr_ioapics);
nr_ioapics++;
}
@@ -919,7 +920,7 @@ void __init mp_register_ioapic (
mp_ioapics[idx].mpc_flags = MPC_APIC_USABLE;
mp_ioapics[idx].mpc_apicaddr = address;
- set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address);
+ setup_IO_APIC_early(idx);
if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && (boot_cpu_data.x86 < 15))
mp_ioapics[idx].mpc_apicid = io_apic_get_unique_id(idx, id);
else
Index: linux/arch/i386/kernel/nmi.c
===================================================================
--- linux.orig/arch/i386/kernel/nmi.c
+++ linux/arch/i386/kernel/nmi.c
@@ -34,7 +34,7 @@
unsigned int nmi_watchdog = NMI_NONE;
extern int unknown_nmi_panic;
-static unsigned int nmi_hz = HZ;
+static unsigned int nmi_hz = 1000;
static unsigned int nmi_perfctr_msr; /* the MSR to reset in NMI handler */
static unsigned int nmi_p4_cccr_val;
extern void show_registers(struct pt_regs *regs);
@@ -112,8 +112,8 @@ static int __init check_nmi_watchdog(voi
for (cpu = 0; cpu < NR_CPUS; cpu++)
prev_nmi_count[cpu] = per_cpu(irq_stat, cpu).__nmi_count;
- local_irq_enable();
- mdelay((10*1000)/nmi_hz); // wait 10 ticks
+ raw_local_irq_enable();
+ mdelay((100*1000)/nmi_hz); // wait 100 ticks
for (cpu = 0; cpu < NR_CPUS; cpu++) {
#ifdef CONFIG_SMP
@@ -134,7 +134,7 @@ static int __init check_nmi_watchdog(voi
/* now that we know it works we can reduce NMI frequency to
something more reasonable; makes a difference in some configs */
if (nmi_watchdog == NMI_LOCAL_APIC)
- nmi_hz = 1;
+ nmi_hz = 10000;
return 0;
}
@@ -486,9 +486,34 @@ void touch_nmi_watchdog (void)
extern void die_nmi(struct pt_regs *, const char *msg);
-void nmi_watchdog_tick (struct pt_regs * regs)
+int nmi_show_regs[NR_CPUS];
+
+void nmi_show_all_regs(void)
{
+ int i;
+
+ if (nmi_watchdog == NMI_NONE)
+ return;
+ if (system_state != SYSTEM_RUNNING) {
+ printk("nmi_show_all_regs(): system state %d, not doing.\n",
+ system_state);
+ return;
+ }
+ printk("nmi_show_all_regs(): start at CPU#%d.\n",
+ raw_smp_processor_id());
+ dump_stack();
+
+ for_each_online_cpu(i)
+ nmi_show_regs[i] = 1;
+ for_each_online_cpu(i)
+ while (nmi_show_regs[i] == 1)
+ barrier();
+}
+
+static DEFINE_RAW_SPINLOCK(nmi_print_lock);
+void notrace nmi_watchdog_tick (struct pt_regs * regs)
+{
/*
* Since current_thread_info()-> is always on the stack, and we
* always switch the stack NMI-atomically, it's safe to use
@@ -496,7 +521,16 @@ void nmi_watchdog_tick (struct pt_regs *
*/
int sum, cpu = smp_processor_id();
- sum = per_cpu(irq_stat, cpu).apic_timer_irqs;
+ sum = per_cpu(irq_stat, cpu).apic_timer_irqs + kstat_irqs(0);
+
+ profile_tick(CPU_PROFILING, regs);
+ if (nmi_show_regs[cpu]) {
+ nmi_show_regs[cpu] = 0;
+ spin_lock(&nmi_print_lock);
+ printk("NMI show regs on CPU#%d:\n", cpu);
+ show_regs(regs);
+ spin_unlock(&nmi_print_lock);
+ }
if (last_irq_sums[cpu] == sum) {
/*
@@ -504,12 +538,25 @@ void nmi_watchdog_tick (struct pt_regs *
* wait a few IRQs (5 seconds) before doing the oops ...
*/
alert_counter[cpu]++;
- if (alert_counter[cpu] == 5*nmi_hz)
- /*
- * die_nmi will return ONLY if NOTIFY_STOP happens..
- */
- die_nmi(regs, "NMI Watchdog detected LOCKUP");
+ if (alert_counter[cpu] && !(alert_counter[cpu] % (5*nmi_hz))) {
+ int i;
+
+ bust_spinlocks(1);
+ spin_lock(&nmi_print_lock);
+ printk("NMI watchdog detected lockup on CPU#%d (%d/%d)\n", cpu, alert_counter[cpu], 5*nmi_hz);
+ show_regs(regs);
+ spin_unlock(&nmi_print_lock);
+
+ for_each_online_cpu(i)
+ if (i != cpu)
+ nmi_show_regs[i] = 1;
+ for_each_online_cpu(i)
+ while (nmi_show_regs[i] == 1)
+ barrier();
+ die_nmi(regs, "NMI Watchdog detected LOCKUP");
+ }
+ } else {
last_irq_sums[cpu] = sum;
alert_counter[cpu] = 0;
}
Index: linux/arch/i386/kernel/process.c
===================================================================
--- linux.orig/arch/i386/kernel/process.c
+++ linux/arch/i386/kernel/process.c
@@ -39,6 +39,7 @@
#include
#include
#include
+#include
#include
#include
@@ -64,6 +65,12 @@ static int hlt_counter;
unsigned long boot_option_idle_override = 0;
EXPORT_SYMBOL(boot_option_idle_override);
+DEFINE_SPINLOCK(pm_idle_switch_lock);
+EXPORT_SYMBOL_GPL(pm_idle_switch_lock);
+
+int pm_idle_locked = 0;
+EXPORT_SYMBOL_GPL(pm_idle_locked);
+
/*
* Return saved PC of a blocked thread.
*/
@@ -100,12 +107,13 @@ EXPORT_SYMBOL(enable_hlt);
void default_idle(void)
{
if (!hlt_counter && boot_cpu_data.hlt_works_ok) {
- local_irq_disable();
- if (!need_resched())
- safe_halt();
+ raw_local_irq_disable();
+ if (!need_resched() && !need_resched_delayed())
+ raw_safe_halt();
else
- local_irq_enable();
+ raw_local_irq_enable();
} else {
+ raw_local_irq_enable();
cpu_relax();
}
}
@@ -118,11 +126,11 @@ EXPORT_SYMBOL(default_idle);
* to poll the ->work.need_resched flag instead of waiting for the
* cross-CPU IPI to arrive. Use this option with caution.
*/
-static void poll_idle (void)
+void poll_idle (void)
{
int oldval;
- local_irq_enable();
+ raw_local_irq_enable();
/*
* Deal with another CPU just having chosen a thread to
@@ -137,7 +145,7 @@ static void poll_idle (void)
"testl %0, %1;"
"rep; nop;"
"je 2b;"
- : : "i"(_TIF_NEED_RESCHED), "m" (current_thread_info()->flags));
+ : : "i"(_TIF_NEED_RESCHED|_TIF_NEED_RESCHED_DELAYED), "m" (current_thread_info()->flags));
clear_thread_flag(TIF_POLLING_NRFLAG);
} else {
@@ -160,7 +168,7 @@ static inline void play_dead(void)
/*
* With physical CPU hotplug, we should halt the cpu
*/
- local_irq_disable();
+ raw_local_irq_disable();
while (1)
halt();
}
@@ -183,7 +191,9 @@ void cpu_idle(void)
/* endless idle loop with no priority at all */
while (1) {
- while (!need_resched()) {
+ BUG_ON(raw_irqs_disabled());
+
+ while (!need_resched() && !need_resched_delayed()) {
void (*idle)(void);
if (__get_cpu_var(cpu_idle_state))
@@ -199,9 +209,13 @@ void cpu_idle(void)
play_dead();
__get_cpu_var(irq_stat).idle_timestamp = jiffies;
+ stop_critical_timing();
+ propagate_preempt_locks_value();
idle();
}
- schedule();
+ raw_local_irq_disable();
+ __schedule();
+ raw_local_irq_enable();
}
}
@@ -242,16 +256,16 @@ EXPORT_SYMBOL_GPL(cpu_idle_wait);
*/
static void mwait_idle(void)
{
- local_irq_enable();
+ raw_local_irq_enable();
- if (!need_resched()) {
+ if (!need_resched() && !need_resched_delayed()) {
set_thread_flag(TIF_POLLING_NRFLAG);
do {
__monitor((void *)¤t_thread_info()->flags, 0, 0);
- if (need_resched())
+ if (need_resched() || need_resched_delayed())
break;
__mwait(0, 0);
- } while (!need_resched());
+ } while (!need_resched() && !need_resched_delayed());
clear_thread_flag(TIF_POLLING_NRFLAG);
}
}
@@ -378,11 +392,16 @@ void exit_thread(void)
/* The process may have allocated an io port bitmap... nuke it. */
if (unlikely(NULL != t->io_bitmap_ptr)) {
- int cpu = get_cpu();
- struct tss_struct *tss = &per_cpu(init_tss, cpu);
+ int cpu;
+ struct tss_struct *tss;
+ void *io_bitmap_ptr = t->io_bitmap_ptr;
- kfree(t->io_bitmap_ptr);
t->io_bitmap_ptr = NULL;
+ mb();
+ kfree(io_bitmap_ptr);
+
+ cpu = get_cpu();
+ tss = &per_cpu(init_tss, cpu);
/*
* Careful, clear this in the TSS too:
*/
Index: linux/arch/i386/kernel/reboot.c
===================================================================
--- linux.orig/arch/i386/kernel/reboot.c
+++ linux/arch/i386/kernel/reboot.c
@@ -194,7 +194,7 @@ void machine_real_restart(unsigned char
{
unsigned long flags;
- local_irq_disable();
+ raw_local_irq_disable();
/* Write zero to CMOS register number 0x0f, which the BIOS POST
routine will recognize as telling it to do a proper reboot. (Well
Index: linux/arch/i386/kernel/semaphore.c
===================================================================
--- linux.orig/arch/i386/kernel/semaphore.c
+++ linux/arch/i386/kernel/semaphore.c
@@ -13,6 +13,7 @@
* rw semaphores implemented November 1999 by Benjamin LaHaise
*/
#include
+#include
#include
/*
@@ -28,15 +29,15 @@
asm(
".section .sched.text\n"
".align 4\n"
-".globl __down_failed\n"
-"__down_failed:\n\t"
+".globl __compat_down_failed\n"
+"__compat_down_failed:\n\t"
#if defined(CONFIG_FRAME_POINTER)
"pushl %ebp\n\t"
"movl %esp,%ebp\n\t"
#endif
"pushl %edx\n\t"
"pushl %ecx\n\t"
- "call __down\n\t"
+ "call __compat_down\n\t"
"popl %ecx\n\t"
"popl %edx\n\t"
#if defined(CONFIG_FRAME_POINTER)
@@ -49,15 +50,15 @@ asm(
asm(
".section .sched.text\n"
".align 4\n"
-".globl __down_failed_interruptible\n"
-"__down_failed_interruptible:\n\t"
+".globl __compat_down_failed_interruptible\n"
+"__compat_down_failed_interruptible:\n\t"
#if defined(CONFIG_FRAME_POINTER)
"pushl %ebp\n\t"
"movl %esp,%ebp\n\t"
#endif
"pushl %edx\n\t"
"pushl %ecx\n\t"
- "call __down_interruptible\n\t"
+ "call __compat_down_interruptible\n\t"
"popl %ecx\n\t"
"popl %edx\n\t"
#if defined(CONFIG_FRAME_POINTER)
@@ -70,15 +71,15 @@ asm(
asm(
".section .sched.text\n"
".align 4\n"
-".globl __down_failed_trylock\n"
-"__down_failed_trylock:\n\t"
+".globl __compat_down_failed_trylock\n"
+"__compat_down_failed_trylock:\n\t"
#if defined(CONFIG_FRAME_POINTER)
"pushl %ebp\n\t"
"movl %esp,%ebp\n\t"
#endif
"pushl %edx\n\t"
"pushl %ecx\n\t"
- "call __down_trylock\n\t"
+ "call __compat_down_trylock\n\t"
"popl %ecx\n\t"
"popl %edx\n\t"
#if defined(CONFIG_FRAME_POINTER)
@@ -91,45 +92,13 @@ asm(
asm(
".section .sched.text\n"
".align 4\n"
-".globl __up_wakeup\n"
-"__up_wakeup:\n\t"
+".globl __compat_up_wakeup\n"
+"__compat_up_wakeup:\n\t"
"pushl %edx\n\t"
"pushl %ecx\n\t"
- "call __up\n\t"
+ "call __compat_up\n\t"
"popl %ecx\n\t"
"popl %edx\n\t"
"ret"
);
-/*
- * rw spinlock fallbacks
- */
-#if defined(CONFIG_SMP)
-asm(
-".section .sched.text\n"
-".align 4\n"
-".globl __write_lock_failed\n"
-"__write_lock_failed:\n\t"
- LOCK "addl $" RW_LOCK_BIAS_STR ",(%eax)\n"
-"1: rep; nop\n\t"
- "cmpl $" RW_LOCK_BIAS_STR ",(%eax)\n\t"
- "jne 1b\n\t"
- LOCK "subl $" RW_LOCK_BIAS_STR ",(%eax)\n\t"
- "jnz __write_lock_failed\n\t"
- "ret"
-);
-
-asm(
-".section .sched.text\n"
-".align 4\n"
-".globl __read_lock_failed\n"
-"__read_lock_failed:\n\t"
- LOCK "incl (%eax)\n"
-"1: rep; nop\n\t"
- "cmpl $1,(%eax)\n\t"
- "js 1b\n\t"
- LOCK "decl (%eax)\n\t"
- "js __read_lock_failed\n\t"
- "ret"
-);
-#endif
Index: linux/arch/i386/kernel/setup.c
===================================================================
--- linux.orig/arch/i386/kernel/setup.c
+++ linux/arch/i386/kernel/setup.c
@@ -1612,6 +1612,7 @@ void __init setup_arch(char **cmdline_p)
conswitchp = &dummy_con;
#endif
#endif
+ tsc_init();
}
#include "setup_arch_post.h"
Index: linux/arch/i386/kernel/signal.c
===================================================================
--- linux.orig/arch/i386/kernel/signal.c
+++ linux/arch/i386/kernel/signal.c
@@ -604,6 +604,13 @@ int fastcall do_signal(struct pt_regs *r
int signr;
struct k_sigaction ka;
+#ifdef CONFIG_PREEMPT_RT
+ /*
+ * Fully-preemptible kernel does not need interrupts disabled:
+ */
+ raw_local_irq_enable();
+ preempt_check_resched();
+#endif
/*
* We want the common case to go fast, which
* is why we may in certain cases get here from
Index: linux/arch/i386/kernel/smp.c
===================================================================
--- linux.orig/arch/i386/kernel/smp.c
+++ linux/arch/i386/kernel/smp.c
@@ -163,7 +163,7 @@ void send_IPI_mask_bitmask(cpumask_t cpu
unsigned long cfg;
unsigned long flags;
- local_irq_save(flags);
+ raw_local_irq_save(flags);
WARN_ON(mask & ~cpus_addr(cpu_online_map)[0]);
/*
* Wait for idle.
@@ -186,7 +186,7 @@ void send_IPI_mask_bitmask(cpumask_t cpu
*/
apic_write_around(APIC_ICR, cfg);
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
}
void send_IPI_mask_sequence(cpumask_t mask, int vector)
@@ -200,7 +200,7 @@ void send_IPI_mask_sequence(cpumask_t ma
* should be modified to do 1 message per cluster ID - mbligh
*/
- local_irq_save(flags);
+ raw_local_irq_save(flags);
for (query_cpu = 0; query_cpu < NR_CPUS; ++query_cpu) {
if (cpu_isset(query_cpu, mask)) {
@@ -227,7 +227,7 @@ void send_IPI_mask_sequence(cpumask_t ma
apic_write_around(APIC_ICR, cfg);
}
}
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
}
#include /* must come after the send_IPI functions above for inlining */
@@ -245,7 +245,7 @@ void send_IPI_mask_sequence(cpumask_t ma
static cpumask_t flush_cpumask;
static struct mm_struct * flush_mm;
static unsigned long flush_va;
-static DEFINE_SPINLOCK(tlbstate_lock);
+static DEFINE_RAW_SPINLOCK(tlbstate_lock);
#define FLUSH_ALL 0xffffffff
/*
@@ -390,7 +390,7 @@ static void flush_tlb_others(cpumask_t c
while (!cpus_empty(flush_cpumask))
/* nothing. lockup detection does not belong here */
- mb();
+ cpu_relax();
flush_mm = NULL;
flush_va = 0;
@@ -481,10 +481,20 @@ void smp_send_reschedule(int cpu)
}
/*
+ * this function sends a 'reschedule' IPI to all other CPUs.
+ * This is used when RT tasks are starving and other CPUs
+ * might be able to run them:
+ */
+void smp_send_reschedule_allbutself(void)
+{
+ send_IPI_allbutself(RESCHEDULE_VECTOR);
+}
+
+/*
* Structure and data for smp_call_function(). This is designed to minimise
* static memory requirements. It also looks cleaner.
*/
-static DEFINE_SPINLOCK(call_lock);
+static DEFINE_RAW_SPINLOCK(call_lock);
struct call_data_struct {
void (*func) (void *info);
@@ -538,7 +548,7 @@ int smp_call_function (void (*func) (voi
}
/* Can deadlock when called with interrupts disabled */
- WARN_ON(irqs_disabled());
+ WARN_ON(raw_irqs_disabled());
data.func = func;
data.info = info;
@@ -572,7 +582,7 @@ static void stop_this_cpu (void * dummy)
* Remove this CPU:
*/
cpu_clear(smp_processor_id(), cpu_online_map);
- local_irq_disable();
+ raw_local_irq_disable();
disable_local_APIC();
if (cpu_data[smp_processor_id()].hlt_works_ok)
for(;;) halt();
@@ -587,19 +597,20 @@ void smp_send_stop(void)
{
smp_call_function(stop_this_cpu, NULL, 1, 0);
- local_irq_disable();
+ raw_local_irq_disable();
disable_local_APIC();
- local_irq_enable();
+ raw_local_irq_enable();
}
/*
- * Reschedule call back. Nothing to do,
- * all the work is done automatically when
- * we return from the interrupt.
+ * Reschedule call back. Trigger a reschedule pass so that
+ * RT-overload balancing can pass tasks around.
*/
-fastcall void smp_reschedule_interrupt(struct pt_regs *regs)
+fastcall notrace void smp_reschedule_interrupt(struct pt_regs *regs)
{
+ trace_special(regs->eip, 0, 0);
ack_APIC_irq();
+ set_tsk_need_resched(current);
}
fastcall void smp_call_function_interrupt(struct pt_regs *regs)
Index: linux/arch/i386/kernel/smpboot.c
===================================================================
--- linux.orig/arch/i386/kernel/smpboot.c
+++ linux/arch/i386/kernel/smpboot.c
@@ -208,142 +208,299 @@ valid_k7:
;
}
-/*
- * TSC synchronization.
- *
- * We first check whether all CPUs have their TSC's synchronized,
- * then we print a warning if not, and always resync.
- */
+static atomic_t tsc_start_flag, tsc_check_start, tsc_check_stop;
-static atomic_t tsc_start_flag = ATOMIC_INIT(0);
-static atomic_t tsc_count_start = ATOMIC_INIT(0);
-static atomic_t tsc_count_stop = ATOMIC_INIT(0);
-static unsigned long long tsc_values[NR_CPUS];
-
-#define NR_LOOPS 5
-
-static void __init synchronize_tsc_bp (void)
+static int __init check_tsc_warp(void)
{
- int i;
- unsigned long long t0;
- unsigned long long sum, avg;
- long long delta;
- unsigned int one_usec;
- int buggy = 0;
-
- printk(KERN_INFO "checking TSC synchronization across %u CPUs: ", num_booting_cpus());
-
- /* convert from kcyc/sec to cyc/usec */
- one_usec = cpu_khz / 1000;
+ static DEFINE_RAW_SPINLOCK(warp_lock);
+ static long long prev;
+ static unsigned int error;
- atomic_set(&tsc_start_flag, 1);
- wmb();
+ int cpus = num_booting_cpus(), nr = 0;
+ long long start, now, end, delta;
+ atomic_inc(&tsc_check_start);
+ while (atomic_read(&tsc_check_start) != cpus)
+ cpu_relax();
/*
- * We loop a few times to get a primed instruction cache,
- * then the last pass is more or less synchronized and
- * the BP and APs set their cycle counters to zero all at
- * once. This reduces the chance of having random offsets
- * between the processors, and guarantees that the maximum
- * delay between the cycle counters is never bigger than
- * the latency of information-passing (cachelines) between
- * two CPUs.
+ * Run the check for 500 msecs:
*/
- for (i = 0; i < NR_LOOPS; i++) {
- /*
- * all APs synchronize but they loop on '== num_cpus'
- */
- while (atomic_read(&tsc_count_start) != num_booting_cpus()-1)
- mb();
- atomic_set(&tsc_count_stop, 0);
- wmb();
- /*
- * this lets the APs save their current TSC:
- */
- atomic_inc(&tsc_count_start);
+ rdtscll(start);
+ end = start + cpu_khz*500;
- rdtscll(tsc_values[smp_processor_id()]);
+ for (;;) {
/*
- * We clear the TSC in the last loop:
+ * Check for the TSC going backwards (between CPUs):
*/
- if (i == NR_LOOPS-1)
- write_tsc(0, 0);
+ spin_lock(&warp_lock);
+ rdtscll(now);
+ delta = now - prev;
+ prev = now;
+ spin_unlock(&warp_lock);
+ if (unlikely(delta < 0))
+ error = 1;
+ if (now > end)
+ break;
/*
- * Wait for all APs to leave the synchronization point:
+ * Take it easy every couple of iterations,
+ * to not starve other CPUs:
*/
- while (atomic_read(&tsc_count_stop) != num_booting_cpus()-1)
- mb();
- atomic_set(&tsc_count_start, 0);
- wmb();
- atomic_inc(&tsc_count_stop);
+ nr++;
+ if (!(nr % 31))
+ cpu_relax();
}
- sum = 0;
- for (i = 0; i < NR_CPUS; i++) {
- if (cpu_isset(i, cpu_callout_map)) {
- t0 = tsc_values[i];
- sum += t0;
- }
- }
- avg = sum;
- do_div(avg, num_booting_cpus());
+ atomic_inc(&tsc_check_stop);
+ while (atomic_read(&tsc_check_stop) != cpus)
+ cpu_relax();
- sum = 0;
- for (i = 0; i < NR_CPUS; i++) {
- if (!cpu_isset(i, cpu_callout_map))
- continue;
- delta = tsc_values[i] - avg;
- if (delta < 0)
- delta = -delta;
- /*
- * We report bigger than 2 microseconds clock differences.
- */
- if (delta > 2*one_usec) {
- long realdelta;
- if (!buggy) {
- buggy = 1;
- printk("\n");
- }
- realdelta = delta;
- do_div(realdelta, one_usec);
- if (tsc_values[i] < avg)
- realdelta = -realdelta;
+ return error;
+}
- printk(KERN_INFO "CPU#%d had %ld usecs TSC skew, fixed it up.\n", i, realdelta);
- }
+/*
+ * TSC synchronization based on ia64 itc synchronization code. Synchronize
+ * pairs of processors rather than trying to synchronize all of the processors
+ * with a single event. When several processors are all waiting for an
+ * event they don't all see it at the same time. The write will cause
+ * an invalidate on each processor's cache and then they all scramble to
+ * re-read that cache line.
+ *
+ * Writing the TSC resets the upper 32-bits, so we need to be careful
+ * that all of the cpus can be synchronized before we overflow the
+ * 32-bit count.
+ */
- sum += delta;
+#define MASTER 0
+#define SLAVE (SMP_CACHE_BYTES/sizeof(long))
+
+#define NUM_ROUNDS 64 /* magic value */
+#define NUM_ITERS 5 /* likewise */
+
+static volatile unsigned long go[2*SLAVE] __cacheline_aligned;
+static volatile int current_slave = -1;
+static volatile int tsc_sync_complete = 0;
+static volatile int tsc_adj_latency = 0;
+static unsigned int max_rt = 0;
+static unsigned int max_delta = 0;
+
+#define DEBUG_TSC_SYNC 0
+#if DEBUG_TSC_SYNC
+struct tsc_sync_debug {
+ long rt; /* roundtrip time */
+ long master; /* master's timestamp */
+ long diff; /* difference between midpoint and master's timestamp */
+ long lat; /* estimate of tsc adjustment latency */
+} tsc_sync_debug[NUM_ROUNDS*NR_CPUS];
+#endif
+
+void
+sync_master(void)
+{
+ unsigned long n, tsc, last_go_master;
+
+ last_go_master = 0;
+ while (1) {
+ while ((n = go[MASTER]) == last_go_master)
+ rep_nop();
+ if (n == ~0)
+ break;
+ rdtscl(tsc);
+ if (unlikely(!tsc))
+ tsc = 1;
+ go[SLAVE] = tsc;
+ last_go_master = n;
}
- if (!buggy)
- printk("passed.\n");
}
-static void __init synchronize_tsc_ap (void)
+/*
+ * Return the number of cycles by which our TSC differs from the TSC on
+ * the master (time-keeper) CPU. A positive number indicates our TSC is
+ * ahead of the master, negative that it is behind.
+ */
+static inline long
+get_delta (long *rt, long *master)
{
- int i;
+ unsigned long best_t0 = 0, best_t1 = ~0UL, best_tm = 0;
+ unsigned long tcenter, t0, t1, tm, last_go_slave;
+ long i;
+
+ last_go_slave = go[SLAVE];
+ for (i = 0; i < NUM_ITERS; ++i) {
+ rdtscl(t0);
+ go[MASTER] = i+1;
+ while ((tm = go[SLAVE]) == last_go_slave)
+ rep_nop();
+ rdtscl(t1);
+
+ if (t1 - t0 < best_t1 - best_t0)
+ best_t0 = t0, best_t1 = t1, best_tm = tm;
+ last_go_slave = tm;
+ }
+
+ *rt = best_t1 - best_t0;
+ *master = best_tm - best_t0;
+
+ /* average best_t0 and best_t1 without overflow: */
+ tcenter = (best_t0/2 + best_t1/2);
+ if (best_t0 % 2 + best_t1 % 2 == 2)
+ ++tcenter;
+ return tcenter - best_tm;
+}
+
+/*
+ * Synchronize TSC of the current (slave) CPU with the TSC of the MASTER CPU
+ * (normally the time-keeper CPU). We use a closed loop to eliminate the
+ * possibility of unaccounted-for errors (such as getting a machine check in
+ * the middle of a calibration step). The basic idea is for the slave to ask
+ * the master what TSC value it has and to read its own TSC before and after
+ * the master responds. Each iteration gives us three
+ * timestamps:
+ *
+ * slave master
+ *
+ * t0 ---\
+ * ---\
+ * --->
+ * tm
+ * /---
+ * /---
+ * t1 <---
+ *
+ *
+ * The goal is to adjust the slave's TSC such that tm falls exactly half-way
+ * between t0 and t1. If we achieve this, the clocks are synchronized provided
+ * the interconnect between the slave and the master is symmetric. Even if the
+ * interconnect were asymmetric, we would still know that the synchronization
+ * error is smaller than the roundtrip latency (t1 - t0).
+ *
+ * When the interconnect is quiet and symmetric, this lets us synchronize the
+ * TSC to within one or two cycles. However, we can only *guarantee* that the
+ * synchronization is accurate to within a round-trip time, which is typically
+ * in the range of several hundred cycles (e.g., ~500 cycles). In practice,
+ * this means that the TSC's are usually almost perfectly synchronized, but we
+ * shouldn't assume that the accuracy is much better than half a microsecond
+ * or so.
+ */
+
+static void __init
+synchronize_tsc_ap (void)
+{
+ long i, delta, adj, adjust_latency, n_rounds;
+ unsigned long rt, master_time_stamp, tsc;
+#if DEBUG_TSC_SYNC
+ struct tsc_sync_debug *t =
+ &tsc_sync_debug[smp_processor_id() * NUM_ROUNDS];
+#endif
+
+ while (!atomic_read(&tsc_start_flag))
+ mb();
+
+ if (!check_tsc_warp())
+ return;
/*
- * Not every cpu is online at the time
- * this gets called, so we first wait for the BP to
- * finish SMP initialization:
+ * Wait for our turn to synchronize with the boot processor.
*/
- while (!atomic_read(&tsc_start_flag)) mb();
+ while (current_slave != smp_processor_id())
+ rep_nop();
+ adjust_latency = tsc_adj_latency;
+
+ go[SLAVE] = 0;
+ go[MASTER] = 0;
+ write_tsc(0,0);
+ for (i = 0; i < NUM_ROUNDS; ++i) {
+ delta = get_delta(&rt, &master_time_stamp);
+ if (delta == 0)
+ break;
+
+ if (i > 0)
+ adjust_latency += -delta;
+ adj = -delta + adjust_latency/8;
+ rdtscl(tsc);
+ write_tsc(tsc + adj, 0);
+#if DEBUG_TSC_SYNC
+ t[i].rt = rt;
+ t[i].master = master_time_stamp;
+ t[i].diff = delta;
+ t[i].lat = adjust_latency/8;
+#endif
+ }
+ n_rounds = i;
+ go[MASTER] = ~0;
+
+#if (DEBUG_TSC_SYNC == 2)
+ for (i = 0; i < n_rounds; ++i)
+ printk("rt=%5ld master=%5ld diff=%5ld adjlat=%5ld\n",
+ t[i].rt, t[i].master, t[i].diff, t[i].lat);
+
+ printk("CPU %d: synchronized TSC (last diff %ld cycles, maxerr %lu cycles)\n",
+ smp_processor_id(), delta, rt);
+
+ printk("It took %ld rounds\n", n_rounds);
+#endif
+ if (rt > max_rt)
+ max_rt = rt;
+ if (delta < 0)
+ delta = -delta;
+ if (delta > max_delta)
+ max_delta = delta;
+ tsc_adj_latency = adjust_latency;
+ current_slave = -1;
+ while (!tsc_sync_complete)
+ rep_nop();
+}
+
+/*
+ * The boot processor sets its own TSC to zero and then gives each
+ * slave processor the chance to synchronize itself.
+ */
- for (i = 0; i < NR_LOOPS; i++) {
- atomic_inc(&tsc_count_start);
- while (atomic_read(&tsc_count_start) != num_booting_cpus())
- mb();
+static void __init synchronize_tsc_bp (void)
+{
+ unsigned int tsc_low, tsc_high, error;
+ int cpu;
+
+ atomic_set(&tsc_start_flag, 1);
- rdtscll(tsc_values[smp_processor_id()]);
- if (i == NR_LOOPS-1)
- write_tsc(0, 0);
+ printk(KERN_INFO "checking TSC synchronization across %u CPUs: ",
+ num_booting_cpus());
- atomic_inc(&tsc_count_stop);
- while (atomic_read(&tsc_count_stop) != num_booting_cpus()) mb();
+ if (!check_tsc_warp()) {
+ printk("passed.\n");
+ return;
+ }
+ printk("failed.\n");
+
+ printk(KERN_INFO "starting TSC synchronization\n");
+ write_tsc(0, 0);
+
+ for (cpu = 0; cpu < NR_CPUS; cpu++) {
+ if (!cpu_isset(cpu, cpu_callout_map))
+ continue;
+ if (cpu == smp_processor_id())
+ continue;
+ go[MASTER] = 0;
+ current_slave = cpu;
+ sync_master();
+ while (current_slave != -1)
+ rep_nop();
+ }
+ rdtsc(tsc_low, tsc_high);
+ if (tsc_high)
+ printk("TSC overflowed during synchronization\n");
+ else
+ printk("TSC synchronization complete max_delta=%d cycles\n",
+ max_delta);
+ if (max_rt < 4293) {
+ error = (max_rt * 1000000)/cpu_khz;
+ printk("TSC sync round-trip time %d.%03d microseconds\n",
+ error/1000, error%1000);
+ } else {
+ printk("TSC sync round-trip time %d cycles\n", max_rt);
}
+ tsc_sync_complete = 1;
}
-#undef NR_LOOPS
extern void calibrate_delay(void);
@@ -517,7 +674,7 @@ static void __devinit start_secondary(vo
per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE;
/* We can take interrupts now: we're officially "up". */
- local_irq_enable();
+ raw_local_irq_enable();
wmb();
cpu_idle();
@@ -1305,9 +1462,9 @@ int __cpu_disable(void)
/* We enable the timer again on the exit path of the death loop */
disable_APIC_timer();
/* Allow any queued timer interrupts to get serviced */
- local_irq_enable();
+ raw_local_irq_enable();
mdelay(1);
- local_irq_disable();
+ raw_local_irq_disable();
remove_siblinginfo(cpu);
@@ -1351,11 +1508,11 @@ int __devinit __cpu_up(unsigned int cpu)
/* In case one didn't come up */
if (!cpu_isset(cpu, cpu_callin_map)) {
printk(KERN_DEBUG "skipping cpu%d, didn't come online\n", cpu);
- local_irq_enable();
+ raw_local_irq_enable();
return -EIO;
}
- local_irq_enable();
+ raw_local_irq_enable();
per_cpu(cpu_state, cpu) = CPU_UP_PREPARE;
/* Unleash the CPU! */
cpu_set(cpu, smp_commenced_mask);
Index: linux/arch/i386/kernel/switch2poll.c
===================================================================
--- /dev/null
+++ linux/arch/i386/kernel/switch2poll.c
@@ -0,0 +1,5 @@
+/*
+ * Same type of hack used for early_printk. This keeps the code
+ * in one place.
+ */
+#include "../../x86_64/kernel/switch2poll.c"
Index: linux/arch/i386/kernel/time.c
===================================================================
--- linux.orig/arch/i386/kernel/time.c
+++ linux/arch/i386/kernel/time.c
@@ -46,6 +46,7 @@
#include
#include
#include
+#include
#include
#include
@@ -56,6 +57,7 @@
#include
#include
#include
+#include
#include "mach_time.h"
@@ -74,25 +76,14 @@ int pit_latch_buggy; /* ext
#include "do_timer.h"
-u64 jiffies_64 = INITIAL_JIFFIES;
-
-EXPORT_SYMBOL(jiffies_64);
-
unsigned int cpu_khz; /* Detected as we calibrate the TSC */
EXPORT_SYMBOL(cpu_khz);
extern unsigned long wall_jiffies;
-DEFINE_SPINLOCK(rtc_lock);
+DEFINE_RAW_SPINLOCK(rtc_lock);
EXPORT_SYMBOL(rtc_lock);
-#include
-
-DEFINE_SPINLOCK(i8253_lock);
-EXPORT_SYMBOL(i8253_lock);
-
-struct timer_opts *cur_timer __read_mostly = &timer_none;
-
/*
* This is a special lock that is owned by the CPU and holds the index
* register we are working with. It is required for NMI access to the
@@ -122,118 +113,25 @@ void rtc_cmos_write(unsigned char val, u
}
EXPORT_SYMBOL(rtc_cmos_write);
-/*
- * This version of gettimeofday has microsecond resolution
- * and better than microsecond precision on fast x86 machines with TSC.
- */
-void do_gettimeofday(struct timeval *tv)
-{
- unsigned long seq;
- unsigned long usec, sec;
- unsigned long max_ntp_tick;
-
- do {
- unsigned long lost;
-
- seq = read_seqbegin(&xtime_lock);
-
- usec = cur_timer->get_offset();
- lost = jiffies - wall_jiffies;
-
- /*
- * If time_adjust is negative then NTP is slowing the clock
- * so make sure not to go into next possible interval.
- * Better to lose some accuracy than have time go backwards..
- */
- if (unlikely(time_adjust < 0)) {
- max_ntp_tick = (USEC_PER_SEC / HZ) - tickadj;
- usec = min(usec, max_ntp_tick);
-
- if (lost)
- usec += lost * max_ntp_tick;
- }
- else if (unlikely(lost))
- usec += lost * (USEC_PER_SEC / HZ);
-
- sec = xtime.tv_sec;
- usec += (xtime.tv_nsec / 1000);
- } while (read_seqretry(&xtime_lock, seq));
-
- while (usec >= 1000000) {
- usec -= 1000000;
- sec++;
- }
-
- tv->tv_sec = sec;
- tv->tv_usec = usec;
-}
-
-EXPORT_SYMBOL(do_gettimeofday);
-
-int do_settimeofday(struct timespec *tv)
-{
- time_t wtm_sec, sec = tv->tv_sec;
- long wtm_nsec, nsec = tv->tv_nsec;
-
- if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC)
- return -EINVAL;
-
- write_seqlock_irq(&xtime_lock);
- /*
- * This is revolting. We need to set "xtime" correctly. However, the
- * value in this location is the value at the most recent update of
- * wall time. Discover what correction gettimeofday() would have
- * made, and then undo it!
- */
- nsec -= cur_timer->get_offset() * NSEC_PER_USEC;
- nsec -= (jiffies - wall_jiffies) * TICK_NSEC;
-
- wtm_sec = wall_to_monotonic.tv_sec + (xtime.tv_sec - sec);
- wtm_nsec = wall_to_monotonic.tv_nsec + (xtime.tv_nsec - nsec);
-
- set_normalized_timespec(&xtime, sec, nsec);
- set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec);
-
- ntp_clear();
- write_sequnlock_irq(&xtime_lock);
- clock_was_set();
- return 0;
-}
-
-EXPORT_SYMBOL(do_settimeofday);
-
static int set_rtc_mmss(unsigned long nowtime)
{
int retval;
-
- WARN_ON(irqs_disabled());
+ unsigned long flags; /* irq state saved while rtc_lock is held */
/* gets recalled with irq locally disabled */
- spin_lock_irq(&rtc_lock);
+ /* XXX - does irqsave resolve this? -johnstul */
+ spin_lock_irqsave(&rtc_lock, flags); /* irqsave/irqrestore puts back the previous irq state instead of forcing irqs on */
if (efi_enabled)
retval = efi_set_rtc_mmss(nowtime);
else
retval = mach_set_rtc_mmss(nowtime);
+ spin_unlock_irqrestore(&rtc_lock, flags);
- spin_unlock_irq(&rtc_lock);
return retval;
}
-
-int timer_ack;
-
-/* monotonic_clock(): returns # of nanoseconds passed since time_init()
- * Note: This function is required to return accurate
- * time even in the absence of multiple timer ticks.
- */
-unsigned long long monotonic_clock(void)
-{
- return cur_timer->monotonic_clock();
-}
-EXPORT_SYMBOL(monotonic_clock);
-
#if defined(CONFIG_SMP) && defined(CONFIG_FRAME_POINTER)
-unsigned long profile_pc(struct pt_regs *regs)
+unsigned long notrace profile_pc(struct pt_regs *regs)
{
unsigned long pc = instruction_pointer(regs);
@@ -245,70 +143,6 @@ unsigned long profile_pc(struct pt_regs
EXPORT_SYMBOL(profile_pc);
#endif
-/*
- * timer_interrupt() needs to keep up the real-time clock,
- * as well as call the "do_timer()" routine every clocktick
- */
-static inline void do_timer_interrupt(int irq, struct pt_regs *regs)
-{
-#ifdef CONFIG_X86_IO_APIC
- if (timer_ack) {
- /*
- * Subtle, when I/O APICs are used we have to ack timer IRQ
- * manually to reset the IRR bit for do_slow_gettimeoffset().
- * This will also deassert NMI lines for the watchdog if run
- * on an 82489DX-based system.
- */
- spin_lock(&i8259A_lock);
- outb(0x0c, PIC_MASTER_OCW3);
- /* Ack the IRQ; AEOI will end it automatically. */
- inb(PIC_MASTER_POLL);
- spin_unlock(&i8259A_lock);
- }
-#endif
-
- do_timer_interrupt_hook(regs);
-
-
- if (MCA_bus) {
- /* The PS/2 uses level-triggered interrupts. You can't
- turn them off, nor would you want to (any attempt to
- enable edge-triggered interrupts usually gets intercepted by a
- special hardware circuit). Hence we have to acknowledge
- the timer interrupt. Through some incredibly stupid
- design idea, the reset for IRQ 0 is done by setting the
- high bit of the PPI port B (0x61). Note that some PS/2s,
- notably the 55SX, work fine if this is removed. */
-
- irq = inb_p( 0x61 ); /* read the current state */
- outb_p( irq|0x80, 0x61 ); /* reset the IRQ */
- }
-}
-
-/*
- * This is the same as the above, except we _also_ save the current
- * Time Stamp Counter value at the time of the timer interrupt, so that
- * we later on can estimate the time of day more exactly.
- */
-irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
-{
- /*
- * Here we are in the timer irq handler. We just have irqs locally
- * disabled but we don't know if the timer_bh is running on the other
- * CPU. We need to avoid to SMP race with it. NOTE: we don' t need
- * the irq version of write_lock because as just said we have irq
- * locally disabled. -arca
- */
- write_seqlock(&xtime_lock);
-
- cur_timer->mark_offset();
-
- do_timer_interrupt(irq, regs);
-
- write_sequnlock(&xtime_lock);
- return IRQ_HANDLED;
-}
-
/* not static: needed by APM */
unsigned long get_cmos_time(void)
{
@@ -327,139 +161,42 @@ unsigned long get_cmos_time(void)
}
EXPORT_SYMBOL(get_cmos_time);
-static void sync_cmos_clock(unsigned long dummy);
-
-static DEFINE_TIMER(sync_cmos_timer, sync_cmos_clock, 0, 0);
-
-static void sync_cmos_clock(unsigned long dummy)
+/* arch specific timeofday hooks */
+nsec_t read_persistent_clock(void)
{
- struct timeval now, next;
- int fail = 1;
+ return (nsec_t)get_cmos_time() * NSEC_PER_SEC; /* CMOS time in seconds, scaled to nanoseconds */
+}
+void sync_persistent_clock(struct timespec ts)
+{ /* NOTE(review): no ntp_synced() test here, unlike the removed sync_cmos_clock() -- presumably the caller checks; confirm */
+ static unsigned long last_rtc_update; /* tv_sec of the last RTC write; set 600 s back on failure */
/*
* If we have an externally synchronized Linux clock, then update
* CMOS clock accordingly every ~11 minutes. Set_rtc_mmss() has to be
* called as close as possible to 500 ms before the new second starts.
- * This code is run on a timer. If the clock is set, that timer
- * may not expire at the correct time. Thus, we adjust...
*/
- if (!ntp_synced())
- /*
- * Not synced, exit, do not restart a timer (if one is
- * running, let it run out).
- */
+ if (ts.tv_sec <= last_rtc_update + 660) /* at most one update attempt per 660 s (11 min) */
return;
- do_gettimeofday(&now);
- if (now.tv_usec >= USEC_AFTER - ((unsigned) TICK_SIZE) / 2 &&
- now.tv_usec <= USEC_BEFORE + ((unsigned) TICK_SIZE) / 2)
- fail = set_rtc_mmss(now.tv_sec);
-
- next.tv_usec = USEC_AFTER - now.tv_usec;
- if (next.tv_usec <= 0)
- next.tv_usec += USEC_PER_SEC;
-
- if (!fail)
- next.tv_sec = 659;
- else
- next.tv_sec = 0;
-
- if (next.tv_usec >= USEC_PER_SEC) {
- next.tv_sec++;
- next.tv_usec -= USEC_PER_SEC;
+ if((ts.tv_nsec / 1000) >= USEC_AFTER - ((unsigned) TICK_SIZE) / 2 &&
+ (ts.tv_nsec / 1000) <= USEC_BEFORE + ((unsigned) TICK_SIZE) / 2) { /* tv_nsec / 1000 is microseconds; window is USEC_AFTER..USEC_BEFORE widened by half of TICK_SIZE */
+ /* horrible...FIXME */
+ if (set_rtc_mmss(ts.tv_sec) == 0)
+ last_rtc_update = ts.tv_sec;
+ else
+ last_rtc_update = ts.tv_sec - 600; /* do it again in 60 s */
}
- mod_timer(&sync_cmos_timer, jiffies + timeval_to_jiffies(&next));
-}
-
-void notify_arch_cmos_timer(void)
-{
- mod_timer(&sync_cmos_timer, jiffies + 1);
}
-static long clock_cmos_diff, sleep_start;
-
-static struct timer_opts *last_timer;
-static int timer_suspend(struct sys_device *dev, pm_message_t state)
-{
- /*
- * Estimate time zone so that set_time can update the clock
- */
- clock_cmos_diff = -get_cmos_time();
- clock_cmos_diff += get_seconds();
- sleep_start = get_cmos_time();
- last_timer = cur_timer;
- cur_timer = &timer_none;
- if (last_timer->suspend)
- last_timer->suspend(state);
- return 0;
-}
-
-static int timer_resume(struct sys_device *dev)
-{
- unsigned long flags;
- unsigned long sec;
- unsigned long sleep_length;
-
-#ifdef CONFIG_HPET_TIMER
- if (is_hpet_enabled())
- hpet_reenable();
-#endif
- setup_pit_timer();
- sec = get_cmos_time() + clock_cmos_diff;
- sleep_length = (get_cmos_time() - sleep_start) * HZ;
- write_seqlock_irqsave(&xtime_lock, flags);
- xtime.tv_sec = sec;
- xtime.tv_nsec = 0;
- write_sequnlock_irqrestore(&xtime_lock, flags);
- jiffies += sleep_length;
- wall_jiffies += sleep_length;
- if (last_timer->resume)
- last_timer->resume();
- cur_timer = last_timer;
- last_timer = NULL;
- touch_softlockup_watchdog();
- return 0;
-}
-
-static struct sysdev_class timer_sysclass = {
- .resume = timer_resume,
- .suspend = timer_suspend,
- set_kset_name("timer"),
-};
-
-
-/* XXX this driverfs stuff should probably go elsewhere later -john */
-static struct sys_device device_timer = {
- .id = 0,
- .cls = &timer_sysclass,
-};
-
-static int time_init_device(void)
-{
- int error = sysdev_class_register(&timer_sysclass);
- if (!error)
- error = sysdev_register(&device_timer);
- return error;
-}
-
-device_initcall(time_init_device);
-
#ifdef CONFIG_HPET_TIMER
extern void (*late_time_init)(void);
/* Duplicate of time_init() below, with hpet_enable part added */
static void __init hpet_time_init(void)
{
- xtime.tv_sec = get_cmos_time();
- xtime.tv_nsec = (INITIAL_JIFFIES % HZ) * (NSEC_PER_SEC / HZ);
- set_normalized_timespec(&wall_to_monotonic,
- -xtime.tv_sec, -xtime.tv_nsec);
-
if ((hpet_enable() >= 0) && hpet_use_timer) {
printk("Using HPET for base-timer\n");
}
- cur_timer = select_timer();
- printk(KERN_INFO "Using %s for high-res timesource\n",cur_timer->name);
time_init_hook();
}
@@ -467,6 +204,9 @@ static void __init hpet_time_init(void)
void __init time_init(void)
{
+ /* Set the clock to HZ Hz: */
+ setup_pit_timer();
+
#ifdef CONFIG_HPET_TIMER
if (is_hpet_capable()) {
/*
@@ -477,13 +217,5 @@ void __init time_init(void)
return;
}
#endif
- xtime.tv_sec = get_cmos_time();
- xtime.tv_nsec = (INITIAL_JIFFIES % HZ) * (NSEC_PER_SEC / HZ);
- set_normalized_timespec(&wall_to_monotonic,
- -xtime.tv_sec, -xtime.tv_nsec);
-
- cur_timer = select_timer();
- printk(KERN_INFO "Using %s for high-res timesource\n",cur_timer->name);
-
time_init_hook();
}
Index: linux/arch/i386/kernel/time_hpet.c
===================================================================
--- linux.orig/arch/i386/kernel/time_hpet.c
+++ linux/arch/i386/kernel/time_hpet.c
@@ -302,11 +302,11 @@ int hpet_rtc_timer_init(void)
else
hpet_rtc_int_freq = DEFAULT_RTC_INT_FREQ;
- local_irq_save(flags);
+ raw_local_irq_save(flags);
cnt = hpet_readl(HPET_COUNTER);
cnt += ((hpet_tick*HZ)/hpet_rtc_int_freq);
hpet_writel(cnt, HPET_T1_CMP);
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
cfg = hpet_readl(HPET_T1_CFG);
cfg |= HPET_TN_ENABLE | HPET_TN_SETVAL | HPET_TN_32BIT;
Index: linux/arch/i386/kernel/timers/Makefile
===================================================================
--- linux.orig/arch/i386/kernel/timers/Makefile
+++ /dev/null
@@ -1,9 +0,0 @@
-#
-# Makefile for x86 timers
-#
-
-obj-y := timer.o timer_none.o timer_tsc.o timer_pit.o common.o
-
-obj-$(CONFIG_X86_CYCLONE_TIMER) += timer_cyclone.o
-obj-$(CONFIG_HPET_TIMER) += timer_hpet.o
-obj-$(CONFIG_X86_PM_TIMER) += timer_pm.o
Index: linux/arch/i386/kernel/timers/common.c
===================================================================
--- linux.orig/arch/i386/kernel/timers/common.c
+++ /dev/null
@@ -1,172 +0,0 @@
-/*
- * Common functions used across the timers go here
- */
-
-#include
-#include
-#include
-#include
-#include
-
-#include
-#include
-#include
-
-#include "mach_timer.h"
-
-/* ------ Calibrate the TSC -------
- * Return 2^32 * (1 / (TSC clocks per usec)) for do_fast_gettimeoffset().
- * Too much 64-bit arithmetic here to do this cleanly in C, and for
- * accuracy's sake we want to keep the overhead on the CTC speaker (channel 2)
- * output busy loop as low as possible. We avoid reading the CTC registers
- * directly because of the awkward 8-bit access mechanism of the 82C54
- * device.
- */
-
-#define CALIBRATE_TIME (5 * 1000020/HZ)
-
-unsigned long calibrate_tsc(void)
-{
- mach_prepare_counter();
-
- {
- unsigned long startlow, starthigh;
- unsigned long endlow, endhigh;
- unsigned long count;
-
- rdtsc(startlow,starthigh);
- mach_countup(&count);
- rdtsc(endlow,endhigh);
-
-
- /* Error: ECTCNEVERSET */
- if (count <= 1)
- goto bad_ctc;
-
- /* 64-bit subtract - gcc just messes up with long longs */
- __asm__("subl %2,%0\n\t"
- "sbbl %3,%1"
- :"=a" (endlow), "=d" (endhigh)
- :"g" (startlow), "g" (starthigh),
- "0" (endlow), "1" (endhigh));
-
- /* Error: ECPUTOOFAST */
- if (endhigh)
- goto bad_ctc;
-
- /* Error: ECPUTOOSLOW */
- if (endlow <= CALIBRATE_TIME)
- goto bad_ctc;
-
- __asm__("divl %2"
- :"=a" (endlow), "=d" (endhigh)
- :"r" (endlow), "0" (0), "1" (CALIBRATE_TIME));
-
- return endlow;
- }
-
- /*
- * The CTC wasn't reliable: we got a hit on the very first read,
- * or the CPU was so fast/slow that the quotient wouldn't fit in
- * 32 bits..
- */
-bad_ctc:
- return 0;
-}
-
-#ifdef CONFIG_HPET_TIMER
-/* ------ Calibrate the TSC using HPET -------
- * Return 2^32 * (1 / (TSC clocks per usec)) for getting the CPU freq.
- * Second output is parameter 1 (when non NULL)
- * Set 2^32 * (1 / (tsc per HPET clk)) for delay_hpet().
- * calibrate_tsc() calibrates the processor TSC by comparing
- * it to the HPET timer of known frequency.
- * Too much 64-bit arithmetic here to do this cleanly in C
- */
-#define CALIBRATE_CNT_HPET (5 * hpet_tick)
-#define CALIBRATE_TIME_HPET (5 * KERNEL_TICK_USEC)
-
-unsigned long __devinit calibrate_tsc_hpet(unsigned long *tsc_hpet_quotient_ptr)
-{
- unsigned long tsc_startlow, tsc_starthigh;
- unsigned long tsc_endlow, tsc_endhigh;
- unsigned long hpet_start, hpet_end;
- unsigned long result, remain;
-
- hpet_start = hpet_readl(HPET_COUNTER);
- rdtsc(tsc_startlow, tsc_starthigh);
- do {
- hpet_end = hpet_readl(HPET_COUNTER);
- } while ((hpet_end - hpet_start) < CALIBRATE_CNT_HPET);
- rdtsc(tsc_endlow, tsc_endhigh);
-
- /* 64-bit subtract - gcc just messes up with long longs */
- __asm__("subl %2,%0\n\t"
- "sbbl %3,%1"
- :"=a" (tsc_endlow), "=d" (tsc_endhigh)
- :"g" (tsc_startlow), "g" (tsc_starthigh),
- "0" (tsc_endlow), "1" (tsc_endhigh));
-
- /* Error: ECPUTOOFAST */
- if (tsc_endhigh)
- goto bad_calibration;
-
- /* Error: ECPUTOOSLOW */
- if (tsc_endlow <= CALIBRATE_TIME_HPET)
- goto bad_calibration;
-
- ASM_DIV64_REG(result, remain, tsc_endlow, 0, CALIBRATE_TIME_HPET);
- if (remain > (tsc_endlow >> 1))
- result++; /* rounding the result */
-
- if (tsc_hpet_quotient_ptr) {
- unsigned long tsc_hpet_quotient;
-
- ASM_DIV64_REG(tsc_hpet_quotient, remain, tsc_endlow, 0,
- CALIBRATE_CNT_HPET);
- if (remain > (tsc_endlow >> 1))
- tsc_hpet_quotient++; /* rounding the result */
- *tsc_hpet_quotient_ptr = tsc_hpet_quotient;
- }
-
- return result;
-bad_calibration:
- /*
- * the CPU was so fast/slow that the quotient wouldn't fit in
- * 32 bits..
- */
- return 0;
-}
-#endif
-
-
-unsigned long read_timer_tsc(void)
-{
- unsigned long retval;
- rdtscl(retval);
- return retval;
-}
-
-
-/* calculate cpu_khz */
-void init_cpu_khz(void)
-{
- if (cpu_has_tsc) {
- unsigned long tsc_quotient = calibrate_tsc();
- if (tsc_quotient) {
- /* report CPU clock rate in Hz.
- * The formula is (10^6 * 2^32) / (2^32 * 1 / (clocks/us)) =
- * clock/second. Our precision is about 100 ppm.
- */
- { unsigned long eax=0, edx=1000;
- __asm__("divl %2"
- :"=a" (cpu_khz), "=d" (edx)
- :"r" (tsc_quotient),
- "0" (eax), "1" (edx));
- printk("Detected %u.%03u MHz processor.\n",
- cpu_khz / 1000, cpu_khz % 1000);
- }
- }
- }
-}
-
Index: linux/arch/i386/kernel/timers/timer.c
===================================================================
--- linux.orig/arch/i386/kernel/timers/timer.c
+++ /dev/null
@@ -1,75 +0,0 @@
-#include
-#include
-#include
-#include
-
-#ifdef CONFIG_HPET_TIMER
-/*
- * HPET memory read is slower than tsc reads, but is more dependable as it
- * always runs at constant frequency and reduces complexity due to
- * cpufreq. So, we prefer HPET timer to tsc based one. Also, we cannot use
- * timer_pit when HPET is active. So, we default to timer_tsc.
- */
-#endif
-/* list of timers, ordered by preference, NULL terminated */
-static struct init_timer_opts* __initdata timers[] = {
-#ifdef CONFIG_X86_CYCLONE_TIMER
- &timer_cyclone_init,
-#endif
-#ifdef CONFIG_HPET_TIMER
- &timer_hpet_init,
-#endif
-#ifdef CONFIG_X86_PM_TIMER
- &timer_pmtmr_init,
-#endif
- &timer_tsc_init,
- &timer_pit_init,
- NULL,
-};
-
-static char clock_override[10] __initdata;
-
-static int __init clock_setup(char* str)
-{
- if (str)
- strlcpy(clock_override, str, sizeof(clock_override));
- return 1;
-}
-__setup("clock=", clock_setup);
-
-
-/* The chosen timesource has been found to be bad.
- * Fall back to a known good timesource (the PIT)
- */
-void clock_fallback(void)
-{
- cur_timer = &timer_pit;
-}
-
-/* iterates through the list of timers, returning the first
- * one that initializes successfully.
- */
-struct timer_opts* __init select_timer(void)
-{
- int i = 0;
-
- /* find most preferred working timer */
- while (timers[i]) {
- if (timers[i]->init)
- if (timers[i]->init(clock_override) == 0)
- return timers[i]->opts;
- ++i;
- }
-
- panic("select_timer: Cannot find a suitable timer\n");
- return NULL;
-}
-
-int read_current_timer(unsigned long *timer_val)
-{
- if (cur_timer->read_timer) {
- *timer_val = cur_timer->read_timer();
- return 0;
- }
- return -1;
-}
Index: linux/arch/i386/kernel/timers/timer_cyclone.c
===================================================================
--- linux.orig/arch/i386/kernel/timers/timer_cyclone.c
+++ /dev/null
@@ -1,259 +0,0 @@
-/* Cyclone-timer:
- * This code implements timer_ops for the cyclone counter found
- * on IBM x440, x360, and other Summit based systems.
- *
- * Copyright (C) 2002 IBM, John Stultz (johnstul@us.ibm.com)
- */
-
-
-#include
-#include
-#include
-#include
-#include
-#include
-
-#include
-#include
-#include
-#include
-#include
-
-#include "io_ports.h"
-
-/* Number of usecs that the last interrupt was delayed */
-static int delay_at_last_interrupt;
-
-#define CYCLONE_CBAR_ADDR 0xFEB00CD0
-#define CYCLONE_PMCC_OFFSET 0x51A0
-#define CYCLONE_MPMC_OFFSET 0x51D0
-#define CYCLONE_MPCS_OFFSET 0x51A8
-#define CYCLONE_TIMER_FREQ 100000000
-#define CYCLONE_TIMER_MASK (((u64)1<<40)-1) /* 40 bit mask */
-int use_cyclone = 0;
-
-static u32* volatile cyclone_timer; /* Cyclone MPMC0 register */
-static u32 last_cyclone_low;
-static u32 last_cyclone_high;
-static unsigned long long monotonic_base;
-static seqlock_t monotonic_lock = SEQLOCK_UNLOCKED;
-
-/* helper macro to atomically read both cyclone counter registers */
-#define read_cyclone_counter(low,high) \
- do{ \
- high = cyclone_timer[1]; low = cyclone_timer[0]; \
- } while (high != cyclone_timer[1]);
-
-
-static void mark_offset_cyclone(void)
-{
- unsigned long lost, delay;
- unsigned long delta = last_cyclone_low;
- int count;
- unsigned long long this_offset, last_offset;
-
- write_seqlock(&monotonic_lock);
- last_offset = ((unsigned long long)last_cyclone_high<<32)|last_cyclone_low;
-
- spin_lock(&i8253_lock);
- read_cyclone_counter(last_cyclone_low,last_cyclone_high);
-
- /* read values for delay_at_last_interrupt */
- outb_p(0x00, 0x43); /* latch the count ASAP */
-
- count = inb_p(0x40); /* read the latched count */
- count |= inb(0x40) << 8;
-
- /*
- * VIA686a test code... reset the latch if count > max + 1
- * from timer_pit.c - cjb
- */
- if (count > LATCH) {
- outb_p(0x34, PIT_MODE);
- outb_p(LATCH & 0xff, PIT_CH0);
- outb(LATCH >> 8, PIT_CH0);
- count = LATCH - 1;
- }
- spin_unlock(&i8253_lock);
-
- /* lost tick compensation */
- delta = last_cyclone_low - delta;
- delta /= (CYCLONE_TIMER_FREQ/1000000);
- delta += delay_at_last_interrupt;
- lost = delta/(1000000/HZ);
- delay = delta%(1000000/HZ);
- if (lost >= 2)
- jiffies_64 += lost-1;
-
- /* update the monotonic base value */
- this_offset = ((unsigned long long)last_cyclone_high<<32)|last_cyclone_low;
- monotonic_base += (this_offset - last_offset) & CYCLONE_TIMER_MASK;
- write_sequnlock(&monotonic_lock);
-
- /* calculate delay_at_last_interrupt */
- count = ((LATCH-1) - count) * TICK_SIZE;
- delay_at_last_interrupt = (count + LATCH/2) / LATCH;
-
-
- /* catch corner case where tick rollover occured
- * between cyclone and pit reads (as noted when
- * usec delta is > 90% # of usecs/tick)
- */
- if (lost && abs(delay - delay_at_last_interrupt) > (900000/HZ))
- jiffies_64++;
-}
-
-static unsigned long get_offset_cyclone(void)
-{
- u32 offset;
-
- if(!cyclone_timer)
- return delay_at_last_interrupt;
-
- /* Read the cyclone timer */
- offset = cyclone_timer[0];
-
- /* .. relative to previous jiffy */
- offset = offset - last_cyclone_low;
-
- /* convert cyclone ticks to microseconds */
- /* XXX slow, can we speed this up? */
- offset = offset/(CYCLONE_TIMER_FREQ/1000000);
-
- /* our adjusted time offset in microseconds */
- return delay_at_last_interrupt + offset;
-}
-
-static unsigned long long monotonic_clock_cyclone(void)
-{
- u32 now_low, now_high;
- unsigned long long last_offset, this_offset, base;
- unsigned long long ret;
- unsigned seq;
-
- /* atomically read monotonic base & last_offset */
- do {
- seq = read_seqbegin(&monotonic_lock);
- last_offset = ((unsigned long long)last_cyclone_high<<32)|last_cyclone_low;
- base = monotonic_base;
- } while (read_seqretry(&monotonic_lock, seq));
-
-
- /* Read the cyclone counter */
- read_cyclone_counter(now_low,now_high);
- this_offset = ((unsigned long long)now_high<<32)|now_low;
-
- /* convert to nanoseconds */
- ret = base + ((this_offset - last_offset)&CYCLONE_TIMER_MASK);
- return ret * (1000000000 / CYCLONE_TIMER_FREQ);
-}
-
-static int __init init_cyclone(char* override)
-{
- u32* reg;
- u32 base; /* saved cyclone base address */
- u32 pageaddr; /* page that contains cyclone_timer register */
- u32 offset; /* offset from pageaddr to cyclone_timer register */
- int i;
-
- /* check clock override */
- if (override[0] && strncmp(override,"cyclone",7))
- return -ENODEV;
-
- /*make sure we're on a summit box*/
- if(!use_cyclone) return -ENODEV;
-
- printk(KERN_INFO "Summit chipset: Starting Cyclone Counter.\n");
-
- /* find base address */
- pageaddr = (CYCLONE_CBAR_ADDR)&PAGE_MASK;
- offset = (CYCLONE_CBAR_ADDR)&(~PAGE_MASK);
- set_fixmap_nocache(FIX_CYCLONE_TIMER, pageaddr);
- reg = (u32*)(fix_to_virt(FIX_CYCLONE_TIMER) + offset);
- if(!reg){
- printk(KERN_ERR "Summit chipset: Could not find valid CBAR register.\n");
- return -ENODEV;
- }
- base = *reg;
- if(!base){
- printk(KERN_ERR "Summit chipset: Could not find valid CBAR value.\n");
- return -ENODEV;
- }
-
- /* setup PMCC */
- pageaddr = (base + CYCLONE_PMCC_OFFSET)&PAGE_MASK;
- offset = (base + CYCLONE_PMCC_OFFSET)&(~PAGE_MASK);
- set_fixmap_nocache(FIX_CYCLONE_TIMER, pageaddr);
- reg = (u32*)(fix_to_virt(FIX_CYCLONE_TIMER) + offset);
- if(!reg){
- printk(KERN_ERR "Summit chipset: Could not find valid PMCC register.\n");
- return -ENODEV;
- }
- reg[0] = 0x00000001;
-
- /* setup MPCS */
- pageaddr = (base + CYCLONE_MPCS_OFFSET)&PAGE_MASK;
- offset = (base + CYCLONE_MPCS_OFFSET)&(~PAGE_MASK);
- set_fixmap_nocache(FIX_CYCLONE_TIMER, pageaddr);
- reg = (u32*)(fix_to_virt(FIX_CYCLONE_TIMER) + offset);
- if(!reg){
- printk(KERN_ERR "Summit chipset: Could not find valid MPCS register.\n");
- return -ENODEV;
- }
- reg[0] = 0x00000001;
-
- /* map in cyclone_timer */
- pageaddr = (base + CYCLONE_MPMC_OFFSET)&PAGE_MASK;
- offset = (base + CYCLONE_MPMC_OFFSET)&(~PAGE_MASK);
- set_fixmap_nocache(FIX_CYCLONE_TIMER, pageaddr);
- cyclone_timer = (u32*)(fix_to_virt(FIX_CYCLONE_TIMER) + offset);
- if(!cyclone_timer){
- printk(KERN_ERR "Summit chipset: Could not find valid MPMC register.\n");
- return -ENODEV;
- }
-
- /*quick test to make sure its ticking*/
- for(i=0; i<3; i++){
- u32 old = cyclone_timer[0];
- int stall = 100;
- while(stall--) barrier();
- if(cyclone_timer[0] == old){
- printk(KERN_ERR "Summit chipset: Counter not counting! DISABLED\n");
- cyclone_timer = 0;
- return -ENODEV;
- }
- }
-
- init_cpu_khz();
-
- /* Everything looks good! */
- return 0;
-}
-
-
-static void delay_cyclone(unsigned long loops)
-{
- unsigned long bclock, now;
- if(!cyclone_timer)
- return;
- bclock = cyclone_timer[0];
- do {
- rep_nop();
- now = cyclone_timer[0];
- } while ((now-bclock) < loops);
-}
-/************************************************************/
-
-/* cyclone timer_opts struct */
-static struct timer_opts timer_cyclone = {
- .name = "cyclone",
- .mark_offset = mark_offset_cyclone,
- .get_offset = get_offset_cyclone,
- .monotonic_clock = monotonic_clock_cyclone,
- .delay = delay_cyclone,
-};
-
-struct init_timer_opts __initdata timer_cyclone_init = {
- .init = init_cyclone,
- .opts = &timer_cyclone,
-};
Index: linux/arch/i386/kernel/timers/timer_hpet.c
===================================================================
--- linux.orig/arch/i386/kernel/timers/timer_hpet.c
+++ /dev/null
@@ -1,212 +0,0 @@
-/*
- * This code largely moved from arch/i386/kernel/time.c.
- * See comments there for proper credits.
- */
-
-#include