Index: linux/Documentation/DocBook/Makefile =================================================================== --- linux.orig/Documentation/DocBook/Makefile +++ linux/Documentation/DocBook/Makefile @@ -10,7 +10,7 @@ DOCBOOKS := wanbook.xml z8530book.xml mc kernel-hacking.xml kernel-locking.xml deviceiobook.xml \ procfs-guide.xml writing_usb_driver.xml \ sis900.xml kernel-api.xml journal-api.xml lsm.xml usb.xml \ - gadget.xml libata.xml mtdnand.xml librs.xml + gadget.xml libata.xml mtdnand.xml librs.xml genericirq.xml ### # The build process is as follows (targets): Index: linux/Documentation/DocBook/genericirq.tmpl =================================================================== --- /dev/null +++ linux/Documentation/DocBook/genericirq.tmpl @@ -0,0 +1,560 @@ + + + + + + Linux generic IRQ handling + + + + Thomas + Gleixner + +
+ tglx@linutronix.de +
+
+
+ + Ingo + Molnar + +
+ mingo@elte.hu +
+
+
+
+ + + 2005 + Thomas Gleixner + + + 2005 + Ingo Molnar + + + + + This documentation is free software; you can redistribute + it and/or modify it under the terms of the GNU General Public + License version 2 as published by the Free Software Foundation. + + + + This program is distributed in the hope that it will be + useful, but WITHOUT ANY WARRANTY; without even the implied + warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + See the GNU General Public License for more details. + + + + You should have received a copy of the GNU General Public + License along with this program; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, + MA 02111-1307 USA + + + + For more details see the file COPYING in the source + distribution of Linux. + + +
+ + + + + Introduction + + The generic interrupt handling layer is designed to provide a + complete abstraction of interrupt handling for device drivers + and is able to handle all different types of interrupt controller + hardware. Device drivers use generic API functions to request, enable, + disable and free interrupts. The drivers do not have to know anything + about interrupt hardware, so they can be used on different hardware + platforms without code changes. + + + This documentation is provided for developers who want to implement + architecture interrupt support based on the Generic IRQ handling layer. + + + + + Rationale + + The original implementation of interrupt handling in Linux is using + the __do_IRQ() super-handler, which must be able to deal with every + type of interrupt logic. This is achieved by an 'interrupt type' + structure and runtime flags to handle special cases. + Furthermore the superhandler assumed a certain type of interrupt + handling hardware and turned out to be not capable of handling all + kinds of interrupt controller hardware which can be found through + the architectures. The all in one approach also adds unnecessary + complexity for every user. + + + Originally, Russell King identified different types of handlers to + build a quite universal set for the ARM interrupt handler + implementation in Linux 2.5/2.6. He distinguished between: + + Level type + Edge type + Simple type + + In the SMP world of the __do_IRQ() super-handler another type + was identified: + + Per CPU type + + + + This split implementation of handlers allows to optimize the flow + of the interrupt handling for each specific interrupt type. + This reduces complexity in that particular code path and allows + the optimized handling of a given type. + + + The original general implementation uses interrupt_type structures + to differentiate the flow control in the super-handler. This + leads to a mix of flow logic and code related to hardware details. 
+ Russell King's ARM implementation which replaced the type by a chip + abstraction did the mix the other way around. + + + The natural conclusion was a clean separation of the 'type flow' + and the 'chip'. Analysing a couple of architecture implementations + reveals that many of them can use a generic set of 'type flow' + implementations and only need to add the chip level specific code. + The separation is also valuable for the (sub)architectures, + which need specific quirks in the type flow itself, because it + provides a more transparent design. + + + Each interrupt type implementation has assigned its own flow + handler, which should be normally one of the generic + implementations. The flow handler implementation makes it + simple to provide demultiplexing handlers which can be found in + embedded platforms on various architectures. + + + The separation makes the generic interrupt handling more flexible + and extensible. An (sub)architecture can use a generic type flow + implementation for e.g. 'level type' interrupts and add a + (sub)architecture specific 'edge type' implementation. + + + To make the transition to the new model easier and prevent the + breakage of existing implementations the __do_IRQ() super-handler + is still available. This leads to a kind of duality for the time + being. Over time the new model should achieve a homogeneous + implementation scheme over all architectures with enhanced + maintainability and cleanliness. + + + + Known Bugs And Assumptions + + None (hopefully). + + + + + Abstraction layers + + There are three main levels of abstraction in the interrupt code: + + Highlevel driver API + Abstract interrupt type + Chiplevel hardware encapsulation + + + + The separation of interrupt type and chip level functionality + provides the most flexible design. This implementation can handle + all kinds of interrupt hardware and the necessary workarounds for + the interrupt types without the need of redundant implementations. 
+ The separation also handles edge and level type interrupts + on the same hardware chip. + + + Interrupt control flow + + Each interrupt is described by an interrupt description structure + irq_desc. The interrupt is referenced by an 'unsigned int' numeric + value which selects the corresponding interrupt description structure + in the description structures array. + The description structure contains status information and pointers + to the interrupt type structure and the interrupt chip structure + which are assigned to this interrupt. + + + Whenever an interrupt triggers, the lowlevel arch code calls into + the generic interrupt code by calling desc->handler->handle_irq(). + This highlevel IRQ handling function only uses other + desc->handler primitives which describe the control flow operation + necessary for the interrupt type. These operations are calling + the chip primitives referenced by the assigned chip description + structure. + + + + Highlevel Driver API + + The highlevel Driver API consists of following functions: + + request_irq() + free_irq() + disable_irq() + enable_irq() + disable_irq_nosync() (SMP only) + synchronize_irq() (SMP only) + set_irq_type() + set_irq_wake() + set_irq_data() + set_irq_chip() + set_irq_chip_data() + + See the autogenerated function documentation for details. + + + + Abstract interrupt type + + The 'interrupt type' (struct irq_type) abstraction mainly consists of + methods which implement the 'interrupt handling flow'. The generic + layer provides a set of pre-defined types: + + default_level_type + default_edge_type + default_simple_type + default_percpu_type + + The default type implementations use the generic type handlers. + + handle_level_type + handle_edge_type + handle_simple_type + handle_percpu_type + + The interrupt types (either predefined or architecture specific) are + assigned to specific interrupts by the architecture either during + bootup or during device initialization. 
+ + + Default type implementations + + Helper functions + + The helper functions call the chip primitives and + are used by the default type implementations. + Following helper functions are implemented (simplified excerpt): + +default_enable(irq) +{ + desc->chip->unmask(irq); +} + +default_disable(irq) +{ + desc->chip->mask(irq); +} + +default_ack(irq) +{ + chip->ack(irq); +} + +default_mask_ack(irq) +{ + if (chip->mask_ack) { + chip->mask_ack(irq); + } else { + chip->mask(irq); + chip->ack(irq); + } +} + +noop(irq) +{ +} + +default_set_type(irq, type) +{ + if (desc->chip->set_type) { + if (desc->chip->set_type(irq, type)) + return NULL; + } + + return default_handler for type; +} + + + + + Default Level IRQ type + + The default Level IRQ type implements the functions + + enabledefault_enable + disabledefault_disable + startdefault_mask_ack + enddefault_enable + handle_irqhandle_level_irq + set_typedefault_set_type + + + + + Default Edge IRQ type + + The default Edge IRQ type implements the functions + + enabledefault_enable + disabledefault_disable + startdefault_ack + holddefault_mask_ack + endnoop + handle_irqhandle_edge_irq + set_typedefault_set_type + + + + + Default simple IRQ type + + The default simple IRQ type implements the functions + + enablenoop + disablenoop + handle_irqhandle_simple_irq + + + + + Default per CPU IRQ type + + The default per CPU IRQ type implements the functions + + enabledefault_enable + disabledefault_disable + startdefault_ack + enddefault_enable + handle_irqhandle_percpu_irq + + + + + + Default type handler implementations + + Default Level IRQ type handler + + handle_level_type provides a generic implementation + for level type interrupts. + + + Following control flow is implemented (simplified excerpt): + +desc->handler->start(); +handle_IRQ_event(desc->action); +desc->handler->end(); + + + + + Default Edge IRQ type handler + + handle_edge_type provides a generic implementation + for edge type interrupts. 
+ + + Following control flow is implemented (simplified excerpt): + +if (desc->status & running) { + desc->handler->hold(); + desc->status |= pending | masked; + return; +} +desc->handler->start(); +desc->status |= running; +do { + if (desc->status & masked) + desc->handler->enable(); + desc->status &= ~pending; + handle_IRQ_event(desc->action); +} while (desc->status & pending); +desc->status &= ~running; +desc->handler->end(); + + + + + Default simple IRQ type handler + + handle_simple_type provides a generic implementation + for simple type interrupts. + + + Note: The simple type handler does not call any + handler/chip primitives. + + + Following control flow is implemented (simplified excerpt): + +handle_IRQ_event(desc->action); + + + + + Default per CPU type handler + + handle_percpu_type provides a generic implementation + for per CPU type interrupts. + + + Per CPU interrupts are only available on SMP and + the handler provides a simplified version without + locking. + + + Following control flow is implemented (simplified excerpt): + +desc->handler->start(); +handle_IRQ_event(desc->action); +desc->handler->end(); + + + + + + Architecture specific type implementation + + If an architecture needs to implement its own type structures, then + the following primitives have to be implemented: + + handle_irq() - The handle_irq function pointer should preferably point to + one of the generic type handler functions + startup() - Optional + shutdown() - Optional + enable() + disable() + start() + hold() - For edge type interrupts only + end() + set_type - Optional + set_affinity - SMP only + + + + + Quirks and optimizations + + The generic functions are intended for 'clean' architectures and chips, + which have no platform-specific IRQ handling quirks. If an architecture + needs to implement quirks on the 'flow' level then it can do so by + overriding the irqtype. This is also done for compatibility reasons, as + most architectures use irqtypes only at the moment. 
+ + + An architecture could implement all of its IRQ logic via pushing + chip handling details into the irqtype's ->start()/->end()/->hold() + functions. This is only recommended when the underlying primitives + are pure chip primitives without additional quirks. The direct pointer + to the chip functions reduces the indirection level by one. + + + + + Chiplevel hardware encapsulation + + The chip level hardware description structure irq_chip + contains all the direct chip relevant functions, which + can be utilized by the irq_type implementations. + + ack() + mask_ack() - Optional, recommended for performance + mask() + unmask() + retrigger() - Optional + set_type() - Optional + set_wake() - Optional + + These primitives are strictly intended to mean what they say: ack means + ACK, masking means masking of an IRQ line, etc. It is up to the flow + handler(s) to use these basic units of lowlevel functionality. + + + + + + __do_IRQ entry point + + The original implementation __do_IRQ() is an alternative entry + point for all types of interrupts. + + + This handler turned out to be not suitable for all + interrupt hardware and was therefore reimplemented with split + functionality for edge/level/simple/percpu interrupts. This is not + only a functional optimization. It also shortens code paths for + interrupts. + + + To make use of the split implementation, replace the call to + __do_IRQ by a call to desc->handler->handle_irq() and associate + the appropriate handler function to desc->handler->handle_irq(). + In most cases the generic type and handler implementations should + be sufficient. + + + + + Locking on SMP + + The locking of chip registers is up to the architecture that + defines the chip primitives. There is a chip->lock field that can be used + for serialization, but the generic layer does not touch it. The per-irq + structure is protected via desc->lock, by the generic layer. 
+ + + + Structures + + This chapter contains the autogenerated documentation of the structures which are + used in the generic IRQ layer. + +!Iinclude/linux/irq.h + + + + Public Functions Provided + + This chapter contains the autogenerated documentation of the kernel API functions + which are exported. + +!Ekernel/irq/manage.c + + + + Internal Functions Provided + + This chapter contains the autogenerated documentation of the internal functions. + +!Ikernel/irq/handle.c + + + + Credits + + The following people have contributed to this document: + + Thomas Gleixnertglx@linutronix.de + Ingo Molnarmingo@elte.hu + + + +
Index: linux/Documentation/DocBook/kernel-api.tmpl =================================================================== --- linux.orig/Documentation/DocBook/kernel-api.tmpl +++ linux/Documentation/DocBook/kernel-api.tmpl @@ -54,6 +54,11 @@ !Ekernel/sched.c !Ekernel/timer.c + High-precision timers +!Iinclude/linux/ktime.h +!Iinclude/linux/ktimer.h +!Ekernel/ktimers.c + Internal Functions !Ikernel/exit.c !Ikernel/signal.c Index: linux/Documentation/RCU/proc.txt =================================================================== --- /dev/null +++ linux/Documentation/RCU/proc.txt @@ -0,0 +1,119 @@ +/proc Filesystem Entries for RCU + + +CONFIG_RCU_STATS + +The CONFIG_RCU_STATS config option is available only in conjunction with +CONFIG_PREEMPT_RCU. It makes four /proc entries available, namely: rcuctrs, +rcuptrs, rcugp, and rcustats. + +/proc/rcuctrs + + CPU last cur + 0 1 1 + 1 1 1 + 2 1 1 + 3 0 2 + ggp = 230725 + +This displays the number of processes that started RCU read-side critical +sections on each CPU. In absence of preemption, the "last" and "cur" +counts for a given CPU will always sum to one. Therefore, in the example +output above, each CPU has started one RCU read-side critical section +that was later preempted. The "last" column counts RCU read-side critical +sections that started prior to the last counter flip, while the "cur" +column counts critical sections that started after the last counter flip. + +The "ggp" count is a count of the number of counter flips since boot. +Since this is shown as an odd number, the "cur" counts are stored in +the zero-th element of each of the per-CPU arrays, and the "last" counts +are stored in the first element of each of the per-CPU arrays. + + +/proc/rcuptrs + + nl=c04c7160/c04c7960 nt=c04c72d0 + wl=c04c7168/c04c794c wt=c04c72bc dl=c04c7170/00000000 dt=c04c7170 + +This displays the head and tail of each of CONFIG_PREEMPT_RCU's three +callback lists. 
This will soon change to display this on a per-CPU +basis, since each CPU will soon have its own set of callback lists. +In the example above, the "next" list header is located at hex address +0xc04c7160, the first element on the list at hex address 0xc04c7960, +and the last element on the list at hex address 0xc04c72d0. The "wl=" +and "wt=" output is similar for the "wait" list, and the "dl=" and "dt=" +output for the "done" list. The "done" list is normally emptied very +quickly after being filled, so will usually be empty as shown above. +Note that the tail pointer points into the list header in this case. + +Callbacks are placed in the "next" list by call_rcu(), moved to the +"wait" list after the next counter flip, and moved to the "done" list +on the counter flip after that. Once on the "done" list, the callbacks +are invoked. + + +/proc/rcugp + + oldggp=241419 newggp=241421 + +This entry invokes synchronize_rcu() and prints out the number of counter +flips since boot before and after the synchronize_rcu(). These two +numbers will always differ by at least two. Unless RCU is broken. ;-) + + +/proc/rcustats + + ggp=242416 lgp=242416 sr=0 rcc=396233 + na=2090938 nl=9 wa=2090929 wl=9 dl=0 dr=2090920 di=2090920 + rtf1=22230730 rtf2=20139162 rtf3=242416 rtfe1=2085911 rtfe2=5657 rtfe3=19896746 + +The quantities printed are as follows: + +o "ggp=": The number of flips since boot. + +o "lgp=": The number of flips sensed by the local structure since + boot. This will soon be per-CPU. + +o "sr=": The number of explicit calls to synchronize_rcu(). + Except that this is currently broken, so always reads as zero. + It is likely to be removed... + +o "rcc=": The number of calls to rcu_check_callbacks(). + +o "na=": The number of callbacks that call_rcu() has registered + since boot. + +o "nl=": The number of callbacks currently on the "next" list. + +o "wa=": The number of callbacks that have moved to the "wait" + list since boot. 
+ +o "wl=": The number of callbacks currently on the "wait" list. + +o "da=": The number of callbacks that have been moved to the + "done" list since boot. + +o "dl=": The number of callbacks currently on the "done" list. + +o "dr=": The number of callbacks that have been removed from the + "done" list since boot. + +o "di=": The number of callbacks that have been invoked after being + removed from the "done" list. + +o "rtf1=": The number of attempts to flip the counters. + +o "rtf2=": The number of attempts to flip the counters that successfully + acquired the fliplock. + +o "rtf3=": The number of successful counter flips. + +o "rtfe1=": The number of attempts to flip the counters that failed + due to the lock being held by someone else. + +o "rtfe2=": The number of attempts to flip the counters that were + abandoned due to someone else doing the job for us. + +o "rtfe3=": The number of attempts to flip the counters that failed + due to some task still being in an RCU read-side critical section + starting from before the last successful counter flip. Index: linux/Documentation/RCU/torture.txt =================================================================== --- /dev/null +++ linux/Documentation/RCU/torture.txt @@ -0,0 +1,127 @@ +RCU Torture Test Operation + + +CONFIG_RCU_TORTURE_TEST + +The CONFIG_RCU_TORTURE_TEST config option is available for all RCU +implementations. It creates an rcutorture kernel module that can +be loaded to run a torture test. The test periodically outputs +status messages via printk(), which can be examined via the dmesg +command (perhaps grepping for "rcutorture"). The test is started +when the module is loaded, and stops when the module is unloaded. + +However, actually setting this config option to "y" results in the system +running the test immediately upon boot, and ending only when the system +is taken down. 
Normally, one will instead want to build the system +with CONFIG_RCU_TORTURE_TEST=m and to use modprobe and rmmod to control +the test, perhaps using a script similar to the one shown at the end of +this document. Note that you will need CONFIG_MODULE_UNLOAD in order +to be able to end the test. + + +MODULE PARAMETERS + +This module has the following parameters: + +nreaders This is the number of RCU reading threads supported. + The default is twice the number of CPUs. Why twice? + To properly exercise RCU implementations with preemptible + read-side critical sections. + +stat_interval The number of seconds between output of torture + statistics (via printk()). Regardless of the interval, + statistics are printed when the module is unloaded. + Setting the interval to zero causes the statistics to + be printed -only- when the module is unloaded, and this + is the default. + +verbose Enable debug printk()s. Default is disabled. + + +OUTPUT + +The statistics output is as follows: + + rcutorture: --- Start of test: nreaders=16 stat_interval=0 verbose=0 + rcutorture: rtc: 0000000000000000 ver: 1916 tfle: 0 rta: 1916 rtaf: 0 rtf: 1915 rtmbe: 0 + rcutorture: Reader Pipe: 1466408 9747 0 0 0 0 0 0 0 0 0 + rcutorture: Reader Batch: 1464477 11678 0 0 0 0 0 0 0 0 + rcutorture: Free-Block Circulation: 1915 1915 1915 1915 1915 1915 1915 1915 1915 1915 0 + rcutorture: --- End of test: SUCCESS + +The command "dmesg | grep rcutorture:" will extract this information on +most systems. On more esoteric configurations, it may be necessary to +use other commands to access the output of the printk()s used by +the RCU torture test. The printk()s use KERN_ALERT, so they should +be evident. ;-) + +The entries are as follows: + +o "ggp": The number of counter flips (or batches) since boot. + +o "rtc": The hexadecimal address of the structure currently visible + to readers. + +o "ver": The number of times since boot that the rcutw writer task + has changed the structure visible to readers. 
+ +o "tfle": If non-zero, indicates that the "torture freelist" + containing structures to be placed into the "rtc" area is empty. + This condition is important, since it can fool you into thinking + that RCU is working when it is not. :-/ + +o "rta": Number of structures allocated from the torture freelist. + +o "rtaf": Number of allocations from the torture freelist that have + failed due to the list being empty. + +o "rtf": Number of frees into the torture freelist. + +o "rtmbe": Number of memory-barrier failures detected (which would + indicate problems with either the test itself or the underlying + memory-barrier primitives for the CPU architecture on which the + failure occurred). + +o "Reader Pipe": Histogram of "ages" of structures seen by readers. + If any entries past the first two are non-zero, RCU is broken. + And rcutorture prints the error flag string "!!!" to make sure + you notice. The age of a newly allocated structure is zero, + it becomes one when removed from reader visibility, and is + incremented once per grace period subsequently -- and is freed + after passing through (RCU_TORTURE_PIPE_LEN-2) grace periods. + + The output displayed above was taken from a correctly working + RCU. If you want to see what it looks like when broken, break + it yourself. ;-) + +o "Reader Batch": Another histogram of "ages" of structures seen + by readers, but in terms of counter flips (or batches) rather + than in terms of grace periods. The legal number of non-zero + entries is again two. The reason for this separate view is + that it is easier to get the third entry to show up in the + "Reader Batch" list than in the "Reader Pipe" list. + +o "Free-Block Circulation": Shows the number of torture structures + that have reached a given point in the pipeline. 
The first element + should closely correspond to the number of structures allocated, + the second to the number that have been removed from reader view, + and all but the last remaining to the corresponding number of + passes through a grace period. The last entry should be zero, + as it is only incremented if a torture structure's counter + somehow gets incremented farther than it should. + + +USAGE + +The following script may be used to torture RCU: + + #!/bin/sh + + modprobe rcutorture + sleep 100 + rmmod rcutorture + dmesg | grep rcutorture: + +The output can be manually inspected for the error flag of "!!!". +One could of course create a more elaborate script that automatically +checked for such errors. Index: linux/Documentation/kernel-parameters.txt =================================================================== --- linux.orig/Documentation/kernel-parameters.txt +++ linux/Documentation/kernel-parameters.txt @@ -52,6 +52,7 @@ restrictions referred to are that the re MTD MTD support is enabled. NET Appropriate network support is enabled. NUMA NUMA support is enabled. + GENERIC_TIME The generic timeofday code is enabled. NFS Appropriate NFS support is enabled. OSS OSS sound support is enabled. PARIDE The ParIDE subsystem is enabled. @@ -329,10 +330,11 @@ running once the system is up. Value can be changed at runtime via /selinux/checkreqprot. - clock= [BUGS=IA-32,HW] gettimeofday timesource override. - Forces specified timesource (if avaliable) to be used - when calculating gettimeofday(). If specicified - timesource is not avalible, it defaults to PIT. + clock= [BUGS=IA-32, HW] gettimeofday clocksource override. + [Deprecated] + Forces specified clocksource (if available) to be used + when calculating gettimeofday(). If specified + clocksource is not available, it defaults to PIT. Format: { pit | tsc | cyclone | pmtmr } hpet= [IA-32,HPET] option to disable HPET and use PIT. @@ -1479,6 +1481,10 @@ running once the system is up. 
time Show timing data prefixed to each printk message line + clocksource= [GENERIC_TIME] Override the default clocksource + Override the default clocksource and use the clocksource + with the name specified. + tipar.timeout= [HW,PPT] Set communications timeout in tenths of a second (default 15). Index: linux/Documentation/ktimers.txt =================================================================== --- /dev/null +++ linux/Documentation/ktimers.txt @@ -0,0 +1,239 @@ + +ktimers - subsystem for high-precision kernel timers +---------------------------------------------------- + +This patch introduces a new subsystem for high-precision kernel timers. + +Why two timer subsystems? After a lot of back and forth trying to +integrate high-precision and high-resolution features into the existing +timer framework, and after testing various such high-resolution timer +implementations in practice, we came to the conclusion that the timer +wheel code is fundamentally not suitable for such an approach. We +initially didn't believe this ('there must be a way to solve this'), and +we spent a considerable effort trying to integrate things into the timer +wheel, but we failed. There are several reasons why such integration is +impossible: + +- the forced handling of low-resolution and high-resolution timers in + the same way leads to a lot of compromises, macro magic and #ifdef + mess. The timers.c code is very "tightly coded" around jiffies and + 32-bitness assumptions, and has been honed and micro-optimized for a + narrow use case for many years - and thus even small extensions to it + frequently break the wheel concept, leading to even worse + compromises. + +- the unpredictable [O(N)] overhead of cascading leads to delays which + necessitate a more complex handling of high resolution timers, which + decreases robustness. Such a design still led to rather large timing + inaccuracies. 
Cascading is a fundamental property of the timer wheel + concept, it cannot be 'designed out' without inevitably degrading + other portions of the timers.c code in an unacceptable way. + +- the implementation of the current posix-timer subsystem on top of + the timer wheel has already introduced a quite complex handling of + the required readjusting of absolute CLOCK_REALTIME timers at + settimeofday or NTP time - showing the rigidity of the timer wheel + data structure. + +- the timer wheel code is most optimal for use cases which can be + identified as "timeouts". Such timeouts are usually set up to cover + error conditions in various I/O paths, such as networking and block + I/O. The vast majority of those timers never expire and are rarely + recascaded because the expected correct event arrives in time so they + can be removed from the timer wheel before any further processing of + them becomes necessary. Thus the users of these timeouts can accept + the granularity and precision tradeoffs of the timer wheel, and + largely expect the timer subsystem to have near-zero overhead. Timing + for them is not a core purpose, it's mostly a necessary evil to + guarantee the processing of requests, which should be as cheap and + unintrusive as possible. + +The primary users of precision timers are user-space applications that +utilize nanosleep, posix-timers and itimer interfaces. Also, in-kernel +users like drivers and subsystems with a requirement for precise timed +events can benefit from the availability of a separate high-precision +timer subsystem as well. + +The ktimer subsystem is easily extended with high-resolution +capabilities, and patches for that exist and are maturing quickly. The +increasing demand for realtime and multimedia applications along with +other potential users for precise timers gives another reason to +separate the "timeout" and "precise timer" subsystems. 
+ +Another potential benefit is that such separation allows for future +optimizations of the existing timer wheel implementation for the low +resolution and low precision use cases - once the precision-sensitive +APIs are separated from the timer wheel and are migrated over to +ktimers. E.g. we could decrease the frequency of the timeout subsystem +from 250 Hz to 100 Hz (or even smaller). + +ktimer subsystem implementation details +--------------------------------------- + +the basic design considerations were: + +- simplicity +- robust, extensible abstractions +- data structure not bound to jiffies or any other granularity +- simplification of existing, timing related kernel code + +From our previous experience with various approaches of high-resolution +timers another basic requirement was the immediate enqueueing and +ordering of timers at activation time. After looking at several possible +solutions such as radix trees and hashes, the red black tree was chosen +as the basic data structure. Rbtrees are available as a library in the +kernel and are used in various performance-critical areas of e.g. memory +management and file systems. The rbtree is solely used for the time +sorted ordering, while a separate list is used to give the expiry code +fast access to the queued timers, without having to walk the rbtree. +(This separate list is also useful for high-resolution timers where we +need separate pending and expired queues while keeping the time-order +intact.) + +The time-ordered enqueueing is not purely for the purposes of the +high-resolution timers extension though, it also simplifies the handling +of absolute timers based on CLOCK_REALTIME. The existing implementation +needed to keep an extra list of all armed absolute CLOCK_REALTIME timers +along with complex locking. In case of settimeofday and NTP, all the +timers (!) had to be dequeued, the time-changing code had to fix them up +one by one, and all of them had to be enqueued again. 
The time-ordered +enqueueing and the storage of the expiry time in absolute time units +removes all this complex and poorly scaling code from the posix-timer +implementation - the clock can simply be set without having to touch the +rbtree. This also makes the handling of posix-timers simpler in general. + +The locking and per-CPU behavior of ktimers was mostly taken from the +existing timer wheel code, as it is mature and well suited. Sharing code +was not really a win, due to the different data structures. Also, the +ktimer functions now have clearer behavior and clearer names - such as +ktimer_try_to_cancel() and ktimer_cancel() [which are roughly equivalent +to del_timer() and del_timer_sync()] - and there's no direct 1:1 mapping +between them on the algorithmical level. + +The internal representation of time values (ktime_t) is implemented via +macros and inline functions, and can be switched between a "hybrid +union" type and a plain "scalar" 64bit nanoseconds representation (at +compile time). The hybrid union type exists to optimize time conversions +on 32bit CPUs. This build-time-selectable ktime_t storage format was +implemented to avoid the performance impact of 64-bit multiplications +and divisions on 32bit CPUs. Such operations are frequently necessary to +convert between the storage formats provided by kernel and userspace +interfaces and the internal time format. (See include/linux/ktime.h for +further details.) + +ktimers - rounding of timer values +---------------------------------- + +Why do we need rounding at all ? + +Firstly, the POSIX specification requires rounding to the resolution - +whatever that means. The POSIX specification is quite imprecise on the +details of rounding though, so a practical interpretation had to be +found. + +The first question is which resolution value should be returned to the +user by the clock_getres() interface. 
+ +The simplest case is when the hardware is capable of 1 nsec resolution: +in that case we can fulfill all wishes and there is no rounding :-) + +Another simple case is when the clock hardware has a limited resolution +that the kernel wants to fully offer to user-space: in this case that +limited resolution is returned to userspace. + +The hairy case is when the underlying hardware is capable of finer +grained resolution, but the kernel is not willing to offer that +resolution. Why would the kernel want to do that? Because e.g. the +system could easily be DoS-ed with high-frequency timer interrupts. Or +the kernel might want to cluster high-res timer interrupts into groups +for performance reasons, so that extremely high interrupt rates are +avoided. So the kernel needs some leeway in deciding the 'effective' +resolution that it is willing to expose to userspace. + +In this case, the clock_getres() decision is easy: we want to return the +'effective' resolution, not the 'theoretical' resolution. Thus an +application programmer gets correct information about what granularity +and accuracy to expect from the system. + +What is much less obvious in both the 'hardware is low-res' and 'kernel +wants to offer low-res' cases is the actual behavior of timers, and +where and how to round time values to the 'effective' resolution of the +clock. + +For this we first need to see what types of expiries there exist for +ktimers, and how rounding affects them. Ktimers have the following +variants: + +- relative one-shot timers +- absolute one-shot timers +- relative interval timers +- absolute interval timers + +Interval timers can be led back to one-shot timers: they are a series of +one-shot timers with the same interval. Relative one-shot timers can be +handled identically to absolute one-shot timers after adding the +relative expiry time to the current time of the respective clock. 
+ +We chose to handle two cases of rounding: + +- the rounding of the absolute value of the first expiry time +- the rounding of the timer interval + +An alternative implementation would be to not round the interval and to +implicitly round at every timer event, but it's not clear what the +advantages would be from doing that. There are a couple of +disadvantages: + +- the technique seems to contradict the standard's requirement that + 'time values ... be rounded' (which the interval clearly is). + +- other OSs implement the rounding in the way we implemented it. + +- also, there is an application surprise factor, the 'do not round + intervals' technique can lead to the following sample sequence of + events: + + Interval: 1.7ms + Resolution: 1ms + + Event timeline: + + 2ms - 4ms - 6ms - 7ms - 9ms - 11ms - 12ms - 14ms - 16ms - 17ms ... + + this 2,2,1,2,2,1...msec 'unpredictable and uneven' relative distance + of events could surprise applications. + +(as a sidenote, current POSIX APIs could be extended with a method of +periodic timers to have an 'average' frequency, where there is no +rounding of the interval. No such API exists at the moment.) + +ktimers - testing and verification +---------------------------------- + +We used the high-resolution timer subsystem on top of ktimers to verify +the ktimer implementation details in practice, and we also ran the posix +timer tests in order to ensure specification compliance. + +The ktimer patch converts the following kernel functionality to use +ktimers: + + - nanosleep + - itimers + - posix-timers + +The conversion of nanosleep and posix-timers enabled the unification of +nanosleep and clock_nanosleep. 
+ +The code was successfully compiled for the following platforms: + + i386, x86_64, ARM, PPC, PPC64, IA64 + +The code was run-tested on the following platforms: + + i386(UP/SMP), x86_64(UP/SMP), ARM, PPC + +ktimers were also integrated into the -rt tree, along with a +ktimers-based high-resolution timer implementation, so the ktimers code +got a healthy amount of testing and use in practice. + + Thomas Gleixner, Ingo Molnar Index: linux/Documentation/timekeeping.txt =================================================================== --- /dev/null +++ linux/Documentation/timekeeping.txt @@ -0,0 +1,246 @@ +How timekeeping works with CONFIG_GENERIC_TIME +======================================================================== + +The generic timekeeping code maintains and allows access to the system's understanding of how much time has passed from a certain point. However, in order to measure the passing of time, the generic timekeeping code relies on the clocksource abstraction. A clocksource abstracts a free running counter whose value increases at a known frequency. + +In the generic timekeeping code, we use a pointer to a selected clocksource to measure the passing of time. + +struct clocksource *clock + +The clocksource has some limitations however. Since it's likely of fixed width, it will not increment forever and will overflow. In order to still properly keep time, we must occasionally accumulate an interval of time. In the generic timekeeping code, we accumulate the amount of time since the system booted into the value system_time, which keeps nanosecond resolution in a ktime_t storage. + +ktime_t system_time + +Since it's likely your system has not been running continually since midnight on the 1st of January in 1970, we must provide an offset from that time in accordance with conventions. This only occasionally changed (via settimeofday()) offset is the wall_time_offset value, which is also stored as a ktime_t. 
+ +ktime_t wall_time_offset + + +Since we accumulate time in intervals, we need a base cycle value that we can use to generate an offset from the time value kept in system_time. We store this value in cycle_last. + +cycle_t cycle_last; + + +Further since all clocks drift somewhat from each other, we use the adjustment values provided via adjtimex() to correct our clocksource frequency for each interval. This frequency adjustment value is stored in ntp_adj. + +long ntp_adj; + +Now that we've covered the core global variables for timekeeping, let's look at how we maintain these values. + +As stated above, we want to avoid the clocksource from overflowing on us, so we accumulate a time interval periodically. This periodic accumulation function is called timeofday_periodic_hook(). In simplified pseudo code, it logically is presented as: + +timeofday_periodic_hook(): + cycle_now = read_clocksource(clock) + cycle_delta = (cycle_now - cycle_last) & clock->mask + nsec = cyc2ns(clock, cycle_delta, ntp_adj) + system_time += nsec + cycle_last = cycle_now + + /* do other stuff */ + +You can see we read the cycle value from the clocksource, calculate a cycle delta for the interval since we last called timeofday_periodic_hook(), convert that cycle delta to a nanosecond interval (for now ignore ntp_adj), add it to the system time and finally set our cycle_last value to cycle_now for the next interval. Using this simple algorithm we can correctly measure and record the passing of time. + +But just storing this info isn't very useful, we also want to make it available to be used elsewhere. So how do we provide a notion of how much time has passed in between calls to timeofday_periodic_hook()? + +First, let's create a function that calculates the time since the last call to timeofday_periodic_hook(). 
+ +get_nsec_offset(): + cycle_now = read_clocksource(clock) + cycle_delta = (cycle_now - cycle_last) & clock->mask + nsec = cyc2ns(clock, cycle_delta, ntp_adj) + return nsec + +Here you can see, we read the clocksource, calculate a cycle interval, and convert that to a nanosecond interval. Just like how it is done in timeofday_periodic_hook! + +Now let's use this function to provide the number of nanoseconds that the system has been running: + +do_monotonic_clock(): + return system_time + get_nsec_offset() + +Here we trivially add the nanosecond offset since the last timeofday_periodic_hook() to the value of system_time which was stored at the last timeofday_periodic_hook(). + +Note that since we use the same method to calculate time intervals, assuming each function is atomic and the clocksource functions as it should, time cannot go backward! + +Now to get the time of day using the standard convention: + +do_gettimeofday(): + return do_monotonic_clock() + wall_time_offset + +We simply add the wall_time_offset, and we have the number of nanoseconds since 1970 began! + + +Of course, in real life, things are not so static. We have to handle a number of dynamic values that may change and affect timekeeping. In order to do these safely, we must only change values in-between intervals. This means the periodic_hook call must handle these changes. + +Since clocksources can be changed while the system is running, we need to check for and possibly switch to using new clocksources in the periodic_hook call. Further, clocksources may change their frequency. Since this must be done only at a safe point, we use the update_callback function pointer (for more details, see "How to write a clocksource driver" below), this too must be done in-between intervals in the periodic_hook call. Finally, since the ntp adjustment made in the cyc2ns conversion is not static, we need to update the ntp state machine and calculate a new adjustment value. 
+ +This adds some extra pseudo code to the timeofday_periodic_hook function: + +timeofday_periodic_hook(): + cycle_now = read_clocksource(clock) + cycle_delta = (cycle_now - cycle_last) & clock->mask + nsec = cyc2ns(clock, cycle_delta, ntp_adj) + system_time += nsec + cycle_last = cycle_now + + next = get_next_clocksource() + if (next != clock): + cycle_last = read_clocksource(next) + clock = next + + if (clock->update_callback): + clock->update_callback() + + ntp_advance(nsec) + ppm = ntp_get_ppm_adjustment() + ntp_adj = ppm_to_mult_adj(clock, ppm) + + +Unfortunately, the actual timeofday_periodic_hook code is not as simple as this pseudo code. For performance concerns, much has been done to pre-calculate values and use them repeatedly. Thus be aware that the code in timeofday.c is more complex, however the functional logic is the same. + + +How to port an architecture to GENERIC_TIME +======================================================================== +Porting an architecture to the GENERIC_TIME timekeeping code consists of moving a little bit of code around then deleting a fair amount. It is my hope that this will reduce the arch specific maintenance work around timekeeping. + +Porting an arch usually requires the following steps. + +1. Define CONFIG_GENERIC_TIME in the arch's Kconfig +2. Implementing the following functions + nsec_t read_persistent_clock(void) + void sync_persistent_clock(struct timespec ts) +3. Removing all of the arch specific timekeeping code + do_gettimeofday() + do_settimeofday() + etc +4. Implementing clocksource drivers + See "How to write a clocksource driver" for more details + +The exceptions to the above are: + +5. If the arch has no continuous clocksource + A) Implement 1-3 in the above list. + B) Define CONFIG_IS_TICK_BASED in the arch's Kconfig + C) Implement the "long arch_getoffset(void)" function + +6. 
If the arch supports vsyscall gettimeofday (see x86_64 for reference) + A) Implement 1-4 in the above list + B) Define GENERIC_TIME_VSYSCALL + C) Implement arch_update_vsyscall_gtod() + D) Implement vsyscall gettimeofday (similar to __get_realtime_clock_ts) + E) Implement vread functions for supported clocksources + + + +How to write a clocksource driver. +======================================================================== +First, a quick summary of what a clocksource driver provides. + +Simply put, a clocksource is an abstraction of a free running increasing counter. The abstraction provides the minimal amount of info for that counter to be usable for timekeeping. Those required values are: + 1. Its name + 2. A rating value for selection priority + 3. A read function pointer + 4. A mask value for correct twos-complement subtraction + 5. A mult and shift pair that approximate the counter frequency + mult/(2^shift) ~= nanoseconds per cycle + +Additionally, there are other optionally set values that allow for advanced functionality. Those values are: + 6. The update_callback function. + 7. The is_continuous flag. + 8. The vread function pointer + 9. The vdata pointer value + + +Now let's go over these values in detail. + +1. Name. + The clocksource's name should be unique since it is used for both identification as well as for manually overriding the default clocksource selection. The name length must be shorter than 32 characters in order for it to be properly overridden. + +2. Rating value + This rating value is used as a priority value for clocksource selection. It has no direct connection to quality or physical properties of the clocksource, but is to be set and manipulated to guarantee that the best (by no specific metric) clocksource that will provide correct timekeeping is automatically selected. Rating suggestions can be found in include/linux/clocksource.h + +3. 
Read function pointer + This pointer should point to a function that returns an unsigned increasing cycle value from the clocksource. The value should have a coverage from zero to the maximum cycle value the clocksource can provide. This does not have to be a direct hardware value and can also be a software counter. An example of a software counter is the jiffies clocksource. + +4. The mask value + This value should be the largest power of two that is smaller than the maximum cycle value. This allows twos complement subtraction to work on overflow boundary conditions if the max value is less than (cycle_t)-1. So for example, if we have a 16 bit counter (ie: one that loops to zero after 0x0000FFFF), the mask would be 0xFFFF. So then when finding the cycle difference around an overflow, where now = 0x0013 and then = 0xFFEE, we can compute the cycle delta properly using the equation: + delta = (now - then)&mask + delta = (0x0013 - 0xFFEE) & 0xFFFF + delta = 0xFFFF0025 & 0xFFFF /* note the unmasked negative value */ + delta = 0x25 + +5. The mult and shift pair + These 32bit values approximate the nanosecond per cycle frequency of the clocksource using the equation: mult/(2^shift). If you have a khz or hz frequency value, the mult value for a given shift value can be easily calculated using the clocksource_hz2mult() and clocksource_khz2mult() helper functions. When selecting a shift value, it is important to be careful. Larger shift values give a finer precision in the cycle to nanosecond conversion and allow for more exact NTP adjustments. However if you select too large a shift value, the resulting mult value might overflow a cycle_t * mult computation. + + +So if you have a simple hardware counter that does not change frequency, filling in the above should be sufficient for a functional clocksource. But read on for details on implementing a more complex clocksource. + +6. The update_callback function pointer. 
+ If this function pointer is non-NULL, it will be called every periodic hook when it is safe for the clocksource to change its state. This would be necessary in the case where the counter frequency changes, for example. One user of this function pointer is the TSC clocksource. When the TSC frequency changes (which may occur if the cpu changes frequency) we need to notify the clocksource at a safe point where that state may change. Thus, if the TSC has changed frequency we set the new mult/shift values in the update_callback function. + +7. The is_continuous flag. + This flag variable (0 if false, 1 if true) denotes that the clocksource is continuous. This means that it is a purely hardware driven clocksource and is not dependent on any software code to run for it to increment properly. This denotation will be useful in the future when timer ticks may be disabled for long periods of time. Doing so using software clocksources, like the jiffies clocksource, would cause timekeeping problems. + +8. The vread function pointer. + This function pointer points to a user-space accessible function that reads the clocksource. This is used in userspace gettimeofday implementations to improve performance. See the x86-64 TSC clocksource implementation for an example. + +9. The vdata pointer. + This pointer is passed to the vread function pointer in a userspace gettimeofday implementation. Its usage is dependent on the vread implementation, but if the pointer points to data, that data must be readable from userspace. + + +Now let's write a quick clocksource for an imaginary bit of hardware. Here are the specs: + + A 32bit counter can be found at the MMIO address 0xFEEDF000. It runs at 100MHz. To enable it, the low bit of the address 0xFEEDF0F0 must be set to one. + +So let's start out an empty cool-counter.c file, and define the clocksource. 
+ +#include +#include +#include + +#define COOL_READ_PTR 0xFEEDF000 +#define COOL_START_PTR 0xFEEDF0F0 + +static __iomem *cool_ptr = COOL_READ_PTR; + +struct clocksource clocksource_cool +{ + .name = "cool", + .rating = 200, /* its a pretty decent clock */ + .mask = 0xFFFFFFFF, /* 32 bits */ + .mult = 0, /*to be computed */ + .shift = 10, +} + + +Now let's write the read function: + +cycle_t cool_counter_read(void) +{ + cycle_t ret = readl(cool_ptr); + return ret; +} + +Finally, lets write the init function: + +void cool_counter_init(void) +{ + __iomem *ptr = COOL_START_PTR; + u32 val; + + /* start the counter */ + val = readl(ptr); + val |= 0x1; + writel(val, ptr); + + /* finish initializing the clocksource */ + clocksource_cool.read = cool_counter_read; + clocksource_cool.mult = clocksource_khz2mult(100000, + clocksource_cool.shift); + + /* register the clocksource */ + register_clocksource(&clocksource_cool); +} +module_init(cool_counter_init); + + +Now wasn't that easy! Index: linux/Makefile =================================================================== --- linux.orig/Makefile +++ linux/Makefile @@ -1,7 +1,7 @@ VERSION = 2 PATCHLEVEL = 6 SUBLEVEL = 14 -EXTRAVERSION = +EXTRAVERSION = -rt22 NAME=Affluent Albatross # *DOCUMENTATION* @@ -517,10 +517,14 @@ CFLAGS += $(call add-align,CONFIG_CC_AL CFLAGS += $(call add-align,CONFIG_CC_ALIGN_LOOPS,-loops) CFLAGS += $(call add-align,CONFIG_CC_ALIGN_JUMPS,-jumps) -ifdef CONFIG_FRAME_POINTER -CFLAGS += -fno-omit-frame-pointer $(call cc-option,-fno-optimize-sibling-calls,) +ifdef CONFIG_MCOUNT +CFLAGS += -pg -fno-omit-frame-pointer $(call cc-option,-fno-optimize-sibling-calls,) else -CFLAGS += -fomit-frame-pointer + ifdef CONFIG_FRAME_POINTER + CFLAGS += -fno-omit-frame-pointer $(call cc-option,-fno-optimize-sibling-calls,) + else + CFLAGS += -fomit-frame-pointer + endif endif ifdef CONFIG_DEBUG_INFO Index: linux/arch/alpha/kernel/time.c =================================================================== --- 
linux.orig/arch/alpha/kernel/time.c +++ linux/arch/alpha/kernel/time.c @@ -55,10 +55,6 @@ #include "proto.h" #include "irq_impl.h" -u64 jiffies_64 = INITIAL_JIFFIES; - -EXPORT_SYMBOL(jiffies_64); - extern unsigned long wall_jiffies; /* kernel/timer.c */ static int set_rtc_mmss(unsigned long); Index: linux/arch/arm/Kconfig =================================================================== --- linux.orig/arch/arm/Kconfig +++ linux/arch/arm/Kconfig @@ -50,6 +50,10 @@ config UID16 bool default y +config GENERIC_HARDIRQS + bool + default y + config RWSEM_GENERIC_SPINLOCK bool default y @@ -339,18 +343,7 @@ config NR_CPUS depends on SMP default "4" -config PREEMPT - bool "Preemptible Kernel (EXPERIMENTAL)" - depends on EXPERIMENTAL - help - This option reduces the latency of the kernel when reacting to - real-time or interactive events by allowing a low priority process to - be preempted even if it is in kernel mode executing a system call. - This allows applications to run more reliably even when the system is - under load. - - Say Y here if you are building a kernel for a desktop, embedded - or real-time system. Say N if you are unsure. 
+source kernel/Kconfig.preempt config NO_IDLE_HZ bool "Dynamic tick timer" Index: linux/arch/arm/boot/compressed/head.S =================================================================== --- linux.orig/arch/arm/boot/compressed/head.S +++ linux/arch/arm/boot/compressed/head.S @@ -718,6 +718,19 @@ memdump: mov r12, r0 mov pc, r10 #endif +#ifdef CONFIG_MCOUNT +/* CONFIG_MCOUNT causes boot header to be built with -pg requiring this + * trampoline + */ + .text + .align 0 + .type mcount %function + .global mcount +mcount: + mov pc, lr @ just return +#endif + + reloc_end: .align Index: linux/arch/arm/boot/compressed/misc.c =================================================================== --- linux.orig/arch/arm/boot/compressed/misc.c +++ linux/arch/arm/boot/compressed/misc.c @@ -199,6 +199,7 @@ static ulg free_mem_ptr_end; #define HEAP_SIZE 0x2000 +#define ZLIB_INFLATE_NO_INFLATE_LOCK #include "../../../../lib/inflate.c" #ifndef STANDALONE_DEBUG Index: linux/arch/arm/common/dmabounce.c =================================================================== --- linux.orig/arch/arm/common/dmabounce.c +++ linux/arch/arm/common/dmabounce.c @@ -403,11 +403,11 @@ dma_map_single(struct device *dev, void BUG_ON(dir == DMA_NONE); - local_irq_save(flags); + raw_local_irq_save(flags); dma_addr = map_single(dev, ptr, size, dir); - local_irq_restore(flags); + raw_local_irq_restore(flags); return dma_addr; } @@ -430,11 +430,11 @@ dma_unmap_single(struct device *dev, dma BUG_ON(dir == DMA_NONE); - local_irq_save(flags); + raw_local_irq_save(flags); unmap_single(dev, dma_addr, size, dir); - local_irq_restore(flags); + raw_local_irq_restore(flags); } int @@ -449,7 +449,7 @@ dma_map_sg(struct device *dev, struct sc BUG_ON(dir == DMA_NONE); - local_irq_save(flags); + raw_local_irq_save(flags); for (i = 0; i < nents; i++, sg++) { struct page *page = sg->page; @@ -461,7 +461,7 @@ dma_map_sg(struct device *dev, struct sc map_single(dev, ptr, length, dir); } - local_irq_restore(flags); + 
raw_local_irq_restore(flags); return nents; } @@ -478,7 +478,7 @@ dma_unmap_sg(struct device *dev, struct BUG_ON(dir == DMA_NONE); - local_irq_save(flags); + raw_local_irq_save(flags); for (i = 0; i < nents; i++, sg++) { dma_addr_t dma_addr = sg->dma_address; @@ -487,7 +487,7 @@ dma_unmap_sg(struct device *dev, struct unmap_single(dev, dma_addr, length, dir); } - local_irq_restore(flags); + raw_local_irq_restore(flags); } void @@ -499,11 +499,11 @@ dma_sync_single_for_cpu(struct device *d dev_dbg(dev, "%s(ptr=%p,size=%d,dir=%x)\n", __func__, (void *) dma_addr, size, dir); - local_irq_save(flags); + raw_local_irq_save(flags); sync_single(dev, dma_addr, size, dir); - local_irq_restore(flags); + raw_local_irq_restore(flags); } void @@ -515,11 +515,11 @@ dma_sync_single_for_device(struct device dev_dbg(dev, "%s(ptr=%p,size=%d,dir=%x)\n", __func__, (void *) dma_addr, size, dir); - local_irq_save(flags); + raw_local_irq_save(flags); sync_single(dev, dma_addr, size, dir); - local_irq_restore(flags); + raw_local_irq_restore(flags); } void @@ -534,7 +534,7 @@ dma_sync_sg_for_cpu(struct device *dev, BUG_ON(dir == DMA_NONE); - local_irq_save(flags); + raw_local_irq_save(flags); for (i = 0; i < nents; i++, sg++) { dma_addr_t dma_addr = sg->dma_address; @@ -543,7 +543,7 @@ dma_sync_sg_for_cpu(struct device *dev, sync_single(dev, dma_addr, length, dir); } - local_irq_restore(flags); + raw_local_irq_restore(flags); } void @@ -558,7 +558,7 @@ dma_sync_sg_for_device(struct device *de BUG_ON(dir == DMA_NONE); - local_irq_save(flags); + raw_local_irq_save(flags); for (i = 0; i < nents; i++, sg++) { dma_addr_t dma_addr = sg->dma_address; @@ -567,7 +567,7 @@ dma_sync_sg_for_device(struct device *de sync_single(dev, dma_addr, length, dir); } - local_irq_restore(flags); + raw_local_irq_restore(flags); } int Index: linux/arch/arm/common/locomo.c =================================================================== --- linux.orig/arch/arm/common/locomo.c +++ linux/arch/arm/common/locomo.c @@ 
-425,6 +425,12 @@ static struct irqchip locomo_spi_chip = .unmask = locomo_spi_unmask_irq, }; +static DEFINE_IRQ_CHAINED_TYPE(locomo_handler); +static DEFINE_IRQ_CHAINED_TYPE(locomo_key_handler); +static DEFINE_IRQ_CHAINED_TYPE(locomo_gpio_handler); +static DEFINE_IRQ_CHAINED_TYPE(locomo_lt_handler); +static DEFINE_IRQ_CHAINED_TYPE(locomo_spi_handler); + static void locomo_setup_irq(struct locomo *lchip) { int irq; Index: linux/arch/arm/common/sa1111.c =================================================================== --- linux.orig/arch/arm/common/sa1111.c +++ linux/arch/arm/common/sa1111.c @@ -159,11 +159,11 @@ sa1111_irq_handler(unsigned int irq, str for (i = IRQ_SA1111_START; stat0; i++, stat0 >>= 1) if (stat0 & 1) - do_edge_IRQ(i, irq_desc + i, regs); + handle_edge_irq(i, irq_desc + i, regs); for (i = IRQ_SA1111_START + 32; stat1; i++, stat1 >>= 1) if (stat1 & 1) - do_edge_IRQ(i, irq_desc + i, regs); + handle_edge_irq(i, irq_desc + i, regs); /* For level-based interrupts */ desc->chip->unmask(irq); @@ -368,6 +368,8 @@ static struct irqchip sa1111_high_chip = .set_wake = sa1111_wake_highirq, }; +static DEFINE_IRQ_CHAINED_TYPE(sa1111_irq_handler); + static void sa1111_setup_irq(struct sa1111 *sachip) { void __iomem *irqbase = sachip->base + SA1111_INTC; Index: linux/arch/arm/common/time-acorn.c =================================================================== --- linux.orig/arch/arm/common/time-acorn.c +++ linux/arch/arm/common/time-acorn.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -76,7 +77,7 @@ ioc_timer_interrupt(int irq, void *dev_i static struct irqaction ioc_timer_irq = { .name = "timer", - .flags = SA_INTERRUPT, + .flags = SA_INTERRUPT | SA_NODELAY, .handler = ioc_timer_interrupt }; Index: linux/arch/arm/kernel/calls.S =================================================================== --- linux.orig/arch/arm/kernel/calls.S +++ linux/arch/arm/kernel/calls.S @@ -7,11 +7,8 @@ * it under the terms of the GNU General 
Public License version 2 as * published by the Free Software Foundation. * - * This file is included twice in entry-common.S + * NR_syscalls now defined in include/asm-arm/unistd.h - tglx */ -#ifndef NR_syscalls -#define NR_syscalls 328 -#else __syscall_start: /* 0 */ .long sys_restart_syscall @@ -341,4 +338,3 @@ __syscall_end: .rept NR_syscalls - (__syscall_end - __syscall_start) / 4 .long sys_ni_syscall .endr -#endif Index: linux/arch/arm/kernel/dma.c =================================================================== --- linux.orig/arch/arm/kernel/dma.c +++ linux/arch/arm/kernel/dma.c @@ -22,7 +22,7 @@ #include -DEFINE_SPINLOCK(dma_spin_lock); +DEFINE_RAW_SPINLOCK(dma_spin_lock); #if MAX_DMA_CHANNELS > 0 Index: linux/arch/arm/kernel/ecard.c =================================================================== --- linux.orig/arch/arm/kernel/ecard.c +++ linux/arch/arm/kernel/ecard.c @@ -619,7 +619,7 @@ ecard_irqexp_handler(unsigned int irq, s ecard_t *ec = slot_to_ecard(slot); if (ec->claimed) { - struct irqdesc *d = irqdesc + ec->irq; + struct irqdesc *d = irq_desc + ec->irq; /* * this ugly code is so that we can operate a * prioritorising system: @@ -1052,6 +1052,9 @@ ecard_probe(int slot, card_type_t type) return rc; } +static DEFINE_IRQ_CHAINED_TYPE(ecard_irqexp_handler); +static DEFINE_IRQ_CHAINED_TYPE(ecard_irq_handler); + /* * Initialise the expansion card system. * Locate all hardware - interrupt management and @@ -1081,8 +1084,10 @@ static int __init ecard_init(void) irqhw = ecard_probeirqhw(); - set_irq_chained_handler(IRQ_EXPANSIONCARD, - irqhw ? 
ecard_irqexp_handler : ecard_irq_handler); + if (irqhw) + set_irq_chained_handler(IRQ_EXPANSIONCARD, ecard_irqexp_handler); + else + set_irq_chained_handler(IRQ_EXPANSIONCARD, ecard_irq_handler); ecard_proc_init(); Index: linux/arch/arm/kernel/entry-armv.S =================================================================== --- linux.orig/arch/arm/kernel/entry-armv.S +++ linux/arch/arm/kernel/entry-armv.S @@ -184,7 +184,7 @@ __irq_svc: irq_handler #ifdef CONFIG_PREEMPT ldr r0, [tsk, #TI_FLAGS] @ get flags - tst r0, #_TIF_NEED_RESCHED + tst r0, #_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_DELAYED blne svc_preempt preempt_return: ldr r0, [tsk, #TI_PREEMPT] @ read preempt value @@ -211,7 +211,7 @@ svc_preempt: str r7, [tsk, #TI_PREEMPT] @ expects preempt_count == 0 1: bl preempt_schedule_irq @ irq en/disable is done inside ldr r0, [tsk, #TI_FLAGS] @ get new tasks TI_FLAGS - tst r0, #_TIF_NEED_RESCHED + tst r0, #_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_DELAYED beq preempt_return @ go again b 1b #endif Index: linux/arch/arm/kernel/entry-common.S =================================================================== --- linux.orig/arch/arm/kernel/entry-common.S +++ linux/arch/arm/kernel/entry-common.S @@ -3,6 +3,8 @@ * * Copyright (C) 2000 Russell King * + * LATENCY_TRACE/mcount support (C) 2005 Timesys john.cooper@timesys.com + * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. @@ -41,7 +43,7 @@ ret_fast_syscall: fast_work_pending: str r0, [sp, #S_R0+S_OFF]! @ returned r0 work_pending: - tst r1, #_TIF_NEED_RESCHED + tst r1, #_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_DELAYED bne work_resched tst r1, #_TIF_NOTIFY_RESUME | _TIF_SIGPENDING beq no_work_pending @@ -52,7 +54,8 @@ work_pending: b no_work_pending work_resched: - bl schedule + bl __schedule + /* * "slow" syscall return path. "why" tells us if this was a real syscall. 
*/ @@ -88,8 +91,6 @@ ENTRY(ret_from_fork) b ret_slow_syscall -#include "calls.S" - /*============================================================================= * SWI handler *----------------------------------------------------------------------------- @@ -288,3 +289,110 @@ sys_mmap2: str r5, [sp, #4] b do_mmap2 #endif + +#ifdef CONFIG_FRAME_POINTER + +#ifdef CONFIG_MCOUNT +/* + * At the point where we are in mcount() we maintain the + * frame of the prologue code and keep the call to mcount() + * out of the stack frame list: + + saved pc <---\ caller of instrumented routine + saved lr | + ip/prev_sp | + fp -----^ | + : | + | + -> saved pc | instrumented routine + | saved lr | + | ip/prev_sp | + | fp ---------/ + | : + | + | mcount + | saved pc + | saved lr + | ip/prev sp + -- fp + r3 + r2 + r1 + sp-> r0 + : + */ + + .text + .align 0 + .type mcount %function + .global mcount + +/* gcc -pg generated FUNCTION_PROLOGUE references mcount() + * and has already created the stack frame invocation for + * the routine we have been called to instrument. We create + * a complete frame nevertheless, as we want to use the same + * call to mcount() from c code. + */ +mcount: + + ldr ip, =mcount_enabled @ leave early, if disabled + ldr ip, [ip] + cmp ip, #0 + moveq pc,lr + + mov ip, sp + stmdb sp!, {r0 - r3, fp, ip, lr, pc} @ create stack frame + + ldr r1, [fp, #-4] @ get lr (the return address + @ of the caller of the + @ instrumented function) + mov r0, lr @ get lr - (the return address + @ of the instrumented function) + + sub fp, ip, #4 @ point fp at this frame + + bl __trace +1: + ldmdb fp, {r0 - r3, fp, sp, pc} @ pop entry frame and return + +#endif + +/* ARM replacement for unsupported gcc __builtin_return_address(n) + * where 0 < n. n == 0 is supported here as well. + * + * Walk up the stack frame until the desired frame is found or a NULL + * fp is encountered, return NULL in the latter case. 
+ * + * Note: it is possible under code optimization for the stack invocation + * of an ancestor function (level N) to be removed before calling a + * descendant function (level N+1). No easy means is available to deduce + * this scenario with the result being [for example] caller_addr(0) when + * called from level N+1 returning level N-1 rather than the expected + * level N. This optimization issue appears isolated to the case of + * a call to a level N+1 routine made at the tail end of a level N + * routine -- the level N frame is deleted and a simple branch is made + * to the level N+1 routine. + */ + + .text + .align 0 + .type arm_return_addr %function + .global arm_return_addr + +arm_return_addr: + mov ip, r0 + mov r0, fp +3: + cmp r0, #0 + beq 1f @ frame list hit end, bail + cmp ip, #0 + beq 2f @ reached desired frame + ldr r0, [r0, #-12] @ else continue, get next fp + sub ip, ip, #1 + b 3b +2: + ldr r0, [r0, #-4] @ get target return address +1: + mov pc, lr + +#endif Index: linux/arch/arm/kernel/fiq.c =================================================================== --- linux.orig/arch/arm/kernel/fiq.c +++ linux/arch/arm/kernel/fiq.c @@ -38,6 +38,7 @@ #include #include #include +#include #include #include @@ -88,7 +89,7 @@ void set_fiq_handler(void *start, unsign * disable irqs for the duration. Note - these functions are almost * entirely coded in assembly. 
*/ -void __attribute__((naked)) set_fiq_regs(struct pt_regs *regs) +void notrace __attribute__((naked)) set_fiq_regs(struct pt_regs *regs) { register unsigned long tmp; asm volatile ( @@ -106,7 +107,7 @@ void __attribute__((naked)) set_fiq_regs : "r" (&regs->ARM_r8), "I" (PSR_I_BIT | PSR_F_BIT | FIQ_MODE)); } -void __attribute__((naked)) get_fiq_regs(struct pt_regs *regs) +void notrace __attribute__((naked)) get_fiq_regs(struct pt_regs *regs) { register unsigned long tmp; asm volatile ( Index: linux/arch/arm/kernel/init_task.c =================================================================== --- linux.orig/arch/arm/kernel/init_task.c +++ linux/arch/arm/kernel/init_task.c @@ -12,8 +12,8 @@ #include #include -static struct fs_struct init_fs = INIT_FS; -static struct files_struct init_files = INIT_FILES; +static struct fs_struct init_fs = INIT_FS(init_fs); +static struct files_struct init_files = INIT_FILES(init_files); static struct signal_struct init_signals = INIT_SIGNALS(init_signals); static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); struct mm_struct init_mm = INIT_MM(init_mm); Index: linux/arch/arm/kernel/irq.c =================================================================== --- linux.orig/arch/arm/kernel/irq.c +++ linux/arch/arm/kernel/irq.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include @@ -38,193 +39,11 @@ #include #include -#include #include -#include #include -/* - * Maximum IRQ count. Currently, this is arbitary. However, it should - * not be set too low to prevent false triggering. Conversely, if it - * is set too high, then you could miss a stuck IRQ. - * - * Maybe we ought to set a timer and re-enable the IRQ at a later time?
- */ -#define MAX_IRQ_CNT 100000 - -static int noirqdebug; -static volatile unsigned long irq_err_count; -static DEFINE_SPINLOCK(irq_controller_lock); -static LIST_HEAD(irq_pending); - -struct irqdesc irq_desc[NR_IRQS]; void (*init_arch_irq)(void) __initdata = NULL; -/* - * No architecture-specific irq_finish function defined in arm/arch/irqs.h. - */ -#ifndef irq_finish -#define irq_finish(irq) do { } while (0) -#endif - -/* - * Dummy mask/unmask handler - */ -void dummy_mask_unmask_irq(unsigned int irq) -{ -} - -irqreturn_t no_action(int irq, void *dev_id, struct pt_regs *regs) -{ - return IRQ_NONE; -} - -void do_bad_IRQ(unsigned int irq, struct irqdesc *desc, struct pt_regs *regs) -{ - irq_err_count += 1; - printk(KERN_ERR "IRQ: spurious interrupt %d\n", irq); -} - -static struct irqchip bad_chip = { - .ack = dummy_mask_unmask_irq, - .mask = dummy_mask_unmask_irq, - .unmask = dummy_mask_unmask_irq, -}; - -static struct irqdesc bad_irq_desc = { - .chip = &bad_chip, - .handle = do_bad_IRQ, - .pend = LIST_HEAD_INIT(bad_irq_desc.pend), - .disable_depth = 1, -}; - -#ifdef CONFIG_SMP -void synchronize_irq(unsigned int irq) -{ - struct irqdesc *desc = irq_desc + irq; - - while (desc->running) - barrier(); -} -EXPORT_SYMBOL(synchronize_irq); - -#define smp_set_running(desc) do { desc->running = 1; } while (0) -#define smp_clear_running(desc) do { desc->running = 0; } while (0) -#else -#define smp_set_running(desc) do { } while (0) -#define smp_clear_running(desc) do { } while (0) -#endif - -/** - * disable_irq_nosync - disable an irq without waiting - * @irq: Interrupt to disable - * - * Disable the selected interrupt line. Enables and disables - * are nested. We do this lazily. - * - * This function may be called from IRQ context. 
- */ -void disable_irq_nosync(unsigned int irq) -{ - struct irqdesc *desc = irq_desc + irq; - unsigned long flags; - - spin_lock_irqsave(&irq_controller_lock, flags); - desc->disable_depth++; - list_del_init(&desc->pend); - spin_unlock_irqrestore(&irq_controller_lock, flags); -} -EXPORT_SYMBOL(disable_irq_nosync); - -/** - * disable_irq - disable an irq and wait for completion - * @irq: Interrupt to disable - * - * Disable the selected interrupt line. Enables and disables - * are nested. This functions waits for any pending IRQ - * handlers for this interrupt to complete before returning. - * If you use this function while holding a resource the IRQ - * handler may need you will deadlock. - * - * This function may be called - with care - from IRQ context. - */ -void disable_irq(unsigned int irq) -{ - struct irqdesc *desc = irq_desc + irq; - - disable_irq_nosync(irq); - if (desc->action) - synchronize_irq(irq); -} -EXPORT_SYMBOL(disable_irq); - -/** - * enable_irq - enable interrupt handling on an irq - * @irq: Interrupt to enable - * - * Re-enables the processing of interrupts on this IRQ line. - * Note that this may call the interrupt handler, so you may - * get unexpected results if you hold IRQs disabled. - * - * This function may be called from IRQ context. - */ -void enable_irq(unsigned int irq) -{ - struct irqdesc *desc = irq_desc + irq; - unsigned long flags; - - spin_lock_irqsave(&irq_controller_lock, flags); - if (unlikely(!desc->disable_depth)) { - printk("enable_irq(%u) unbalanced from %p\n", irq, - __builtin_return_address(0)); - } else if (!--desc->disable_depth) { - desc->probing = 0; - desc->chip->unmask(irq); - - /* - * If the interrupt is waiting to be processed, - * try to re-run it. We can't directly run it - * from here since the caller might be in an - * interrupt-protected region. 
- */ - if (desc->pending && list_empty(&desc->pend)) { - desc->pending = 0; - if (!desc->chip->retrigger || - desc->chip->retrigger(irq)) - list_add(&desc->pend, &irq_pending); - } - } - spin_unlock_irqrestore(&irq_controller_lock, flags); -} -EXPORT_SYMBOL(enable_irq); - -/* - * Enable wake on selected irq - */ -void enable_irq_wake(unsigned int irq) -{ - struct irqdesc *desc = irq_desc + irq; - unsigned long flags; - - spin_lock_irqsave(&irq_controller_lock, flags); - if (desc->chip->set_wake) - desc->chip->set_wake(irq, 1); - spin_unlock_irqrestore(&irq_controller_lock, flags); -} -EXPORT_SYMBOL(enable_irq_wake); - -void disable_irq_wake(unsigned int irq) -{ - struct irqdesc *desc = irq_desc + irq; - unsigned long flags; - - spin_lock_irqsave(&irq_controller_lock, flags); - if (desc->chip->set_wake) - desc->chip->set_wake(irq, 0); - spin_unlock_irqrestore(&irq_controller_lock, flags); -} -EXPORT_SYMBOL(disable_irq_wake); - int show_interrupts(struct seq_file *p, void *v) { int i = *(loff_t *) v, cpu; @@ -243,7 +62,7 @@ int show_interrupts(struct seq_file *p, } if (i < NR_IRQS) { - spin_lock_irqsave(&irq_controller_lock, flags); + spin_lock_irqsave(&irq_desc[i].lock, flags); action = irq_desc[i].action; if (!action) goto unlock; @@ -257,7 +76,7 @@ int show_interrupts(struct seq_file *p, seq_putc(p, '\n'); unlock: - spin_unlock_irqrestore(&irq_controller_lock, flags); + spin_unlock_irqrestore(&irq_desc[i].lock, flags); } else if (i == NR_IRQS) { #ifdef CONFIG_ARCH_ACORN show_fiq_list(p, v); @@ -265,374 +84,83 @@ unlock: #ifdef CONFIG_SMP show_ipi_list(p); #endif +#ifdef FIXME_TGLX seq_printf(p, "Err: %10lu\n", irq_err_count); - } - return 0; -} - -/* - * IRQ lock detection. - * - * Hopefully, this should get us out of a few locked situations. - * However, it may take a while for this to happen, since we need - * a large number if IRQs to appear in the same jiffie with the - * same instruction pointer (or within 2 instructions). 
- */ -static int check_irq_lock(struct irqdesc *desc, int irq, struct pt_regs *regs) -{ - unsigned long instr_ptr = instruction_pointer(regs); - - if (desc->lck_jif == jiffies && - desc->lck_pc >= instr_ptr && desc->lck_pc < instr_ptr + 8) { - desc->lck_cnt += 1; - - if (desc->lck_cnt > MAX_IRQ_CNT) { - printk(KERN_ERR "IRQ LOCK: IRQ%d is locking the system, disabled\n", irq); - return 1; - } - } else { - desc->lck_cnt = 0; - desc->lck_pc = instruction_pointer(regs); - desc->lck_jif = jiffies; - } - return 0; -} - -static void -report_bad_irq(unsigned int irq, struct pt_regs *regs, struct irqdesc *desc, int ret) -{ - static int count = 100; - struct irqaction *action; - - if (!count || noirqdebug) - return; - - count--; - - if (ret != IRQ_HANDLED && ret != IRQ_NONE) { - printk("irq%u: bogus retval mask %x\n", irq, ret); - } else { - printk("irq%u: nobody cared\n", irq); - } - show_regs(regs); - dump_stack(); - printk(KERN_ERR "handlers:"); - action = desc->action; - do { - printk("\n" KERN_ERR "[<%p>]", action->handler); - print_symbol(" (%s)", (unsigned long)action->handler); - action = action->next; - } while (action); - printk("\n"); -} - -static int -__do_irq(unsigned int irq, struct irqaction *action, struct pt_regs *regs) -{ - unsigned int status; - int ret, retval = 0; - - spin_unlock(&irq_controller_lock); - -#ifdef CONFIG_NO_IDLE_HZ - if (!(action->flags & SA_TIMER) && system_timer->dyn_tick != NULL) { - write_seqlock(&xtime_lock); - if (system_timer->dyn_tick->state & DYN_TICK_ENABLED) - system_timer->dyn_tick->handler(irq, 0, regs); - write_sequnlock(&xtime_lock); - } #endif - - if (!(action->flags & SA_INTERRUPT)) - local_irq_enable(); - - status = 0; - do { - ret = action->handler(irq, action->dev_id, regs); - if (ret == IRQ_HANDLED) - status |= action->flags; - retval |= ret; - action = action->next; - } while (action); - - if (status & SA_SAMPLE_RANDOM) - add_interrupt_randomness(irq); - - spin_lock_irq(&irq_controller_lock); - - return retval; -} - 
-/* - * This is for software-decoded IRQs. The caller is expected to - * handle the ack, clear, mask and unmask issues. - */ -void -do_simple_IRQ(unsigned int irq, struct irqdesc *desc, struct pt_regs *regs) -{ - struct irqaction *action; - const unsigned int cpu = smp_processor_id(); - - desc->triggered = 1; - - kstat_cpu(cpu).irqs[irq]++; - - smp_set_running(desc); - - action = desc->action; - if (action) { - int ret = __do_irq(irq, action, regs); - if (ret != IRQ_HANDLED) - report_bad_irq(irq, regs, desc, ret); - } - - smp_clear_running(desc); -} - -/* - * Most edge-triggered IRQ implementations seem to take a broken - * approach to this. Hence the complexity. - */ -void -do_edge_IRQ(unsigned int irq, struct irqdesc *desc, struct pt_regs *regs) -{ - const unsigned int cpu = smp_processor_id(); - - desc->triggered = 1; - - /* - * If we're currently running this IRQ, or its disabled, - * we shouldn't process the IRQ. Instead, turn on the - * hardware masks. - */ - if (unlikely(desc->running || desc->disable_depth)) - goto running; - - /* - * Acknowledge and clear the IRQ, but don't mask it. - */ - desc->chip->ack(irq); - - /* - * Mark the IRQ currently in progress. - */ - desc->running = 1; - - kstat_cpu(cpu).irqs[irq]++; - - do { - struct irqaction *action; - - action = desc->action; - if (!action) - break; - - if (desc->pending && !desc->disable_depth) { - desc->pending = 0; - desc->chip->unmask(irq); - } - - __do_irq(irq, action, regs); - } while (desc->pending && !desc->disable_depth); - - desc->running = 0; - - /* - * If we were disabled or freed, shut down the handler. - */ - if (likely(desc->action && !check_irq_lock(desc, irq, regs))) - return; - - running: - /* - * We got another IRQ while this one was masked or - * currently running. Delay it. - */ - desc->pending = 1; - desc->chip->mask(irq); - desc->chip->ack(irq); -} - -/* - * Level-based IRQ handler. Nice and simple. 
- */ -void -do_level_IRQ(unsigned int irq, struct irqdesc *desc, struct pt_regs *regs) -{ - struct irqaction *action; - const unsigned int cpu = smp_processor_id(); - - desc->triggered = 1; - - /* - * Acknowledge, clear _AND_ disable the interrupt. - */ - desc->chip->ack(irq); - - if (likely(!desc->disable_depth)) { - kstat_cpu(cpu).irqs[irq]++; - - smp_set_running(desc); - - /* - * Return with this interrupt masked if no action - */ - action = desc->action; - if (action) { - int ret = __do_irq(irq, desc->action, regs); - - if (ret != IRQ_HANDLED) - report_bad_irq(irq, regs, desc, ret); - - if (likely(!desc->disable_depth && - !check_irq_lock(desc, irq, regs))) - desc->chip->unmask(irq); - } - - smp_clear_running(desc); } + return 0; } -static void do_pending_irqs(struct pt_regs *regs) -{ - struct list_head head, *l, *n; - - do { - struct irqdesc *desc; - - /* - * First, take the pending interrupts off the list. - * The act of calling the handlers may add some IRQs - * back onto the list. - */ - head = irq_pending; - INIT_LIST_HEAD(&irq_pending); - head.next->prev = &head; - head.prev->next = &head; - - /* - * Now run each entry. We must delete it from our - * list before calling the handler. - */ - list_for_each_safe(l, n, &head) { - desc = list_entry(l, struct irqdesc, pend); - list_del_init(&desc->pend); - desc_handle_irq(desc - irq_desc, desc, regs); - } - - /* - * The list must be empty. - */ - BUG_ON(!list_empty(&head)); - } while (!list_empty(&irq_pending)); -} +/* Handle bad interrupts */ +static struct irq_desc bad_irq = { + .handler = &no_irq_type, + .lock = RAW_SPIN_LOCK_UNLOCKED +}; /* - * do_IRQ handles all hardware IRQ's. Decoded IRQs should not + * asm_do_IRQ handles all hardware IRQ's. Decoded IRQs should not * come via this function. 
Instead, they should provide their * own 'handler' */ -asmlinkage void asm_do_IRQ(unsigned int irq, struct pt_regs *regs) +asmlinkage notrace void asm_do_IRQ(unsigned int irq, struct pt_regs *regs) { struct irqdesc *desc = irq_desc + irq; + trace_special(instruction_pointer(regs), irq, 0); + /* * Some hardware gives randomly wrong interrupts. Rather * than crashing, do something sensible. */ if (irq >= NR_IRQS) - desc = &bad_irq_desc; + desc = &bad_irq; irq_enter(); - spin_lock(&irq_controller_lock); - desc_handle_irq(irq, desc, regs); - - /* - * Now re-run any pending interrupts. - */ - if (!list_empty(&irq_pending)) - do_pending_irqs(regs); - irq_finish(irq); + desc_handle_irq(irq, desc, regs); - spin_unlock(&irq_controller_lock); irq_exit(); } -void __set_irq_handler(unsigned int irq, irq_handler_t handle, int is_chained) +void __set_irq_handler(unsigned int irq, struct irq_type *type, int is_chained) { struct irqdesc *desc; unsigned long flags; if (irq >= NR_IRQS) { - printk(KERN_ERR "Trying to install handler for IRQ%d\n", irq); + printk(KERN_ERR "Trying to install type control for IRQ%d\n", irq); return; } - if (handle == NULL) - handle = do_bad_IRQ; - desc = irq_desc + irq; - if (is_chained && desc->chip == &bad_chip) - printk(KERN_WARNING "Trying to install chained handler for IRQ%d\n", irq); - - spin_lock_irqsave(&irq_controller_lock, flags); - if (handle == do_bad_IRQ) { - desc->chip->mask(irq); - desc->chip->ack(irq); - desc->disable_depth = 1; - } - desc->handle = handle; - if (handle != do_bad_IRQ && is_chained) { - desc->valid = 0; - desc->probe_ok = 0; - desc->disable_depth = 0; - desc->chip->unmask(irq); + /* Uninstall ? 
*/ + if (type == NULL || type == &no_irq_type) { + spin_lock_irqsave(&desc->lock, flags); + if (desc->chip) { + desc->chip->mask(irq); + desc->chip->ack(irq); + } + desc->depth = 1; + spin_unlock_irqrestore(&desc->lock, flags); } - spin_unlock_irqrestore(&irq_controller_lock, flags); -} - -void set_irq_chip(unsigned int irq, struct irqchip *chip) -{ - struct irqdesc *desc; - unsigned long flags; - if (irq >= NR_IRQS) { - printk(KERN_ERR "Trying to install chip for IRQ%d\n", irq); + /* Install the irq_type */ + if (generic_set_irq_type(irq, type)) return; - } - - if (chip == NULL) - chip = &bad_chip; - - desc = irq_desc + irq; - spin_lock_irqsave(&irq_controller_lock, flags); - desc->chip = chip; - spin_unlock_irqrestore(&irq_controller_lock, flags); -} -int set_irq_type(unsigned int irq, unsigned int type) -{ - struct irqdesc *desc; - unsigned long flags; - int ret = -ENXIO; + spin_lock_irqsave(&desc->lock, flags); + if (is_chained && (desc->handler == &no_irq_type || !desc->chip)) + printk(KERN_WARNING "Trying to install chained interrupt type for IRQ%d\n", irq); - if (irq >= NR_IRQS) { - printk(KERN_ERR "Trying to set irq type for IRQ%d\n", irq); - return -ENODEV; - } - - desc = irq_desc + irq; - if (desc->chip->set_type) { - spin_lock_irqsave(&irq_controller_lock, flags); - ret = desc->chip->set_type(irq, type); - spin_unlock_irqrestore(&irq_controller_lock, flags); + if (type != NULL && is_chained) { + desc->status |= IRQ_NOREQUEST | IRQ_NOPROBE; + desc->depth = 0; + if (desc->chip) + desc->chip->unmask(irq); } - - return ret; + spin_unlock_irqrestore(&desc->lock, flags); } -EXPORT_SYMBOL(set_irq_type); void set_irq_flags(unsigned int irq, unsigned int iflags) { @@ -645,408 +173,28 @@ void set_irq_flags(unsigned int irq, uns } desc = irq_desc + irq; - spin_lock_irqsave(&irq_controller_lock, flags); - desc->valid = (iflags & IRQF_VALID) != 0; - desc->probe_ok = (iflags & IRQF_PROBE) != 0; - desc->noautoenable = (iflags & IRQF_NOAUTOEN) != 0; - 
spin_unlock_irqrestore(&irq_controller_lock, flags); -} - -int setup_irq(unsigned int irq, struct irqaction *new) -{ - int shared = 0; - struct irqaction *old, **p; - unsigned long flags; - struct irqdesc *desc; - - /* - * Some drivers like serial.c use request_irq() heavily, - * so we have to be careful not to interfere with a - * running system. - */ - if (new->flags & SA_SAMPLE_RANDOM) { - /* - * This function might sleep, we want to call it first, - * outside of the atomic block. - * Yes, this might clear the entropy pool if the wrong - * driver is attempted to be loaded, without actually - * installing a new handler, but is this really a problem, - * only the sysadmin is able to do this. - */ - rand_initialize_irq(irq); - } - - /* - * The following block of code has to be executed atomically - */ - desc = irq_desc + irq; - spin_lock_irqsave(&irq_controller_lock, flags); - p = &desc->action; - if ((old = *p) != NULL) { - /* Can't share interrupts unless both agree to */ - if (!(old->flags & new->flags & SA_SHIRQ)) { - spin_unlock_irqrestore(&irq_controller_lock, flags); - return -EBUSY; - } - - /* add new interrupt at end of irq queue */ - do { - p = &old->next; - old = *p; - } while (old); - shared = 1; - } - - *p = new; - - if (!shared) { - desc->probing = 0; - desc->running = 0; - desc->pending = 0; - desc->disable_depth = 1; - if (!desc->noautoenable) { - desc->disable_depth = 0; - desc->chip->unmask(irq); - } - } - - spin_unlock_irqrestore(&irq_controller_lock, flags); - return 0; -} - -/** - * request_irq - allocate an interrupt line - * @irq: Interrupt line to allocate - * @handler: Function to be called when the IRQ occurs - * @irqflags: Interrupt type flags - * @devname: An ascii name for the claiming device - * @dev_id: A cookie passed back to the handler function - * - * This call allocates interrupt resources and enables the - * interrupt line and IRQ handling. From the point this - * call is made your handler function may be invoked. 
Since - * your handler function must clear any interrupt the board - * raises, you must take care both to initialise your hardware - * and to set up the interrupt handler in the right order. - * - * Dev_id must be globally unique. Normally the address of the - * device data structure is used as the cookie. Since the handler - * receives this value it makes sense to use it. - * - * If your interrupt is shared you must pass a non NULL dev_id - * as this is required when freeing the interrupt. - * - * Flags: - * - * SA_SHIRQ Interrupt is shared - * - * SA_INTERRUPT Disable local interrupts while processing - * - * SA_SAMPLE_RANDOM The interrupt can be used for entropy - * - */ -int request_irq(unsigned int irq, irqreturn_t (*handler)(int, void *, struct pt_regs *), - unsigned long irq_flags, const char * devname, void *dev_id) -{ - unsigned long retval; - struct irqaction *action; - - if (irq >= NR_IRQS || !irq_desc[irq].valid || !handler || - (irq_flags & SA_SHIRQ && !dev_id)) - return -EINVAL; - - action = (struct irqaction *)kmalloc(sizeof(struct irqaction), GFP_KERNEL); - if (!action) - return -ENOMEM; - - action->handler = handler; - action->flags = irq_flags; - cpus_clear(action->mask); - action->name = devname; - action->next = NULL; - action->dev_id = dev_id; - - retval = setup_irq(irq, action); - - if (retval) - kfree(action); - return retval; -} - -EXPORT_SYMBOL(request_irq); - -/** - * free_irq - free an interrupt - * @irq: Interrupt line to free - * @dev_id: Device identity to free - * - * Remove an interrupt handler. The handler is removed and if the - * interrupt line is no longer in use by any driver it is disabled. - * On a shared IRQ the caller must ensure the interrupt is disabled - * on the card it drives before calling this function. - * - * This function must not be called from interrupt context. 
- */ -void free_irq(unsigned int irq, void *dev_id) -{ - struct irqaction * action, **p; - unsigned long flags; - - if (irq >= NR_IRQS || !irq_desc[irq].valid) { - printk(KERN_ERR "Trying to free IRQ%d\n",irq); - dump_stack(); - return; - } - - spin_lock_irqsave(&irq_controller_lock, flags); - for (p = &irq_desc[irq].action; (action = *p) != NULL; p = &action->next) { - if (action->dev_id != dev_id) - continue; - - /* Found it - now free it */ - *p = action->next; - break; - } - spin_unlock_irqrestore(&irq_controller_lock, flags); - - if (!action) { - printk(KERN_ERR "Trying to free free IRQ%d\n",irq); - dump_stack(); - } else { - synchronize_irq(irq); - kfree(action); - } -} - -EXPORT_SYMBOL(free_irq); - -static DECLARE_MUTEX(probe_sem); - -/* Start the interrupt probing. Unlike other architectures, - * we don't return a mask of interrupts from probe_irq_on, - * but return the number of interrupts enabled for the probe. - * The interrupts which have been enabled for probing is - * instead recorded in the irq_desc structure. 
- */ -unsigned long probe_irq_on(void) -{ - unsigned int i, irqs = 0; - unsigned long delay; - - down(&probe_sem); - - /* - * first snaffle up any unassigned but - * probe-able interrupts - */ - spin_lock_irq(&irq_controller_lock); - for (i = 0; i < NR_IRQS; i++) { - if (!irq_desc[i].probe_ok || irq_desc[i].action) - continue; - - irq_desc[i].probing = 1; - irq_desc[i].triggered = 0; - if (irq_desc[i].chip->set_type) - irq_desc[i].chip->set_type(i, IRQT_PROBE); - irq_desc[i].chip->unmask(i); - irqs += 1; - } - spin_unlock_irq(&irq_controller_lock); - - /* - * wait for spurious interrupts to mask themselves out again - */ - for (delay = jiffies + HZ/10; time_before(jiffies, delay); ) - /* min 100ms delay */; - - /* - * now filter out any obviously spurious interrupts - */ - spin_lock_irq(&irq_controller_lock); - for (i = 0; i < NR_IRQS; i++) { - if (irq_desc[i].probing && irq_desc[i].triggered) { - irq_desc[i].probing = 0; - irqs -= 1; - } - } - spin_unlock_irq(&irq_controller_lock); - - return irqs; -} - -EXPORT_SYMBOL(probe_irq_on); - -unsigned int probe_irq_mask(unsigned long irqs) -{ - unsigned int mask = 0, i; - - spin_lock_irq(&irq_controller_lock); - for (i = 0; i < 16 && i < NR_IRQS; i++) - if (irq_desc[i].probing && irq_desc[i].triggered) - mask |= 1 << i; - spin_unlock_irq(&irq_controller_lock); - - up(&probe_sem); - - return mask; -} -EXPORT_SYMBOL(probe_irq_mask); - -/* - * Possible return values: - * >= 0 - interrupt number - * -1 - no interrupt/many interrupts - */ -int probe_irq_off(unsigned long irqs) -{ - unsigned int i; - int irq_found = NO_IRQ; - - /* - * look at the interrupts, and find exactly one - * that we were probing has been triggered - */ - spin_lock_irq(&irq_controller_lock); - for (i = 0; i < NR_IRQS; i++) { - if (irq_desc[i].probing && - irq_desc[i].triggered) { - if (irq_found != NO_IRQ) { - irq_found = NO_IRQ; - goto out; - } - irq_found = i; - } - } - - if (irq_found == -1) - irq_found = NO_IRQ; -out: - 
spin_unlock_irq(&irq_controller_lock); - - up(&probe_sem); - - return irq_found; -} - -EXPORT_SYMBOL(probe_irq_off); - -#ifdef CONFIG_SMP -static void route_irq(struct irqdesc *desc, unsigned int irq, unsigned int cpu) -{ - pr_debug("IRQ%u: moving from cpu%u to cpu%u\n", irq, desc->cpu, cpu); - - spin_lock_irq(&irq_controller_lock); - desc->cpu = cpu; - desc->chip->set_cpu(desc, irq, cpu); - spin_unlock_irq(&irq_controller_lock); -} - -#ifdef CONFIG_PROC_FS -static int -irq_affinity_read_proc(char *page, char **start, off_t off, int count, - int *eof, void *data) -{ - struct irqdesc *desc = irq_desc + ((int)data); - int len = cpumask_scnprintf(page, count, desc->affinity); - - if (count - len < 2) - return -EINVAL; - page[len++] = '\n'; - page[len] = '\0'; - - return len; -} - -static int -irq_affinity_write_proc(struct file *file, const char __user *buffer, - unsigned long count, void *data) -{ - unsigned int irq = (unsigned int)data; - struct irqdesc *desc = irq_desc + irq; - cpumask_t affinity, tmp; - int ret = -EIO; - - if (!desc->chip->set_cpu) - goto out; - - ret = cpumask_parse(buffer, count, affinity); - if (ret) - goto out; - - cpus_and(tmp, affinity, cpu_online_map); - if (cpus_empty(tmp)) { - ret = -EINVAL; - goto out; - } - - desc->affinity = affinity; - route_irq(desc, irq, first_cpu(tmp)); - ret = count; - - out: - return ret; -} -#endif -#endif - -void __init init_irq_proc(void) -{ -#if defined(CONFIG_SMP) && defined(CONFIG_PROC_FS) - struct proc_dir_entry *dir; - int irq; - - dir = proc_mkdir("irq", 0); - if (!dir) - return; - - for (irq = 0; irq < NR_IRQS; irq++) { - struct proc_dir_entry *entry; - struct irqdesc *desc; - char name[16]; - - desc = irq_desc + irq; - memset(name, 0, sizeof(name)); - snprintf(name, sizeof(name) - 1, "%u", irq); - - desc->procdir = proc_mkdir(name, dir); - if (!desc->procdir) - continue; - - entry = create_proc_entry("smp_affinity", 0600, desc->procdir); - if (entry) { - entry->nlink = 1; - entry->data = (void *)irq; - 
entry->read_proc = irq_affinity_read_proc; - entry->write_proc = irq_affinity_write_proc; - } - } -#endif + spin_lock_irqsave(&desc->lock, flags); + desc->status |= IRQ_NOREQUEST | IRQ_NOPROBE; + if (iflags & IRQF_VALID) + desc->status &= ~IRQ_NOREQUEST; + if (iflags & IRQF_PROBE) + desc->status &= ~IRQ_NOPROBE; + spin_unlock_irqrestore(&desc->lock, flags); } void __init init_IRQ(void) { - struct irqdesc *desc; extern void init_dma(void); int irq; + for (irq = 0; irq < NR_IRQS; irq++) + irq_desc[irq].status |= IRQ_NOREQUEST; + #ifdef CONFIG_SMP bad_irq_desc.affinity = CPU_MASK_ALL; bad_irq_desc.cpu = smp_processor_id(); #endif - for (irq = 0, desc = irq_desc; irq < NR_IRQS; irq++, desc++) { - *desc = bad_irq_desc; - INIT_LIST_HEAD(&desc->pend); - } - init_arch_irq(); init_dma(); } - -static int __init noirqdebug_setup(char *str) -{ - noirqdebug = 1; - return 1; -} - -__setup("noirqdebug", noirqdebug_setup); Index: linux/arch/arm/kernel/process.c =================================================================== --- linux.orig/arch/arm/kernel/process.c +++ linux/arch/arm/kernel/process.c @@ -85,12 +85,12 @@ EXPORT_SYMBOL(pm_power_off); */ void default_idle(void) { - local_irq_disable(); + raw_local_irq_disable(); if (!need_resched() && !hlt_counter) { timer_dyn_reprogram(); arch_idle(); } - local_irq_enable(); + raw_local_irq_enable(); } /* @@ -112,8 +112,8 @@ void cpu_idle(void) while (!need_resched()) idle(); leds_event(led_idle_end); - preempt_enable(); - schedule(); + __preempt_enable_no_resched(); + __schedule(); } } Index: linux/arch/arm/kernel/semaphore.c =================================================================== --- linux.orig/arch/arm/kernel/semaphore.c +++ linux/arch/arm/kernel/semaphore.c @@ -49,14 +49,14 @@ * we cannot lose wakeup events. 
*/ -void __up(struct semaphore *sem) +fastcall void __attribute_used__ __compat_up(struct compat_semaphore *sem) { wake_up(&sem->wait); } static DEFINE_SPINLOCK(semaphore_lock); -void __sched __down(struct semaphore * sem) +fastcall void __attribute_used__ __sched __compat_down(struct compat_semaphore * sem) { struct task_struct *tsk = current; DECLARE_WAITQUEUE(wait, tsk); @@ -89,7 +89,7 @@ void __sched __down(struct semaphore * s wake_up(&sem->wait); } -int __sched __down_interruptible(struct semaphore * sem) +fastcall int __attribute_used__ __sched __compat_down_interruptible(struct compat_semaphore * sem) { int retval = 0; struct task_struct *tsk = current; @@ -148,7 +148,7 @@ int __sched __down_interruptible(struct * single "cmpxchg" without failure cases, * but then it wouldn't work on a 386. */ -int __down_trylock(struct semaphore * sem) +fastcall int __attribute_used__ __compat_down_trylock(struct compat_semaphore * sem) { int sleepers; unsigned long flags; @@ -168,6 +168,11 @@ int __down_trylock(struct semaphore * se return 1; } +fastcall int compat_sem_is_locked(struct compat_semaphore *sem) +{ + return (int) atomic_read(&sem->count) < 0; +} + /* * The semaphore operations have a special calling sequence that * allow us to do a simpler in-line version of them. 
These routines @@ -184,7 +189,7 @@ asm(" .section .sched.text,\"ax\",%progb __down_failed: \n\ stmfd sp!, {r0 - r3, lr} \n\ mov r0, ip \n\ - bl __down \n\ + bl __compat_down \n\ ldmfd sp!, {r0 - r3, pc} \n\ \n\ .align 5 \n\ @@ -192,7 +197,7 @@ __down_failed: \n\ __down_interruptible_failed: \n\ stmfd sp!, {r0 - r3, lr} \n\ mov r0, ip \n\ - bl __down_interruptible \n\ + bl __compat_down_interruptible \n\ mov ip, r0 \n\ ldmfd sp!, {r0 - r3, pc} \n\ \n\ @@ -201,7 +206,7 @@ __down_interruptible_failed: \n\ __down_trylock_failed: \n\ stmfd sp!, {r0 - r3, lr} \n\ mov r0, ip \n\ - bl __down_trylock \n\ + bl __compat_down_trylock \n\ mov ip, r0 \n\ ldmfd sp!, {r0 - r3, pc} \n\ \n\ @@ -210,7 +215,7 @@ __down_trylock_failed: \n\ __up_wakeup: \n\ stmfd sp!, {r0 - r3, lr} \n\ mov r0, ip \n\ - bl __up \n\ + bl __compat_up \n\ ldmfd sp!, {r0 - r3, pc} \n\ "); Index: linux/arch/arm/kernel/signal.c =================================================================== --- linux.orig/arch/arm/kernel/signal.c +++ linux/arch/arm/kernel/signal.c @@ -689,6 +689,14 @@ static int do_signal(sigset_t *oldset, s siginfo_t info; int signr; +#ifdef CONFIG_PREEMPT_RT + /* + * Fully-preemptible kernel does not need interrupts disabled: + */ + raw_local_irq_enable(); + preempt_check_resched(); +#endif + /* * We want the common case to go fast, which * is why we may in certain cases get here from Index: linux/arch/arm/kernel/smp.c =================================================================== --- linux.orig/arch/arm/kernel/smp.c +++ linux/arch/arm/kernel/smp.c @@ -56,6 +56,7 @@ struct ipi_data { unsigned long bits; }; +/* FIXME */ static DEFINE_PER_CPU(struct ipi_data, ipi_data) = { .lock = SPIN_LOCK_UNLOCKED, }; @@ -246,7 +247,7 @@ static void send_ipi_message(cpumask_t c unsigned long flags; unsigned int cpu; - local_irq_save(flags); + raw_local_irq_save(flags); for_each_cpu_mask(cpu, callmap) { struct ipi_data *ipi = &per_cpu(ipi_data, cpu); @@ -261,7 +262,7 @@ static void 
send_ipi_message(cpumask_t c */ smp_cross_call(callmap); - local_irq_restore(flags); + raw_local_irq_restore(flags); } /* @@ -394,7 +395,7 @@ static void ipi_call_function(unsigned i cpu_clear(cpu, data->unfinished); } -static DEFINE_SPINLOCK(stop_lock); +static DEFINE_RAW_SPINLOCK(stop_lock); /* * ipi_cpu_stop - handle IPI from smp_send_stop() @@ -409,7 +410,7 @@ static void ipi_cpu_stop(unsigned int cp cpu_clear(cpu, cpu_online_map); local_fiq_disable(); - local_irq_disable(); + raw_local_irq_disable(); while (1) cpu_relax(); Index: linux/arch/arm/kernel/time.c =================================================================== --- linux.orig/arch/arm/kernel/time.c +++ linux/arch/arm/kernel/time.c @@ -36,10 +36,6 @@ #include #include -u64 jiffies_64 = INITIAL_JIFFIES; - -EXPORT_SYMBOL(jiffies_64); - /* * Our system timer. */ Index: linux/arch/arm/kernel/traps.c =================================================================== --- linux.orig/arch/arm/kernel/traps.c +++ linux/arch/arm/kernel/traps.c @@ -177,6 +177,8 @@ void dump_stack(void) { #ifdef CONFIG_DEBUG_ERRORS __backtrace(); + print_traces(current); + show_held_locks(current); #endif } @@ -198,7 +200,7 @@ void show_stack(struct task_struct *tsk, barrier(); } -DEFINE_SPINLOCK(die_lock); +DEFINE_RAW_SPINLOCK(die_lock); /* * This function is protected against re-entrancy. 
@@ -244,7 +246,7 @@ void notify_die(const char *str, struct } static LIST_HEAD(undef_hook); -static DEFINE_SPINLOCK(undef_lock); +static DEFINE_RAW_SPINLOCK(undef_lock); void register_undef_hook(struct undef_hook *hook) { @@ -336,7 +338,7 @@ asmlinkage void bad_mode(struct pt_regs handler[reason], processor_modes[proc_mode]); die("Oops - bad mode", regs, 0); - local_irq_disable(); + raw_local_irq_disable(); panic("bad mode"); } Index: linux/arch/arm/mach-clps711x/p720t-leds.c =================================================================== --- linux.orig/arch/arm/mach-clps711x/p720t-leds.c +++ linux/arch/arm/mach-clps711x/p720t-leds.c @@ -36,7 +36,7 @@ static void p720t_leds_event(led_event_t unsigned long flags; u32 pddr; - local_irq_save(flags); + raw_local_irq_save(flags); switch(ledevt) { case led_idle_start: break; @@ -53,7 +53,7 @@ static void p720t_leds_event(led_event_t break; } - local_irq_restore(flags); + raw_local_irq_restore(flags); } static int __init leds_init(void) Index: linux/arch/arm/mach-clps711x/time.c =================================================================== --- linux.orig/arch/arm/mach-clps711x/time.c +++ linux/arch/arm/mach-clps711x/time.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include Index: linux/arch/arm/mach-clps7500/core.c =================================================================== --- linux.orig/arch/arm/mach-clps7500/core.c +++ linux/arch/arm/mach-clps7500/core.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include Index: linux/arch/arm/mach-ebsa110/core.c =================================================================== --- linux.orig/arch/arm/mach-ebsa110/core.c +++ linux/arch/arm/mach-ebsa110/core.c @@ -56,14 +56,14 @@ static void __init ebsa110_init_irq(void unsigned long flags; unsigned int irq; - local_irq_save(flags); + raw_local_irq_save(flags); __raw_writeb(0xff, IRQ_MCLR); __raw_writeb(0x55, IRQ_MSET); __raw_writeb(0x00, IRQ_MSET); if 
(__raw_readb(IRQ_MASK) != 0x55) while (1); __raw_writeb(0xff, IRQ_MCLR); /* clear all interrupt enables */ - local_irq_restore(flags); + raw_local_irq_restore(flags); for (irq = 0; irq < NR_IRQS; irq++) { set_irq_chip(irq, &ebsa110_irq_chip); Index: linux/arch/arm/mach-footbridge/dc21285-timer.c =================================================================== --- linux.orig/arch/arm/mach-footbridge/dc21285-timer.c +++ linux/arch/arm/mach-footbridge/dc21285-timer.c @@ -6,6 +6,7 @@ */ #include #include +#include #include Index: linux/arch/arm/mach-footbridge/isa-irq.c =================================================================== --- linux.orig/arch/arm/mach-footbridge/isa-irq.c +++ linux/arch/arm/mach-footbridge/isa-irq.c @@ -102,6 +102,17 @@ static struct irqaction irq_cascade = { static struct resource pic1_resource = { "pic1", 0x20, 0x3f }; static struct resource pic2_resource = { "pic2", 0xa0, 0xbf }; +static DEFINE_IRQ_CHAINED_TYPE(isa_irq_handler); + +static unsigned int startup_irq_disabled(unsigned int irq) +{ + return 0; +} + +/* Interrupt type for irqs which must not be + * automatically enabled in request_irq */ +static struct irq_type level_type_nostart; + void __init isa_init_irq(unsigned int host_irq) { unsigned int irq; @@ -159,9 +170,11 @@ void __init isa_init_irq(unsigned int ho * There appears to be a missing pull-up * resistor on this line. 
*/ - if (machine_is_netwinder()) - set_irq_flags(_ISA_IRQ(11), IRQF_VALID | - IRQF_PROBE | IRQF_NOAUTOEN); + if (machine_is_netwinder()) { + level_type_nostart = default_level_type; + level_type_nostart.startup = startup_irq_disabled; + set_irq_handler(_ISA_IRQ(11), &level_type_nostart); + } } } Index: linux/arch/arm/mach-footbridge/isa-timer.c =================================================================== --- linux.orig/arch/arm/mach-footbridge/isa-timer.c +++ linux/arch/arm/mach-footbridge/isa-timer.c @@ -6,6 +6,7 @@ */ #include #include +#include #include #include Index: linux/arch/arm/mach-footbridge/netwinder-hw.c =================================================================== --- linux.orig/arch/arm/mach-footbridge/netwinder-hw.c +++ linux/arch/arm/mach-footbridge/netwinder-hw.c @@ -68,7 +68,7 @@ static inline void wb977_ww(int reg, int /* * This is a lock for accessing ports GP1_IO_BASE and GP2_IO_BASE */ -DEFINE_SPINLOCK(gpio_lock); +DEFINE_RAW_SPINLOCK(gpio_lock); static unsigned int current_gpio_op; static unsigned int current_gpio_io; Index: linux/arch/arm/mach-footbridge/netwinder-leds.c =================================================================== --- linux.orig/arch/arm/mach-footbridge/netwinder-leds.c +++ linux/arch/arm/mach-footbridge/netwinder-leds.c @@ -33,7 +33,7 @@ static char led_state; static char hw_led_state; static DEFINE_SPINLOCK(leds_lock); -extern spinlock_t gpio_lock; +extern raw_spinlock_t gpio_lock; static void netwinder_leds_event(led_event_t evt) { Index: linux/arch/arm/mach-h720x/common.c =================================================================== --- linux.orig/arch/arm/mach-h720x/common.c +++ linux/arch/arm/mach-h720x/common.c @@ -163,6 +163,11 @@ h720x_gpiod_demux_handler(unsigned int i h720x_gpio_handler(mask, irq, desc, regs); } +static DEFINE_IRQ_CHAINED_TYPE(h720x_gpioa_demux_handler); +static DEFINE_IRQ_CHAINED_TYPE(h720x_gpiob_demux_handler); +static 
DEFINE_IRQ_CHAINED_TYPE(h720x_gpioc_demux_handler); +static DEFINE_IRQ_CHAINED_TYPE(h720x_gpiod_demux_handler); + #ifdef CONFIG_CPU_H7202 static void h720x_gpioe_demux_handler(unsigned int irq_unused, struct irqdesc *desc, @@ -175,6 +180,7 @@ h720x_gpioe_demux_handler(unsigned int i IRQDBG("%s mask: 0x%08x irq: %d\n",__FUNCTION__,mask,irq); h720x_gpio_handler(mask, irq, desc, regs); } +static DEFINE_IRQ_CHAINED_TYPE(h720x_gpioe_demux_handler); #endif static struct irqchip h720x_global_chip = { Index: linux/arch/arm/mach-h720x/cpu-h7202.c =================================================================== --- linux.orig/arch/arm/mach-h720x/cpu-h7202.c +++ linux/arch/arm/mach-h720x/cpu-h7202.c @@ -175,6 +175,8 @@ static struct irqaction h7202_timer_irq .handler = h7202_timer_interrupt, }; +static DEFINE_IRQ_CHAINED_TYPE(h7202_timerx_demux_handler); + /* * Setup TIMER0 as system timer */ Index: linux/arch/arm/mach-imx/dma.c =================================================================== --- linux.orig/arch/arm/mach-imx/dma.c +++ linux/arch/arm/mach-imx/dma.c @@ -43,7 +43,7 @@ imx_request_dma(char *name, imx_dma_prio if (!name || !irq_handler) return -EINVAL; - local_irq_save(flags); + raw_local_irq_save(flags); /* try grabbing a DMA channel with the requested priority */ for (i = prio; i < prio + (prio == DMA_PRIO_LOW) ? 
8 : 4; i++) { @@ -75,7 +75,7 @@ imx_request_dma(char *name, imx_dma_prio i = -ENODEV; } - local_irq_restore(flags); + raw_local_irq_restore(flags); return i; } @@ -91,10 +91,10 @@ imx_free_dma(int dma_ch) return; } - local_irq_save(flags); + raw_local_irq_save(flags); DIMR &= ~(1 << dma_ch); dma_channels[dma_ch].name = NULL; - local_irq_restore(flags); + raw_local_irq_restore(flags); } static irqreturn_t Index: linux/arch/arm/mach-imx/irq.c =================================================================== --- linux.orig/arch/arm/mach-imx/irq.c +++ linux/arch/arm/mach-imx/irq.c @@ -217,6 +217,11 @@ static struct irqchip imx_gpio_chip = { .set_type = imx_gpio_irq_type, }; +static DEFINE_IRQ_CHAINED_TYPE(imx_gpioa_demux_handler); +static DEFINE_IRQ_CHAINED_TYPE(imx_gpiob_demux_handler); +static DEFINE_IRQ_CHAINED_TYPE(imx_gpioc_demux_handler); +static DEFINE_IRQ_CHAINED_TYPE(imx_gpiod_demux_handler); + void __init imx_init_irq(void) { Index: linux/arch/arm/mach-imx/leds-mx1ads.c =================================================================== --- linux.orig/arch/arm/mach-imx/leds-mx1ads.c +++ linux/arch/arm/mach-imx/leds-mx1ads.c @@ -29,7 +29,7 @@ mx1ads_leds_event(led_event_t ledevt) { unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); switch (ledevt) { #ifdef CONFIG_LEDS_CPU @@ -49,5 +49,5 @@ mx1ads_leds_event(led_event_t ledevt) default: break; } - local_irq_restore(flags); + raw_local_irq_restore(flags); } Index: linux/arch/arm/mach-imx/time.c =================================================================== --- linux.orig/arch/arm/mach-imx/time.c +++ linux/arch/arm/mach-imx/time.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include Index: linux/arch/arm/mach-integrator/core.c =================================================================== --- linux.orig/arch/arm/mach-integrator/core.c +++ linux/arch/arm/mach-integrator/core.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -117,7 
+118,7 @@ arch_initcall(integrator_init); #define CM_CTRL IO_ADDRESS(INTEGRATOR_HDR_BASE) + INTEGRATOR_HDR_CTRL_OFFSET -static DEFINE_SPINLOCK(cm_lock); +static DEFINE_RAW_SPINLOCK(cm_lock); /** * cm_control - update the CM_CTRL register. Index: linux/arch/arm/mach-integrator/leds.c =================================================================== --- linux.orig/arch/arm/mach-integrator/leds.c +++ linux/arch/arm/mach-integrator/leds.c @@ -41,7 +41,7 @@ static void integrator_leds_event(led_ev unsigned int update_alpha_leds; // yup, change the LEDs - local_irq_save(flags); + raw_local_irq_save(flags); update_alpha_leds = 0; switch(ledevt) { @@ -76,7 +76,7 @@ static void integrator_leds_event(led_ev while (__raw_readl(dbg_base + INTEGRATOR_DBG_ALPHA_OFFSET) & 1); __raw_writel(saved_leds, dbg_base + INTEGRATOR_DBG_LEDS_OFFSET); } - local_irq_restore(flags); + raw_local_irq_restore(flags); } static int __init leds_init(void) Index: linux/arch/arm/mach-integrator/pci_v3.c =================================================================== --- linux.orig/arch/arm/mach-integrator/pci_v3.c +++ linux/arch/arm/mach-integrator/pci_v3.c @@ -163,7 +163,7 @@ * 7:2 register number * */ -static DEFINE_SPINLOCK(v3_lock); +static DEFINE_RAW_SPINLOCK(v3_lock); #define PCI_BUS_NONMEM_START 0x00000000 #define PCI_BUS_NONMEM_SIZE SZ_256M Index: linux/arch/arm/mach-integrator/platsmp.c =================================================================== --- linux.orig/arch/arm/mach-integrator/platsmp.c +++ linux/arch/arm/mach-integrator/platsmp.c @@ -31,7 +31,7 @@ extern void integrator_secondary_startup volatile int __cpuinitdata pen_release = -1; unsigned long __cpuinitdata phys_pen_release = 0; -static DEFINE_SPINLOCK(boot_lock); +static DEFINE_RAW_SPINLOCK(boot_lock); void __cpuinit platform_secondary_init(unsigned int cpu) { Index: linux/arch/arm/mach-ixp2000/core.c =================================================================== --- linux.orig/arch/arm/mach-ixp2000/core.c +++ 
linux/arch/arm/mach-ixp2000/core.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -286,9 +287,9 @@ void gpio_line_config(int line, int dire { unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); if (direction == GPIO_OUT) { - irq_desc[line + IRQ_IXP2000_GPIO0].valid = 0; + set_irq_flags(line + IRQ_IXP2000_GPIO0, 0); /* if it's an output, it ain't an interrupt anymore */ GPIO_IRQ_falling_edge &= ~(1 << line); @@ -301,7 +302,7 @@ void gpio_line_config(int line, int dire } else if (direction == GPIO_IN) { ixp2000_reg_write(IXP2000_GPIO_PDCR, 1 << line); } - local_irq_restore(flags); + raw_local_irq_restore(flags); } @@ -354,8 +355,7 @@ static int ixp2000_GPIO_irq_type(unsigne /* * Finally, mark the corresponding IRQ as valid. */ - irq_desc[irq].valid = 1; - + set_irq_flags(irq, IRQF_VALID); return 0; } @@ -425,6 +425,8 @@ static struct irqchip ixp2000_irq_chip = .unmask = ixp2000_irq_unmask }; +static DEFINE_IRQ_CHAINED_TYPE(ixp2000_GPIO_irq_handler); + void __init ixp2000_init_irq(void) { int irq; Index: linux/arch/arm/mach-ixp2000/ixdp2x00.c =================================================================== --- linux.orig/arch/arm/mach-ixp2000/ixdp2x00.c +++ linux/arch/arm/mach-ixp2000/ixdp2x00.c @@ -146,6 +146,8 @@ static struct irqchip ixdp2x00_cpld_irq_ .unmask = ixdp2x00_irq_unmask }; +static DEFINE_IRQ_CHAINED_TYPE(ixdp2x00_irq_handler); + void ixdp2x00_init_irq(volatile unsigned long *stat_reg, volatile unsigned long *mask_reg, unsigned long nr_irqs) { unsigned int irq; @@ -168,7 +170,7 @@ void ixdp2x00_init_irq(volatile unsigned } /* Hook into PCI interrupt */ - set_irq_chained_handler(IRQ_IXP2000_PCIB, &ixdp2x00_irq_handler); + set_irq_chained_handler(IRQ_IXP2000_PCIB, ixdp2x00_irq_handler); } /************************************************************************* Index: linux/arch/arm/mach-ixp2000/ixdp2x01.c =================================================================== --- 
linux.orig/arch/arm/mach-ixp2000/ixdp2x01.c +++ linux/arch/arm/mach-ixp2000/ixdp2x01.c @@ -95,6 +95,8 @@ static struct irqchip ixdp2x01_irq_chip .unmask = ixdp2x01_irq_unmask }; +static DEFINE_IRQ_CHAINED_TYPE(ixdp2x01_irq_handler); + /* * We only do anything if we are the master NPU on the board. * The slave NPU only has the ethernet chip going directly to @@ -127,7 +129,7 @@ void __init ixdp2x01_init_irq(void) } /* Hook into PCI interrupts */ - set_irq_chained_handler(IRQ_IXP2000_PCIB, &ixdp2x01_irq_handler); + set_irq_chained_handler(IRQ_IXP2000_PCIB, ixdp2x01_irq_handler); } Index: linux/arch/arm/mach-ixp2000/pci.c =================================================================== --- linux.orig/arch/arm/mach-ixp2000/pci.c +++ linux/arch/arm/mach-ixp2000/pci.c @@ -145,7 +145,7 @@ int ixp2000_pci_abort_handler(unsigned l pci_master_aborts = 1; - local_irq_save(flags); + raw_local_irq_save(flags); temp = *(IXP2000_PCI_CONTROL); if (temp & ((1 << 8) | (1 << 5))) { ixp2000_reg_write(IXP2000_PCI_CONTROL, temp); @@ -158,7 +158,7 @@ int ixp2000_pci_abort_handler(unsigned l temp = *(IXP2000_PCI_CMDSTAT); } } - local_irq_restore(flags); + raw_local_irq_restore(flags); /* * If it was an imprecise abort, then we need to correct the @@ -176,7 +176,7 @@ clear_master_aborts(void) volatile u32 temp; unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); temp = *(IXP2000_PCI_CONTROL); if (temp & ((1 << 8) | (1 << 5))) { ixp2000_reg_write(IXP2000_PCI_CONTROL, temp); @@ -189,7 +189,7 @@ clear_master_aborts(void) temp = *(IXP2000_PCI_CMDSTAT); } } - local_irq_restore(flags); + raw_local_irq_restore(flags); return 0; } Index: linux/arch/arm/mach-ixp4xx/common-pci.c =================================================================== --- linux.orig/arch/arm/mach-ixp4xx/common-pci.c +++ linux/arch/arm/mach-ixp4xx/common-pci.c @@ -53,7 +53,7 @@ unsigned long ixp4xx_pci_reg_base = 0; * these transactions are atomic or we will end up * with corrupt data on the bus 
or in a driver. */ -static DEFINE_SPINLOCK(ixp4xx_pci_lock); +static DEFINE_RAW_SPINLOCK(ixp4xx_pci_lock); /* * Read from PCI config space Index: linux/arch/arm/mach-ixp4xx/coyote-pci.c =================================================================== --- linux.orig/arch/arm/mach-ixp4xx/coyote-pci.c +++ linux/arch/arm/mach-ixp4xx/coyote-pci.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include Index: linux/arch/arm/mach-ixp4xx/ixdp425-pci.c =================================================================== --- linux.orig/arch/arm/mach-ixp4xx/ixdp425-pci.c +++ linux/arch/arm/mach-ixp4xx/ixdp425-pci.c @@ -16,6 +16,7 @@ #include #include +#include #include #include #include Index: linux/arch/arm/mach-ixp4xx/ixdpg425-pci.c =================================================================== --- linux.orig/arch/arm/mach-ixp4xx/ixdpg425-pci.c +++ linux/arch/arm/mach-ixp4xx/ixdpg425-pci.c @@ -16,10 +16,10 @@ #include #include #include +#include #include #include -#include #include Index: linux/arch/arm/mach-l7200/core.c =================================================================== --- linux.orig/arch/arm/mach-l7200/core.c +++ linux/arch/arm/mach-l7200/core.c @@ -7,6 +7,7 @@ */ #include #include +#include #include #include Index: linux/arch/arm/mach-lh7a40x/arch-kev7a400.c =================================================================== --- linux.orig/arch/arm/mach-lh7a40x/arch-kev7a400.c +++ linux/arch/arm/mach-lh7a40x/arch-kev7a400.c @@ -72,6 +72,8 @@ static void kev7a400_cpld_handler (unsig } } +static DEFINE_IRQ_CHAINED_TYPE(kev7a400_cpld_handler); + void __init lh7a40x_init_board_irq (void) { int irq; Index: linux/arch/arm/mach-lh7a40x/arch-lpd7a40x.c =================================================================== --- linux.orig/arch/arm/mach-lh7a40x/arch-lpd7a40x.c +++ linux/arch/arm/mach-lh7a40x/arch-lpd7a40x.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include @@ -173,6 +174,7 @@ static void 
lpd7a40x_cpld_handler (unsig desc->chip->unmask (irq); /* Level-triggered need this */ } +static DEFINE_IRQ_CHAINED_TYPE(lpd7a40x_cpld_handler); void __init lh7a40x_init_board_irq (void) { Index: linux/arch/arm/mach-lh7a40x/irq-kev7a400.c =================================================================== --- linux.orig/arch/arm/mach-lh7a40x/irq-kev7a400.c +++ linux/arch/arm/mach-lh7a40x/irq-kev7a400.c @@ -60,6 +60,8 @@ lh7a400_cpld_handler (unsigned int irq, } } +static DEFINE_IRQ_CHAINED_TYPE(kev7a400_cpld_handler); + /* IRQ initialization */ void __init Index: linux/arch/arm/mach-lh7a40x/irq-lpd7a40x.c =================================================================== --- linux.orig/arch/arm/mach-lh7a40x/irq-lpd7a40x.c +++ linux/arch/arm/mach-lh7a40x/irq-lpd7a40x.c @@ -71,6 +71,7 @@ static void lh7a40x_cpld_handler (unsign desc->chip->unmask (irq); /* Level-triggered need this */ } +static DEFINE_IRQ_CHAINED_TYPE(lh7a40x_cpld_handler); /* IRQ initialization */ Index: linux/arch/arm/mach-lh7a40x/time.c =================================================================== --- linux.orig/arch/arm/mach-lh7a40x/time.c +++ linux/arch/arm/mach-lh7a40x/time.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include Index: linux/arch/arm/mach-omap1/board-osk.c =================================================================== --- linux.orig/arch/arm/mach-omap1/board-osk.c +++ linux/arch/arm/mach-omap1/board-osk.c @@ -29,7 +29,7 @@ #include #include #include -#include +#include #include #include Index: linux/arch/arm/mach-omap1/fpga.c =================================================================== --- linux.orig/arch/arm/mach-omap1/fpga.c +++ linux/arch/arm/mach-omap1/fpga.c @@ -120,6 +120,8 @@ static struct irqchip omap_fpga_irq = { .unmask = fpga_unmask_irq, }; +static DEFINE_IRQ_CHAINED_TYPE(innovator_fpga_IRQ_demux); + /* * All of the FPGA interrupt request inputs except for the touchscreen are * edge-sensitive; the touchscreen is level-sensitive. 
The edge-sensitive Index: linux/arch/arm/mach-omap1/leds-h2p2-debug.c =================================================================== --- linux.orig/arch/arm/mach-omap1/leds-h2p2-debug.c +++ linux/arch/arm/mach-omap1/leds-h2p2-debug.c @@ -45,7 +45,7 @@ void h2p2_dbg_leds_event(led_event_t evt static struct h2p2_dbg_fpga __iomem *fpga; static u16 led_state, hw_led_state; - local_irq_save(flags); + raw_local_irq_save(flags); if (!(led_state & LED_STATE_ENABLED) && evt != led_start) goto done; @@ -140,5 +140,5 @@ void h2p2_dbg_leds_event(led_event_t evt __raw_writew(~hw_led_state, &fpga->leds); done: - local_irq_restore(flags); + raw_local_irq_restore(flags); } Index: linux/arch/arm/mach-omap1/serial.c =================================================================== --- linux.orig/arch/arm/mach-omap1/serial.c +++ linux/arch/arm/mach-omap1/serial.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include Index: linux/arch/arm/mach-pxa/dma.c =================================================================== --- linux.orig/arch/arm/mach-pxa/dma.c +++ linux/arch/arm/mach-pxa/dma.c @@ -43,7 +43,7 @@ int pxa_request_dma (char *name, pxa_dma if (!name || !irq_handler) return -EINVAL; - local_irq_save(flags); + raw_local_irq_save(flags); /* try grabbing a DMA channel with the requested priority */ for (i = prio; i < prio + PXA_DMA_NBCH(prio); i++) { @@ -73,7 +73,7 @@ int pxa_request_dma (char *name, pxa_dma i = -ENODEV; } - local_irq_restore(flags); + raw_local_irq_restore(flags); return i; } @@ -88,10 +88,10 @@ void pxa_free_dma (int dma_ch) return; } - local_irq_save(flags); + raw_local_irq_save(flags); DCSR(dma_ch) = DCSR_STARTINTR|DCSR_ENDINTR|DCSR_BUSERR; dma_channels[dma_ch].name = NULL; - local_irq_restore(flags); + raw_local_irq_restore(flags); } static irqreturn_t dma_irq_handler(int irq, void *dev_id, struct pt_regs *regs) Index: linux/arch/arm/mach-pxa/generic.c =================================================================== --- 
linux.orig/arch/arm/mach-pxa/generic.c +++ linux/arch/arm/mach-pxa/generic.c @@ -49,7 +49,7 @@ void pxa_gpio_mode(int gpio_mode) int fn = (gpio_mode & GPIO_MD_MASK_FN) >> 8; int gafr; - local_irq_save(flags); + raw_local_irq_save(flags); if (gpio_mode & GPIO_DFLT_LOW) GPCR(gpio) = GPIO_bit(gpio); else if (gpio_mode & GPIO_DFLT_HIGH) @@ -60,7 +60,7 @@ void pxa_gpio_mode(int gpio_mode) GPDR(gpio) &= ~GPIO_bit(gpio); gafr = GAFR(gpio) & ~(0x3 << (((gpio) & 0xf)*2)); GAFR(gpio) = gafr | (fn << (((gpio) & 0xf)*2)); - local_irq_restore(flags); + raw_local_irq_restore(flags); } EXPORT_SYMBOL(pxa_gpio_mode); @@ -71,14 +71,14 @@ EXPORT_SYMBOL(pxa_gpio_mode); void pxa_set_cken(int clock, int enable) { unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); if (enable) CKEN |= clock; else CKEN &= ~clock; - local_irq_restore(flags); + raw_local_irq_restore(flags); } EXPORT_SYMBOL(pxa_set_cken); Index: linux/arch/arm/mach-pxa/idp.c =================================================================== --- linux.orig/arch/arm/mach-pxa/idp.c +++ linux/arch/arm/mach-pxa/idp.c @@ -18,6 +18,7 @@ #include #include +#include #include #include Index: linux/arch/arm/mach-pxa/irq.c =================================================================== --- linux.orig/arch/arm/mach-pxa/irq.c +++ linux/arch/arm/mach-pxa/irq.c @@ -244,6 +244,7 @@ static struct irqchip pxa_muxed_gpio_chi .set_type = pxa_gpio_irq_type, }; +static DEFINE_IRQ_CHAINED_TYPE(pxa_gpio_demux_handler); void __init pxa_init_irq(void) { Index: linux/arch/arm/mach-pxa/leds-idp.c =================================================================== --- linux.orig/arch/arm/mach-pxa/leds-idp.c +++ linux/arch/arm/mach-pxa/leds-idp.c @@ -34,7 +34,7 @@ void idp_leds_event(led_event_t evt) { unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); switch (evt) { case led_start: @@ -113,5 +113,5 @@ void idp_leds_event(led_event_t evt) else IDP_CPLD_LED_CONTROL |= IDP_LEDS_MASK; - 
local_irq_restore(flags); + raw_local_irq_restore(flags); } Index: linux/arch/arm/mach-pxa/leds-lubbock.c =================================================================== --- linux.orig/arch/arm/mach-pxa/leds-lubbock.c +++ linux/arch/arm/mach-pxa/leds-lubbock.c @@ -48,7 +48,7 @@ void lubbock_leds_event(led_event_t evt) { unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); switch (evt) { case led_start: @@ -122,5 +122,5 @@ void lubbock_leds_event(led_event_t evt) else LUB_DISC_BLNK_LED |= 0xff; - local_irq_restore(flags); + raw_local_irq_restore(flags); } Index: linux/arch/arm/mach-pxa/leds-mainstone.c =================================================================== --- linux.orig/arch/arm/mach-pxa/leds-mainstone.c +++ linux/arch/arm/mach-pxa/leds-mainstone.c @@ -43,7 +43,7 @@ void mainstone_leds_event(led_event_t ev { unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); switch (evt) { case led_start: @@ -117,5 +117,5 @@ void mainstone_leds_event(led_event_t ev else MST_LEDCTRL |= 0xff; - local_irq_restore(flags); + raw_local_irq_restore(flags); } Index: linux/arch/arm/mach-pxa/lubbock.c =================================================================== --- linux.orig/arch/arm/mach-pxa/lubbock.c +++ linux/arch/arm/mach-pxa/lubbock.c @@ -47,9 +47,9 @@ void lubbock_set_misc_wr(unsigned int ma { unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); LUB_MISC_WR = (LUB_MISC_WR & ~mask) | (set & mask); - local_irq_restore(flags); + raw_local_irq_restore(flags); } EXPORT_SYMBOL(lubbock_set_misc_wr); @@ -90,6 +90,8 @@ static void lubbock_irq_handler(unsigned } while (pending); } +static DEFINE_IRQ_CHAINED_TYPE(lubbock_irq_handler); + static void __init lubbock_init_irq(void) { int irq; Index: linux/arch/arm/mach-pxa/mainstone.c =================================================================== --- linux.orig/arch/arm/mach-pxa/mainstone.c +++ linux/arch/arm/mach-pxa/mainstone.c @@ -78,6 +78,8 @@ static 
void mainstone_irq_handler(unsign } while (pending); } +static DEFINE_IRQ_CHAINED_TYPE(mainstone_irq_handler); + static void __init mainstone_init_irq(void) { int irq; Index: linux/arch/arm/mach-rpc/dma.c =================================================================== --- linux.orig/arch/arm/mach-rpc/dma.c +++ linux/arch/arm/mach-rpc/dma.c @@ -171,11 +171,11 @@ static void iomd_disable_dma(dmach_t cha unsigned long dma_base = dma->dma_base; unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); if (dma->state != ~DMA_ST_AB) disable_irq(dma->dma_irq); iomd_writeb(0, dma_base + CR); - local_irq_restore(flags); + raw_local_irq_restore(flags); } static int iomd_set_dma_speed(dmach_t channel, dma_t *dma, int cycle) Index: linux/arch/arm/mach-rpc/irq.c =================================================================== --- linux.orig/arch/arm/mach-rpc/irq.c +++ linux/arch/arm/mach-rpc/irq.c @@ -112,6 +112,15 @@ static struct irqchip iomd_fiq_chip = { .unmask = iomd_unmask_irq_fiq, }; +static unsigned int startup_irq_disabled(unsigned int irq) +{ + return 0; +} + +/* Interrupt type for irqs which must not be + * automatically enabled in request_irq */ +static struct irq_type level_type_nostart; + void __init rpc_init_irq(void) { unsigned int irq, flags; @@ -121,16 +130,15 @@ void __init rpc_init_irq(void) iomd_writeb(0, IOMD_FIQMASK); iomd_writeb(0, IOMD_DMAMASK); + level_type_nostart = default_level_type; + level_type_nostart.startup = startup_irq_disabled; + for (irq = 0; irq < NR_IRQS; irq++) { flags = IRQF_VALID; if (irq <= 6 || (irq >= 9 && irq <= 15)) flags |= IRQF_PROBE; - if (irq == 21 || (irq >= 16 && irq <= 19) || - irq == IRQ_KEYBOARDTX) - flags |= IRQF_NOAUTOEN; - switch (irq) { case 0 ... 
7: set_irq_chip(irq, &iomd_a_chip); @@ -155,6 +163,10 @@ void __init rpc_init_irq(void) set_irq_flags(irq, IRQF_VALID); break; } + + if (irq == 21 || (irq >= 16 && irq <= 19) || + irq == IRQ_KEYBOARDTX) + set_irq_handler(irq, &level_type_nostart); } init_FIQ(); Index: linux/arch/arm/mach-s3c2410/bast-irq.c =================================================================== --- linux.orig/arch/arm/mach-s3c2410/bast-irq.c +++ linux/arch/arm/mach-s3c2410/bast-irq.c @@ -136,13 +136,15 @@ bast_irq_pc104_demux(unsigned int irq, for (i = 0; stat != 0; i++, stat >>= 1) { if (stat & 1) { irqno = bast_pc104_irqs[i]; - - desc_handle_irq(irqno, irq_desc + irqno, regs); + desc = irq_desc + irqno; + desc_handle_irq(irqno, desc, regs); } } } } +DEFINE_IRQ_CHAINED_TYPE(bast_irq_pc104_demux); + static __init int bast_irq_init(void) { unsigned int i; @@ -156,7 +158,7 @@ static __init int bast_irq_init(void) set_irq_chained_handler(IRQ_ISA, bast_irq_pc104_demux); - /* reigster our IRQs */ + /* register our IRQs */ for (i = 0; i < 4; i++) { unsigned int irqno = bast_pc104_irqs[i]; Index: linux/arch/arm/mach-s3c2410/clock.c =================================================================== --- linux.orig/arch/arm/mach-s3c2410/clock.c +++ linux/arch/arm/mach-s3c2410/clock.c @@ -61,7 +61,7 @@ void inline s3c24xx_clk_enable(unsigned unsigned long clkcon; unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); clkcon = __raw_readl(S3C2410_CLKCON); clkcon &= ~clocks; @@ -74,7 +74,7 @@ void inline s3c24xx_clk_enable(unsigned __raw_writel(clkcon, S3C2410_CLKCON); - local_irq_restore(flags); + raw_local_irq_restore(flags); } /* enable and disable calls for use with the clk struct */ Index: linux/arch/arm/mach-s3c2410/dma.c =================================================================== --- linux.orig/arch/arm/mach-s3c2410/dma.c +++ linux/arch/arm/mach-s3c2410/dma.c @@ -329,11 +329,11 @@ static int s3c2410_dma_start(s3c2410_dma pr_debug("s3c2410_start_dma: channel=%d\n", 
chan->number); - local_irq_save(flags); + raw_local_irq_save(flags); if (chan->state == S3C2410_DMA_RUNNING) { pr_debug("s3c2410_start_dma: already running (%d)\n", chan->state); - local_irq_restore(flags); + raw_local_irq_restore(flags); return 0; } @@ -348,7 +348,7 @@ static int s3c2410_dma_start(s3c2410_dma printk(KERN_ERR "dma%d: channel has nothing loaded\n", chan->number); chan->state = S3C2410_DMA_IDLE; - local_irq_restore(flags); + raw_local_irq_restore(flags); return -EINVAL; } @@ -385,7 +385,7 @@ static int s3c2410_dma_start(s3c2410_dma dbg_showchan(chan); - local_irq_restore(flags); + raw_local_irq_restore(flags); return 0; } @@ -451,7 +451,7 @@ int s3c2410_dma_enqueue(unsigned int cha buf->id = id; buf->magic = BUF_MAGIC; - local_irq_save(flags); + raw_local_irq_save(flags); if (chan->curr == NULL) { /* we've got nothing loaded... */ @@ -485,7 +485,7 @@ int s3c2410_dma_enqueue(unsigned int cha "timeout loading buffer\n", chan->number); dbg_showchan(chan); - local_irq_restore(flags); + raw_local_irq_restore(flags); return -EINVAL; } } @@ -499,7 +499,7 @@ int s3c2410_dma_enqueue(unsigned int cha } } - local_irq_restore(flags); + raw_local_irq_restore(flags); return 0; } @@ -661,9 +661,9 @@ s3c2410_dma_irq(int irq, void *devpw, st return IRQ_HANDLED; } - local_irq_save(flags); + raw_local_irq_save(flags); s3c2410_dma_loadbuffer(chan, chan->next); - local_irq_restore(flags); + raw_local_irq_restore(flags); } else { s3c2410_dma_lastxfer(chan); @@ -698,14 +698,14 @@ int s3c2410_dma_request(unsigned int cha check_channel(channel); - local_irq_save(flags); + raw_local_irq_save(flags); dbg_showchan(chan); if (chan->in_use) { if (client != chan->client) { printk(KERN_ERR "dma%d: already in use\n", channel); - local_irq_restore(flags); + raw_local_irq_restore(flags); return -EBUSY; } else { printk(KERN_ERR "dma%d: client already has channel\n", channel); @@ -724,7 +724,7 @@ int s3c2410_dma_request(unsigned int cha if (err) { chan->in_use = 0; - 
local_irq_restore(flags); + raw_local_irq_restore(flags); printk(KERN_ERR "%s: cannot get IRQ %d for DMA %d\n", client->name, chan->irq, chan->number); @@ -735,7 +735,7 @@ int s3c2410_dma_request(unsigned int cha chan->irq_enabled = 1; } - local_irq_restore(flags); + raw_local_irq_restore(flags); /* need to setup */ @@ -764,7 +764,7 @@ int s3c2410_dma_free(dmach_t channel, s3 check_channel(channel); - local_irq_save(flags); + raw_local_irq_save(flags); if (chan->client != client) { @@ -789,7 +789,7 @@ int s3c2410_dma_free(dmach_t channel, s3 free_irq(chan->irq, (void *)chan); chan->irq_claimed = 0; - local_irq_restore(flags); + raw_local_irq_restore(flags); return 0; } @@ -805,7 +805,7 @@ static int s3c2410_dma_dostop(s3c2410_dm dbg_showchan(chan); - local_irq_save(flags); + raw_local_irq_save(flags); s3c2410_dma_call_op(chan, S3C2410_DMAOP_STOP); @@ -823,7 +823,7 @@ static int s3c2410_dma_dostop(s3c2410_dm chan->state = S3C2410_DMA_IDLE; chan->load_state = S3C2410_DMALOAD_NONE; - local_irq_restore(flags); + raw_local_irq_restore(flags); return 0; } @@ -840,7 +840,7 @@ static int s3c2410_dma_flush(s3c2410_dma pr_debug("%s:\n", __FUNCTION__); - local_irq_save(flags); + raw_local_irq_save(flags); if (chan->state != S3C2410_DMA_IDLE) { pr_debug("%s: stopping channel...\n", __FUNCTION__ ); @@ -865,7 +865,7 @@ static int s3c2410_dma_flush(s3c2410_dma } } - local_irq_restore(flags); + raw_local_irq_restore(flags); return 0; } Index: linux/arch/arm/mach-s3c2410/gpio.c =================================================================== --- linux.orig/arch/arm/mach-s3c2410/gpio.c +++ linux/arch/arm/mach-s3c2410/gpio.c @@ -58,7 +58,7 @@ void s3c2410_gpio_cfgpin(unsigned int pi mask = 3 << S3C2410_GPIO_OFFSET(pin)*2; } - local_irq_save(flags); + raw_local_irq_save(flags); con = __raw_readl(base + 0x00); con &= ~mask; @@ -66,7 +66,7 @@ void s3c2410_gpio_cfgpin(unsigned int pi __raw_writel(con, base + 0x00); - local_irq_restore(flags); + raw_local_irq_restore(flags); } 
EXPORT_SYMBOL(s3c2410_gpio_cfgpin); @@ -97,14 +97,14 @@ void s3c2410_gpio_pullup(unsigned int pi if (pin < S3C2410_GPIO_BANKB) return; - local_irq_save(flags); + raw_local_irq_save(flags); up = __raw_readl(base + 0x08); up &= ~(1L << offs); up |= to << offs; __raw_writel(up, base + 0x08); - local_irq_restore(flags); + raw_local_irq_restore(flags); } EXPORT_SYMBOL(s3c2410_gpio_pullup); @@ -116,14 +116,14 @@ void s3c2410_gpio_setpin(unsigned int pi unsigned long flags; unsigned long dat; - local_irq_save(flags); + raw_local_irq_save(flags); dat = __raw_readl(base + 0x04); dat &= ~(1 << offs); dat |= to << offs; __raw_writel(dat, base + 0x04); - local_irq_restore(flags); + raw_local_irq_restore(flags); } EXPORT_SYMBOL(s3c2410_gpio_setpin); @@ -143,12 +143,12 @@ unsigned int s3c2410_modify_misccr(unsig unsigned long flags; unsigned long misccr; - local_irq_save(flags); + raw_local_irq_save(flags); misccr = __raw_readl(S3C2410_MISCCR); misccr &= ~clear; misccr ^= change; __raw_writel(misccr, S3C2410_MISCCR); - local_irq_restore(flags); + raw_local_irq_restore(flags); return misccr; } @@ -189,7 +189,7 @@ int s3c2410_gpio_irqfilter(unsigned int pin -= S3C2410_GPG8_EINT16; reg += pin & ~3; - local_irq_save(flags); + raw_local_irq_save(flags); /* update filter width and clock source */ @@ -205,7 +205,7 @@ int s3c2410_gpio_irqfilter(unsigned int val |= on << ((pin * 4) + 3); __raw_writel(val, S3C2410_EXTINT2); - local_irq_restore(flags); + raw_local_irq_restore(flags); return 0; } Index: linux/arch/arm/mach-s3c2410/irq.c =================================================================== --- linux.orig/arch/arm/mach-s3c2410/irq.c +++ linux/arch/arm/mach-s3c2410/irq.c @@ -573,6 +573,11 @@ s3c_irq_demux_uart2(unsigned int irq, } +static DEFINE_IRQ_CHAINED_TYPE(s3c_irq_demux_uart0); +static DEFINE_IRQ_CHAINED_TYPE(s3c_irq_demux_uart1); +static DEFINE_IRQ_CHAINED_TYPE(s3c_irq_demux_uart2); +static DEFINE_IRQ_CHAINED_TYPE(s3c_irq_demux_adc); + /* s3c24xx_init_irq * * Initialise 
S3C2410 IRQ system Index: linux/arch/arm/mach-s3c2410/s3c2440-dsc.c =================================================================== --- linux.orig/arch/arm/mach-s3c2410/s3c2440-dsc.c +++ linux/arch/arm/mach-s3c2410/s3c2440-dsc.c @@ -45,14 +45,14 @@ int s3c2440_set_dsc(unsigned int pin, un base = (pin & S3C2440_SELECT_DSC1) ? S3C2440_DSC1 : S3C2440_DSC0; mask = 3 << S3C2440_DSC_GETSHIFT(pin); - local_irq_save(flags); + raw_local_irq_save(flags); val = __raw_readl(base); val &= ~mask; val |= value & mask; __raw_writel(val, base); - local_irq_restore(flags); + raw_local_irq_restore(flags); return 0; } Index: linux/arch/arm/mach-s3c2410/s3c2440-irq.c =================================================================== --- linux.orig/arch/arm/mach-s3c2410/s3c2440-irq.c +++ linux/arch/arm/mach-s3c2410/s3c2440-irq.c @@ -157,6 +157,9 @@ static struct irqchip s3c_irq_cam = { .ack = s3c_irq_cam_ack, }; +static DEFINE_IRQ_CHAINED_TYPE(s3c_irq_demux_wdtac97); +static DEFINE_IRQ_CHAINED_TYPE(s3c_irq_demux_cam); + static int s3c2440_irq_add(struct sys_device *sysdev) { unsigned int irqno; Index: linux/arch/arm/mach-s3c2410/time.c =================================================================== --- linux.orig/arch/arm/mach-s3c2410/time.c +++ linux/arch/arm/mach-s3c2410/time.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include Index: linux/arch/arm/mach-sa1100/assabet.c =================================================================== --- linux.orig/arch/arm/mach-sa1100/assabet.c +++ linux/arch/arm/mach-sa1100/assabet.c @@ -61,10 +61,10 @@ void ASSABET_BCR_frob(unsigned int mask, { unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); BCR_value = (BCR_value & ~mask) | val; ASSABET_BCR = BCR_value; - local_irq_restore(flags); + raw_local_irq_restore(flags); } EXPORT_SYMBOL(ASSABET_BCR_frob); Index: linux/arch/arm/mach-sa1100/badge4.c =================================================================== --- 
linux.orig/arch/arm/mach-sa1100/badge4.c +++ linux/arch/arm/mach-sa1100/badge4.c @@ -227,7 +227,7 @@ void badge4_set_5V(unsigned subsystem, i unsigned long flags; unsigned old_5V_bitmap; - local_irq_save(flags); + raw_local_irq_save(flags); old_5V_bitmap = badge4_5V_bitmap; @@ -240,15 +240,22 @@ void badge4_set_5V(unsigned subsystem, i /* detect on->off and off->on transitions */ if ((!old_5V_bitmap) && (badge4_5V_bitmap)) { /* was off, now on */ - printk(KERN_INFO "%s: enabling 5V supply rail\n", __FUNCTION__); GPSR = BADGE4_GPIO_PCMEN5V; } else if ((old_5V_bitmap) && (!badge4_5V_bitmap)) { /* was on, now off */ - printk(KERN_INFO "%s: disabling 5V supply rail\n", __FUNCTION__); GPCR = BADGE4_GPIO_PCMEN5V; } - local_irq_restore(flags); + raw_local_irq_restore(flags); + + /* detect on->off and off->on transitions */ + if ((!old_5V_bitmap) && (badge4_5V_bitmap)) { + /* was off, now on */ + printk(KERN_INFO "%s: enabling 5V supply rail\n", __FUNCTION__); + } else if ((old_5V_bitmap) && (!badge4_5V_bitmap)) { + /* was on, now off */ + printk(KERN_INFO "%s: disabling 5V supply rail\n", __FUNCTION__); + } } EXPORT_SYMBOL(badge4_set_5V); Index: linux/arch/arm/mach-sa1100/cerf.c =================================================================== --- linux.orig/arch/arm/mach-sa1100/cerf.c +++ linux/arch/arm/mach-sa1100/cerf.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include Index: linux/arch/arm/mach-sa1100/cpu-sa1110.c =================================================================== --- linux.orig/arch/arm/mach-sa1100/cpu-sa1110.c +++ linux/arch/arm/mach-sa1100/cpu-sa1110.c @@ -282,7 +282,7 @@ static int sa1110_target(struct cpufreq_ * This means that we won't access SDRAM for the duration of * the programming. 
*/ - local_irq_save(flags); + raw_local_irq_save(flags); asm("mcr p15, 0, %0, c7, c10, 4" : : "r" (0)); udelay(10); __asm__ __volatile__(" \n\ @@ -303,7 +303,7 @@ static int sa1110_target(struct cpufreq_ : "r" (&MDCNFG), "r" (&PPCR), "0" (sd.mdcnfg), "r" (sd.mdrefr), "r" (sd.mdcas[0]), "r" (sd.mdcas[1]), "r" (sd.mdcas[2]), "r" (ppcr)); - local_irq_restore(flags); + raw_local_irq_restore(flags); /* * Now, return the SDRAM refresh back to normal. Index: linux/arch/arm/mach-sa1100/dma.c =================================================================== --- linux.orig/arch/arm/mach-sa1100/dma.c +++ linux/arch/arm/mach-sa1100/dma.c @@ -227,7 +227,7 @@ int sa1100_start_dma(dma_regs_t *regs, d if (size > MAX_DMA_SIZE) return -EOVERFLOW; - local_irq_save(flags); + raw_local_irq_save(flags); status = regs->RdDCSR; /* If both DMA buffers are started, there's nothing else we can do. */ @@ -262,7 +262,7 @@ int sa1100_start_dma(dma_regs_t *regs, d ret = 0; out: - local_irq_restore(flags); + raw_local_irq_restore(flags); return ret; } Index: linux/arch/arm/mach-sa1100/generic.c =================================================================== --- linux.orig/arch/arm/mach-sa1100/generic.c +++ linux/arch/arm/mach-sa1100/generic.c @@ -135,7 +135,7 @@ unsigned long long sched_clock(void) static void sa1100_power_off(void) { mdelay(100); - local_irq_disable(); + raw_local_irq_disable(); /* disable internal oscillator, float CS lines */ PCFR = (PCFR_OPDE | PCFR_FP | PCFR_FS); /* enable wake-up on GPIO0 (Assabet...) 
*/ @@ -391,7 +391,7 @@ void __init sa1110_mb_disable(void) { unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); PGSR &= ~GPIO_MBGNT; GPCR = GPIO_MBGNT; @@ -399,7 +399,7 @@ void __init sa1110_mb_disable(void) GAFR &= ~(GPIO_MBGNT | GPIO_MBREQ); - local_irq_restore(flags); + raw_local_irq_restore(flags); } /* @@ -410,7 +410,7 @@ void __init sa1110_mb_enable(void) { unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); PGSR &= ~GPIO_MBGNT; GPCR = GPIO_MBGNT; @@ -419,6 +419,6 @@ void __init sa1110_mb_enable(void) GAFR |= (GPIO_MBGNT | GPIO_MBREQ); TUCR |= TUCR_MR; - local_irq_restore(flags); + raw_local_irq_restore(flags); } Index: linux/arch/arm/mach-sa1100/h3600.c =================================================================== --- linux.orig/arch/arm/mach-sa1100/h3600.c +++ linux/arch/arm/mach-sa1100/h3600.c @@ -319,7 +319,7 @@ static void h3100_control_egpio(enum ipa } if (egpio || gpio) { - local_irq_save(flags); + raw_local_irq_save(flags); if (setp) { h3100_egpio |= egpio; GPSR = gpio; @@ -328,7 +328,7 @@ static void h3100_control_egpio(enum ipa GPCR = gpio; } H3100_EGPIO = h3100_egpio; - local_irq_restore(flags); + raw_local_irq_restore(flags); } } @@ -451,13 +451,13 @@ static void h3600_control_egpio(enum ipa } if (egpio) { - local_irq_save(flags); + raw_local_irq_save(flags); if (setp) h3600_egpio |= egpio; else h3600_egpio &= ~egpio; H3600_EGPIO = h3600_egpio; - local_irq_restore(flags); + raw_local_irq_restore(flags); } } @@ -788,6 +788,8 @@ static void h3800_unmask_gpio_irq(unsign H3800_ASIC2_GPIINTSTAT |= mask; } +static DEFINE_IRQ_CHAINED_TYPE(h3800_IRQ_demux); + static void __init h3800_init_irq(void) { int i; @@ -826,7 +828,7 @@ static void __init h3800_init_irq(void) } #endif set_irq_type(IRQ_GPIO_H3800_ASIC, IRQT_RISING); - set_irq_chained_handler(IRQ_GPIO_H3800_ASIC, &h3800_IRQ_demux); + set_irq_chained_handler(IRQ_GPIO_H3800_ASIC, h3800_IRQ_demux); } Index: linux/arch/arm/mach-sa1100/irq.c 
=================================================================== --- linux.orig/arch/arm/mach-sa1100/irq.c +++ linux/arch/arm/mach-sa1100/irq.c @@ -11,12 +11,13 @@ */ #include #include +#include +#include #include #include #include #include -#include #include #include "generic.h" @@ -281,6 +282,8 @@ static int __init sa1100irq_init_devicef return sysdev_register(&sa1100irq_device); } +static DEFINE_IRQ_CHAINED_TYPE(sa1100_high_gpio_handler); + device_initcall(sa1100irq_init_devicefs); void __init sa1100_init_irq(void) Index: linux/arch/arm/mach-sa1100/leds-assabet.c =================================================================== --- linux.orig/arch/arm/mach-sa1100/leds-assabet.c +++ linux/arch/arm/mach-sa1100/leds-assabet.c @@ -32,7 +32,7 @@ void assabet_leds_event(led_event_t evt) { unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); switch (evt) { case led_start: @@ -111,5 +111,5 @@ void assabet_leds_event(led_event_t evt) if (led_state & LED_STATE_ENABLED) ASSABET_BCR_frob(ASSABET_BCR_LED_MASK, hw_led_state); - local_irq_restore(flags); + raw_local_irq_restore(flags); } Index: linux/arch/arm/mach-sa1100/leds-badge4.c =================================================================== --- linux.orig/arch/arm/mach-sa1100/leds-badge4.c +++ linux/arch/arm/mach-sa1100/leds-badge4.c @@ -36,7 +36,7 @@ void badge4_leds_event(led_event_t evt) { unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); switch (evt) { case led_start: @@ -108,5 +108,5 @@ void badge4_leds_event(led_event_t evt) GPCR = hw_led_state ^ LED_MASK; } - local_irq_restore(flags); + raw_local_irq_restore(flags); } Index: linux/arch/arm/mach-sa1100/leds-cerf.c =================================================================== --- linux.orig/arch/arm/mach-sa1100/leds-cerf.c +++ linux/arch/arm/mach-sa1100/leds-cerf.c @@ -29,7 +29,7 @@ void cerf_leds_event(led_event_t evt) { unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); switch 
(evt) { case led_start: @@ -107,5 +107,5 @@ void cerf_leds_event(led_event_t evt) GPCR = hw_led_state ^ LED_MASK; } - local_irq_restore(flags); + raw_local_irq_restore(flags); } Index: linux/arch/arm/mach-sa1100/leds-hackkit.c =================================================================== --- linux.orig/arch/arm/mach-sa1100/leds-hackkit.c +++ linux/arch/arm/mach-sa1100/leds-hackkit.c @@ -33,7 +33,7 @@ void hackkit_leds_event(led_event_t evt) { unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); switch(evt) { case led_start: @@ -109,5 +109,5 @@ void hackkit_leds_event(led_event_t evt) GPCR = hw_led_state ^ LED_MASK; } - local_irq_restore(flags); + raw_local_irq_restore(flags); } Index: linux/arch/arm/mach-sa1100/leds-lart.c =================================================================== --- linux.orig/arch/arm/mach-sa1100/leds-lart.c +++ linux/arch/arm/mach-sa1100/leds-lart.c @@ -32,7 +32,7 @@ void lart_leds_event(led_event_t evt) { unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); switch(evt) { case led_start: @@ -98,5 +98,5 @@ void lart_leds_event(led_event_t evt) GPCR = hw_led_state ^ LED_MASK; } - local_irq_restore(flags); + raw_local_irq_restore(flags); } Index: linux/arch/arm/mach-sa1100/neponset.c =================================================================== --- linux.orig/arch/arm/mach-sa1100/neponset.c +++ linux/arch/arm/mach-sa1100/neponset.c @@ -137,6 +137,8 @@ static struct sa1100_port_fns neponset_p .get_mctrl = neponset_get_mctrl, }; +static DEFINE_IRQ_CHAINED_TYPE(neponset_irq_handler); + static int neponset_probe(struct device *dev) { sa1100_register_uart_fns(&neponset_port_fns); Index: linux/arch/arm/mach-sa1100/pleb.c =================================================================== --- linux.orig/arch/arm/mach-sa1100/pleb.c +++ linux/arch/arm/mach-sa1100/pleb.c @@ -7,6 +7,7 @@ #include #include #include +#include #include Index: linux/arch/arm/mach-sa1100/simpad.c 
=================================================================== --- linux.orig/arch/arm/mach-sa1100/simpad.c +++ linux/arch/arm/mach-sa1100/simpad.c @@ -168,7 +168,7 @@ static void __init simpad_map_io(void) static void simpad_power_off(void) { - local_irq_disable(); // was cli + raw_local_irq_disable(); // was cli set_cs3(0x800); /* only SD_MEDIAQ */ /* disable internal oscillator, float CS lines */ @@ -185,7 +185,7 @@ static void simpad_power_off(void) PMCR = PMCR_SF; while(1); - local_irq_enable(); /* we won't ever call it */ + raw_local_irq_enable(); /* we won't ever call it */ } Index: linux/arch/arm/mach-sa1100/time.c =================================================================== --- linux.orig/arch/arm/mach-sa1100/time.c +++ linux/arch/arm/mach-sa1100/time.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include Index: linux/arch/arm/mach-shark/core.c =================================================================== --- linux.orig/arch/arm/mach-shark/core.c +++ linux/arch/arm/mach-shark/core.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include Index: linux/arch/arm/mach-shark/leds.c =================================================================== --- linux.orig/arch/arm/mach-shark/leds.c +++ linux/arch/arm/mach-shark/leds.c @@ -33,7 +33,7 @@ static char led_state; static short hw_led_state; static short saved_state; -static DEFINE_SPINLOCK(leds_lock); +static DEFINE_RAW_SPINLOCK(leds_lock); short sequoia_read(int addr) { outw(addr,0x24); Index: linux/arch/arm/mach-versatile/core.c =================================================================== --- linux.orig/arch/arm/mach-versatile/core.c +++ linux/arch/arm/mach-versatile/core.c @@ -112,6 +112,8 @@ sic_handle_irq(unsigned int irq, struct } while (status); } +static DEFINE_IRQ_CHAINED_TYPE(sic_handle_irq); + #if 1 #define IRQ_MMCI0A IRQ_VICSOURCE22 #define IRQ_AACI IRQ_VICSOURCE24 @@ -161,7 +163,7 @@ void __init versatile_init_irq(void) } } - 
set_irq_handler(IRQ_VICSOURCE31, sic_handle_irq); + set_irq_chained_handler(IRQ_VICSOURCE31, sic_handle_irq); vic_unmask_irq(IRQ_VICSOURCE31); /* Do second interrupt controller */ @@ -727,7 +729,7 @@ static void versatile_leds_event(led_eve unsigned long flags; u32 val; - local_irq_save(flags); + raw_local_irq_save(flags); val = readl(VA_LEDS_BASE); switch (ledevt) { @@ -752,7 +754,7 @@ static void versatile_leds_event(led_eve } writel(val, VA_LEDS_BASE); - local_irq_restore(flags); + raw_local_irq_restore(flags); } #endif /* CONFIG_LEDS */ Index: linux/arch/arm/mm/blockops.c =================================================================== --- linux.orig/arch/arm/mm/blockops.c +++ linux/arch/arm/mm/blockops.c @@ -20,7 +20,7 @@ extern struct cpu_cache_fns blk_cache_fn * * - kaddr - kernel address (guaranteed to be page aligned) */ -static void __attribute__((naked)) +static void notrace __attribute__((naked)) blk_flush_kern_dcache_page(void *kaddr) { asm( @@ -45,7 +45,7 @@ blk_flush_kern_dcache_page(void *kaddr) * - start - virtual start address of region * - end - virtual end address of region */ -static void __attribute__((naked)) +static void notrace __attribute__((naked)) blk_dma_inv_range_unified(unsigned long start, unsigned long end) { asm( @@ -61,7 +61,7 @@ blk_dma_inv_range_unified(unsigned long : "I" (L1_CACHE_BYTES - 1)); } -static void __attribute__((naked)) +static void notrace __attribute__((naked)) blk_dma_inv_range_harvard(unsigned long start, unsigned long end) { asm( @@ -82,7 +82,7 @@ blk_dma_inv_range_harvard(unsigned long * - start - virtual start address of region * - end - virtual end address of region */ -static void __attribute__((naked)) +static void notrace __attribute__((naked)) blk_dma_clean_range(unsigned long start, unsigned long end) { asm( @@ -97,7 +97,7 @@ blk_dma_clean_range(unsigned long start, * - start - virtual start address of region * - end - virtual end address of region */ -static void __attribute__((naked)) +static void 
notrace __attribute__((naked)) blk_dma_flush_range(unsigned long start, unsigned long end) { asm( Index: linux/arch/arm/mm/consistent.c =================================================================== --- linux.orig/arch/arm/mm/consistent.c +++ linux/arch/arm/mm/consistent.c @@ -30,7 +30,7 @@ * This is the page table (2MB) covering uncached, DMA consistent allocations */ static pte_t *consistent_pte; -static DEFINE_SPINLOCK(consistent_lock); +static DEFINE_RAW_SPINLOCK(consistent_lock); /* * VM region handling support. Index: linux/arch/arm/mm/copypage-v4mc.c =================================================================== --- linux.orig/arch/arm/mm/copypage-v4mc.c +++ linux/arch/arm/mm/copypage-v4mc.c @@ -29,7 +29,7 @@ #define TOP_PTE(x) pte_offset_kernel(top_pmd, x) -static DEFINE_SPINLOCK(minicache_lock); +static DEFINE_RAW_SPINLOCK(minicache_lock); /* * ARMv4 mini-dcache optimised copy_user_page @@ -43,7 +43,7 @@ static DEFINE_SPINLOCK(minicache_lock); * instruction. If your processor does not supply this, you have to write your * own copy_user_page that does the right thing. */ -static void __attribute__((naked)) +static void notrace __attribute__((naked)) mc_copy_user_page(void *from, void *to) { asm volatile( @@ -82,7 +82,7 @@ void v4_mc_copy_user_page(void *kto, con /* * ARMv4 optimised clear_user_page */ -void __attribute__((naked)) +void notrace __attribute__((naked)) v4_mc_clear_user_page(void *kaddr, unsigned long vaddr) { asm volatile( Index: linux/arch/arm/mm/copypage-v6.c =================================================================== --- linux.orig/arch/arm/mm/copypage-v6.c +++ linux/arch/arm/mm/copypage-v6.c @@ -28,7 +28,7 @@ #define TOP_PTE(x) pte_offset_kernel(top_pmd, x) -static DEFINE_SPINLOCK(v6_lock); +static DEFINE_RAW_SPINLOCK(v6_lock); /* * Copy the user page. 
No aliasing to deal with so we can just Index: linux/arch/arm/mm/copypage-xscale.c =================================================================== --- linux.orig/arch/arm/mm/copypage-xscale.c +++ linux/arch/arm/mm/copypage-xscale.c @@ -31,7 +31,7 @@ #define TOP_PTE(x) pte_offset_kernel(top_pmd, x) -static DEFINE_SPINLOCK(minicache_lock); +static DEFINE_RAW_SPINLOCK(minicache_lock); /* * XScale mini-dcache optimised copy_user_page @@ -41,7 +41,7 @@ static DEFINE_SPINLOCK(minicache_lock); * Dcache aliasing issue. The writes will be forwarded to the write buffer, * and merged as appropriate. */ -static void __attribute__((naked)) +static void notrace __attribute__((naked)) mc_copy_user_page(void *from, void *to) { /* @@ -104,7 +104,7 @@ void xscale_mc_copy_user_page(void *kto, /* * XScale optimised clear_user_page */ -void __attribute__((naked)) +void notrace __attribute__((naked)) xscale_mc_clear_user_page(void *kaddr, unsigned long vaddr) { asm volatile( Index: linux/arch/arm/mm/fault-armv.c =================================================================== --- linux.orig/arch/arm/mm/fault-armv.c +++ linux/arch/arm/mm/fault-armv.c @@ -161,7 +161,7 @@ static int __init check_writebuffer(unsi { register unsigned long zero = 0, one = 1, val; - local_irq_disable(); + raw_local_irq_disable(); mb(); *p1 = one; mb(); @@ -169,7 +169,7 @@ static int __init check_writebuffer(unsi mb(); val = *p1; mb(); - local_irq_enable(); + raw_local_irq_enable(); return val != zero; } Index: linux/arch/arm/mm/fault.c =================================================================== --- linux.orig/arch/arm/mm/fault.c +++ linux/arch/arm/mm/fault.c @@ -216,7 +216,7 @@ out: return fault; } -static int +static notrace int do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs) { struct task_struct *tsk; @@ -316,7 +316,7 @@ no_context: * interrupt or a critical region, and should only copy the information * from the master page table, nothing more. 
*/ -static int +static notrace int do_translation_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs) { @@ -362,7 +362,7 @@ bad_area: * Some section permission faults need to be handled gracefully. * They can happen due to a __{get,put}_user during an oops. */ -static int +static notrace int do_sect_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs) { struct task_struct *tsk = current; @@ -373,7 +373,7 @@ do_sect_fault(unsigned long addr, unsign /* * This abort handler always returns "fault". */ -static int +static notrace int do_bad(unsigned long addr, unsigned int fsr, struct pt_regs *regs) { return 1; @@ -428,7 +428,7 @@ static struct fsr_info { { do_bad, SIGBUS, 0, "unknown 31" } }; -void __init +void __init notrace hook_fault_code(int nr, int (*fn)(unsigned long, unsigned int, struct pt_regs *), int sig, const char *name) { @@ -442,7 +442,7 @@ hook_fault_code(int nr, int (*fn)(unsign /* * Dispatch a data abort to the relevant handler. */ -asmlinkage void +asmlinkage notrace void do_DataAbort(unsigned long addr, unsigned int fsr, struct pt_regs *regs) { const struct fsr_info *inf = fsr_info + (fsr & 15) + ((fsr & (1 << 10)) >> 6); @@ -461,7 +461,7 @@ do_DataAbort(unsigned long addr, unsigne notify_die("", regs, &info, fsr, 0); } -asmlinkage void +asmlinkage notrace void do_PrefetchAbort(unsigned long addr, struct pt_regs *regs) { do_translation_fault(addr, 0, regs); Index: linux/arch/arm/mm/init.c =================================================================== --- linux.orig/arch/arm/mm/init.c +++ linux/arch/arm/mm/init.c @@ -28,7 +28,7 @@ #define TABLE_SIZE (2 * PTRS_PER_PTE * sizeof(pte_t)) -DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); +DEFINE_PER_CPU_LOCKED(struct mmu_gather, mmu_gathers); extern pgd_t swapper_pg_dir[PTRS_PER_PGD]; extern void _stext, _text, _etext, __data_start, _end, __init_begin, __init_end; Index: linux/arch/arm/plat-omap/clock.c =================================================================== --- 
linux.orig/arch/arm/plat-omap/clock.c +++ linux/arch/arm/plat-omap/clock.c @@ -25,7 +25,7 @@ static LIST_HEAD(clocks); static DECLARE_MUTEX(clocks_sem); -static DEFINE_SPINLOCK(clockfw_lock); +static DEFINE_RAW_SPINLOCK(clockfw_lock); static void propagate_rate(struct clk * clk); /* UART clock function */ static int set_uart_rate(struct clk * clk, unsigned long rate); Index: linux/arch/arm/plat-omap/dma.c =================================================================== --- linux.orig/arch/arm/plat-omap/dma.c +++ linux/arch/arm/plat-omap/dma.c @@ -586,7 +586,7 @@ void omap_dma_unlink_lch (int lch_head, static struct lcd_dma_info { - spinlock_t lock; + raw_spinlock_t lock; int reserved; void (* callback)(u16 status, void *data); void *cb_data; @@ -948,11 +948,11 @@ void omap_clear_dma(int lch) unsigned long flags; int status; - local_irq_save(flags); + raw_local_irq_save(flags); omap_writew(omap_readw(OMAP_DMA_CCR(lch)) & ~OMAP_DMA_CCR_EN, OMAP_DMA_CCR(lch)); status = OMAP_DMA_CSR(lch); /* clear pending interrupts */ - local_irq_restore(flags); + raw_local_irq_restore(flags); } /* Index: linux/arch/arm/plat-omap/gpio.c =================================================================== --- linux.orig/arch/arm/plat-omap/gpio.c +++ linux/arch/arm/plat-omap/gpio.c @@ -121,7 +121,7 @@ struct gpio_bank { u32 reserved_map; u32 suspend_wakeup; u32 saved_wakeup; - spinlock_t lock; + raw_spinlock_t lock; }; #define METHOD_MPUIO 0 @@ -736,7 +736,7 @@ static void gpio_irq_handler(unsigned in desc->chip->ack(irq); - bank = (struct gpio_bank *) desc->data; + bank = (struct gpio_bank *) desc->handler_data; if (bank->method == METHOD_MPUIO) isr_reg = bank->base + OMAP_MPUIO_GPIO_INT; #ifdef CONFIG_ARCH_OMAP1510 @@ -837,6 +837,8 @@ static struct irqchip mpuio_irq_chip = { .unmask = mpuio_unmask_irq }; +static DEFINE_IRQ_CHAINED_TYPE(gpio_irq_handler); + static int initialized = 0; static struct clk * gpio_ck = NULL; Index: linux/arch/arm/plat-omap/mux.c 
=================================================================== --- linux.orig/arch/arm/plat-omap/mux.c +++ linux/arch/arm/plat-omap/mux.c @@ -40,7 +40,7 @@ int __init_or_module omap_cfg_reg(const reg_cfg_t reg_cfg) { - static DEFINE_SPINLOCK(mux_spin_lock); + static DEFINE_RAW_SPINLOCK(mux_spin_lock); unsigned long flags; reg_cfg_set *cfg; Index: linux/arch/arm/plat-omap/pm.c =================================================================== --- linux.orig/arch/arm/plat-omap/pm.c +++ linux/arch/arm/plat-omap/pm.c @@ -81,11 +81,11 @@ void omap_pm_idle(void) * seconds for wait for interrupt. */ - local_irq_disable(); + raw_local_irq_disable(); local_fiq_disable(); if (need_resched()) { local_fiq_enable(); - local_irq_enable(); + raw_local_irq_enable(); return; } mask32 = omap_readl(ARM_SYSST); @@ -110,7 +110,7 @@ void omap_pm_idle(void) omap_sram_idle(); local_fiq_enable(); - local_irq_enable(); + raw_local_irq_enable(); } /* @@ -171,7 +171,7 @@ void omap_pm_suspend(void) * Step 1: turn off interrupts (FIXME: NOTE: already disabled) */ - local_irq_disable(); + raw_local_irq_disable(); local_fiq_disable(); /* @@ -308,7 +308,7 @@ void omap_pm_suspend(void) * Reenable interrupts */ - local_irq_enable(); + raw_local_irq_enable(); local_fiq_enable(); omap_serial_wake_trigger(0); Index: linux/arch/arm26/boot/compressed/misc.c =================================================================== --- linux.orig/arch/arm26/boot/compressed/misc.c +++ linux/arch/arm26/boot/compressed/misc.c @@ -184,6 +184,7 @@ static ulg free_mem_ptr_end; #define HEAP_SIZE 0x2000 +#define ZLIB_INFLATE_NO_INFLATE_LOCK #include "../../../../lib/inflate.c" #ifndef STANDALONE_DEBUG Index: linux/arch/arm26/kernel/time.c =================================================================== --- linux.orig/arch/arm26/kernel/time.c +++ linux/arch/arm26/kernel/time.c @@ -34,10 +34,6 @@ #include #include -u64 jiffies_64 = INITIAL_JIFFIES; - -EXPORT_SYMBOL(jiffies_64); - extern unsigned long 
wall_jiffies; /* this needs a better home */ Index: linux/arch/cris/kernel/time.c =================================================================== --- linux.orig/arch/cris/kernel/time.c +++ linux/arch/cris/kernel/time.c @@ -32,10 +32,6 @@ #include #include -u64 jiffies_64 = INITIAL_JIFFIES; - -EXPORT_SYMBOL(jiffies_64); - int have_rtc; /* used to remember if we have an RTC or not */; #define TICK_SIZE tick Index: linux/arch/frv/kernel/time.c =================================================================== --- linux.orig/arch/frv/kernel/time.c +++ linux/arch/frv/kernel/time.c @@ -34,9 +34,6 @@ extern unsigned long wall_jiffies; -u64 jiffies_64 = INITIAL_JIFFIES; -EXPORT_SYMBOL(jiffies_64); - unsigned long __nongprelbss __clkin_clock_speed_HZ; unsigned long __nongprelbss __ext_bus_clock_speed_HZ; unsigned long __nongprelbss __res_bus_clock_speed_HZ; Index: linux/arch/h8300/kernel/time.c =================================================================== --- linux.orig/arch/h8300/kernel/time.c +++ linux/arch/h8300/kernel/time.c @@ -32,10 +32,6 @@ #define TICK_SIZE (tick_nsec / 1000) -u64 jiffies_64; - -EXPORT_SYMBOL(jiffies_64); - /* * timer_interrupt() needs to keep up the real-time clock, * as well as call the "do_timer()" routine every clocktick Index: linux/arch/i386/Kconfig =================================================================== --- linux.orig/arch/i386/Kconfig +++ linux/arch/i386/Kconfig @@ -14,6 +14,10 @@ config X86 486, 586, Pentiums, and various instruction-set-compatible chips by AMD, Cyrix, and others. 
+config GENERIC_TIME + bool + default y + config SEMAPHORE_SLEEPERS bool default y @@ -376,16 +380,6 @@ config X86_L1_CACHE_SHIFT default "5" if MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODEGX1 default "6" if MK7 || MK8 || MPENTIUMM -config RWSEM_GENERIC_SPINLOCK - bool - depends on M386 - default y - -config RWSEM_XCHGADD_ALGORITHM - bool - depends on !M386 - default y - config GENERIC_CALIBRATE_DELAY bool default y @@ -442,7 +436,7 @@ config X86_USE_PPRO_CHECKSUM config X86_USE_3DNOW bool - depends on MCYRIXIII || MK7 + depends on (MCYRIXIII || MK7) && !PREEMPT_RT default y config X86_OOSTORE @@ -466,6 +460,8 @@ config HPET_EMULATE_RTC depends on HPET_TIMER && RTC=y default y +source "kernel/time/Kconfig" + config SMP bool "Symmetric multi-processing support" ---help--- @@ -521,6 +517,20 @@ config SCHED_SMT source "kernel/Kconfig.preempt" +config RWSEM_GENERIC_SPINLOCK + bool + depends on M386 || PREEMPT_RT + default y + +config ASM_SEMAPHORES + bool + default y + +config RWSEM_XCHGADD_ALGORITHM + bool + depends on !RWSEM_GENERIC_SPINLOCK && !PREEMPT_RT + default y + config X86_UP_APIC bool "Local APIC support on uniprocessors" depends on !SMP && !(X86_VISWS || X86_VOYAGER) @@ -556,6 +566,16 @@ config X86_IO_APIC depends on X86_UP_IOAPIC || (SMP && !(X86_VISWS || X86_VOYAGER)) default y +config X86_IOAPIC_FAST + bool "enhanced IO-APIC support" + depends on X86_IO_APIC + default y + help + This option activates further optimizations in the IO-APIC + code. NOTE: this is experimental code, although it is enabled by + default. Symptoms of non-working systems are boot-time lockups, stray + or screaming interrupts and other interrupt-related weirdness.
+ config X86_VISWS_APIC bool depends on X86_VISWS @@ -917,7 +937,7 @@ config BOOT_IOREMAP config REGPARM bool "Use register arguments (EXPERIMENTAL)" - depends on EXPERIMENTAL + depends on EXPERIMENTAL && !MCOUNT default n help Compile the kernel with -mregparm=3. This uses a different ABI Index: linux/arch/i386/Kconfig.debug =================================================================== --- linux.orig/arch/i386/Kconfig.debug +++ linux/arch/i386/Kconfig.debug @@ -18,6 +18,7 @@ config EARLY_PRINTK config DEBUG_STACKOVERFLOW bool "Check for stack overflows" depends on DEBUG_KERNEL + default y help This option will cause messages to be printed if free stack space drops below a certain limit. @@ -35,6 +36,7 @@ config KPROBES config DEBUG_STACK_USAGE bool "Stack utilization instrumentation" depends on DEBUG_KERNEL + default y help Enables the display of the minimum amount of free stack which each task has ever had available in the sysrq-T and sysrq-P debug output. @@ -69,7 +71,7 @@ config X86_FIND_SMP_CONFIG config X86_MPPARSE bool - depends on X86_LOCAL_APIC && !X86_VISWS + depends on X86_LOCAL_APIC && X86_IO_APIC && !X86_VISWS default y endmenu Index: linux/arch/i386/boot/compressed/misc.c =================================================================== --- linux.orig/arch/i386/boot/compressed/misc.c +++ linux/arch/i386/boot/compressed/misc.c @@ -15,6 +15,12 @@ #include #include +#ifdef CONFIG_MCOUNT +void notrace mcount(void) +{ +} +#endif + /* * gzip declarations */ @@ -112,7 +118,7 @@ static long free_mem_end_ptr; #define INPLACE_MOVE_ROUTINE 0x1000 #define LOW_BUFFER_START 0x2000 #define LOW_BUFFER_MAX 0x90000 -#define HEAP_SIZE 0x3000 +#define HEAP_SIZE 0x4000 static unsigned int low_buffer_end, low_buffer_size; static int high_loaded =0; static uch *high_buffer_start /* = (uch *)(((ulg)&end) + HEAP_SIZE)*/; @@ -125,6 +131,7 @@ static int lines, cols; static void * xquad_portio = NULL; #endif +#define ZLIB_INFLATE_NO_INFLATE_LOCK #include 
"../../../../lib/inflate.c" static void *malloc(int size) Index: linux/arch/i386/kernel/Makefile =================================================================== --- linux.orig/arch/i386/kernel/Makefile +++ linux/arch/i386/kernel/Makefile @@ -4,13 +4,13 @@ extra-y := head.o init_task.o vmlinux.lds -obj-y := process.o semaphore.o signal.o entry.o traps.o irq.o vm86.o \ +obj-y := process.o signal.o entry.o traps.o irq.o vm86.o \ ptrace.o time.o ioport.o ldt.o setup.o i8259.o sys_i386.o \ pci-dma.o i386_ksyms.o i387.o dmi_scan.o bootflag.o \ - doublefault.o quirks.o i8237.o + doublefault.o quirks.o i8237.o i8253.o tsc.o +obj-$(CONFIG_ASM_SEMAPHORES) += semaphore.o obj-y += cpu/ -obj-y += timers/ obj-$(CONFIG_ACPI) += acpi/ obj-$(CONFIG_X86_BIOS_REBOOT) += reboot.o obj-$(CONFIG_MCA) += mca.o @@ -20,6 +20,7 @@ obj-$(CONFIG_MICROCODE) += microcode.o obj-$(CONFIG_APM) += apm.o obj-$(CONFIG_X86_SMP) += smp.o smpboot.o obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o +obj-$(CONFIG_MCOUNT) += mcount-wrapper.o obj-$(CONFIG_X86_MPPARSE) += mpparse.o obj-$(CONFIG_X86_LOCAL_APIC) += apic.o nmi.o obj-$(CONFIG_X86_IO_APIC) += io_apic.o @@ -34,6 +35,8 @@ obj-$(CONFIG_ACPI_SRAT) += srat.o obj-$(CONFIG_HPET_TIMER) += time_hpet.o obj-$(CONFIG_EFI) += efi.o efi_stub.o obj-$(CONFIG_EARLY_PRINTK) += early_printk.o +obj-$(CONFIG_SYSFS) += switch2poll.o +obj-$(CONFIG_HPET_TIMER) += hpet.o EXTRA_AFLAGS := -traditional Index: linux/arch/i386/kernel/acpi/boot.c =================================================================== --- linux.orig/arch/i386/kernel/acpi/boot.c +++ linux/arch/i386/kernel/acpi/boot.c @@ -570,7 +570,7 @@ static int __init acpi_parse_sbf(unsigne } #ifdef CONFIG_HPET_TIMER - +#include static int __init acpi_parse_hpet(unsigned long phys, unsigned long size) { struct acpi_table_hpet *hpet_tbl; @@ -592,6 +592,7 @@ static int __init acpi_parse_hpet(unsign #ifdef CONFIG_X86_64 vxtime.hpet_address = hpet_tbl->addr.addrl | ((long)hpet_tbl->addr.addrh << 32); + hpet_address = 
vxtime.hpet_address; printk(KERN_INFO PREFIX "HPET id: %#x base: %#lx\n", hpet_tbl->id, vxtime.hpet_address); @@ -600,10 +601,10 @@ static int __init acpi_parse_hpet(unsign extern unsigned long hpet_address; hpet_address = hpet_tbl->addr.addrl; - printk(KERN_INFO PREFIX "HPET id: %#x base: %#lx\n", - hpet_tbl->id, hpet_address); } -#endif /* X86 */ +#endif /* X86 */ + printk(KERN_INFO PREFIX "HPET id: %#x base: %#lx\n", + hpet_tbl->id, hpet_address); return 0; } @@ -612,7 +613,8 @@ static int __init acpi_parse_hpet(unsign #endif #ifdef CONFIG_X86_PM_TIMER -extern u32 pmtmr_ioport; +u32 acpi_pmtmr_ioport; +int acpi_pmtmr_buggy; #endif static int __init acpi_parse_fadt(unsigned long phys, unsigned long size) @@ -640,14 +642,22 @@ static int __init acpi_parse_fadt(unsign ACPI_ADR_SPACE_SYSTEM_IO) return 0; - pmtmr_ioport = fadt->xpm_tmr_blk.address; + acpi_pmtmr_ioport = fadt->xpm_tmr_blk.address; + /* + * "X" fields are optional extensions to the original V1.0 + * fields, so we must selectively expand V1.0 fields if the + * corresponding X field is zero. + */ + if (!acpi_pmtmr_ioport) + acpi_pmtmr_ioport = fadt->V1_pm_tmr_blk; } else { /* FADT rev. 
1 */ - pmtmr_ioport = fadt->V1_pm_tmr_blk; + acpi_pmtmr_ioport = fadt->V1_pm_tmr_blk; } - if (pmtmr_ioport) - printk(KERN_INFO PREFIX "PM-Timer IO Port: %#x\n", - pmtmr_ioport); + + if (acpi_pmtmr_ioport) + printk(KERN_INFO PREFIX "PM-Timer IO Port: %#x\n", acpi_pmtmr_ioport); + #endif return 0; } Index: linux/arch/i386/kernel/apic.c =================================================================== --- linux.orig/arch/i386/kernel/apic.c +++ linux/arch/i386/kernel/apic.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include @@ -50,6 +51,23 @@ int enable_local_apic __initdata = 0; /* */ int apic_verbosity; +static unsigned int calibration_result; + +static void lapic_next_event(unsigned long evt); +static void lapic_timer_setup(int mode); + +static struct clock_event lapic_clockevent = { + .name = "lapic", + .capabilities = CLOCK_CAP_NEXTEVT | CLOCK_CAP_PROFILE | + CLOCK_HAS_IRQHANDLER +#ifdef CONFIG_SMP + | CLOCK_CAP_UPDATE +#endif + , + .shift = 32, + .set_mode = lapic_timer_setup, + .set_next_event = lapic_next_event, +}; static void apic_pm_activate(void); @@ -92,10 +110,6 @@ void __init apic_intr_init(void) /* Using APIC to generate smp_local_timer_interrupt? */ int using_apic_timer = 0; -static DEFINE_PER_CPU(int, prof_multiplier) = 1; -static DEFINE_PER_CPU(int, prof_old_multiplier) = 1; -static DEFINE_PER_CPU(int, prof_counter) = 1; - static int enabled_via_apicbase; void enable_NMI_through_LVT0 (void * dummy) @@ -559,15 +573,21 @@ void __devinit setup_local_APIC(void) * If Linux enabled the LAPIC against the BIOS default * disable it down before re-entering the BIOS on shutdown. * Otherwise the BIOS may get confused and not power-off. + * Additionally clear all LVT entries before disable_local_APIC + * for the case where Linux didn't enable the LAPIC. 
*/ void lapic_shutdown(void) { - if (!cpu_has_apic || !enabled_via_apicbase) + if (!cpu_has_apic) return; - local_irq_disable(); - disable_local_APIC(); - local_irq_enable(); + raw_local_irq_disable(); + clear_local_APIC(); + + if (enabled_via_apicbase) + disable_local_APIC(); + + raw_local_irq_enable(); } #ifdef CONFIG_PM @@ -611,9 +631,9 @@ static int lapic_suspend(struct sys_devi apic_pm_state.apic_tdcr = apic_read(APIC_TDCR); apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR); - local_irq_save(flags); + raw_local_irq_save(flags); disable_local_APIC(); - local_irq_restore(flags); + raw_local_irq_restore(flags); return 0; } @@ -625,7 +645,7 @@ static int lapic_resume(struct sys_devic if (!apic_pm_state.active) return 0; - local_irq_save(flags); + raw_local_irq_save(flags); /* * Make sure the APICBASE points to the right address @@ -656,7 +676,7 @@ static int lapic_resume(struct sys_devic apic_write(APIC_LVTERR, apic_pm_state.apic_lvterr); apic_write(APIC_ESR, 0); apic_read(APIC_ESR); - local_irq_restore(flags); + raw_local_irq_restore(flags); return 0; } @@ -849,10 +869,10 @@ fake_ioapic_page: ioapic_phys = (unsigned long) alloc_bootmem_pages(PAGE_SIZE); ioapic_phys = __pa(ioapic_phys); + set_fixmap_nocache(idx, ioapic_phys); + printk(KERN_DEBUG "faked IOAPIC to %08lx (%08lx)\n", + __fix_to_virt(idx), ioapic_phys); } - set_fixmap_nocache(idx, ioapic_phys); - printk(KERN_DEBUG "mapped IOAPIC to %08lx (%08lx)\n", - __fix_to_virt(idx), ioapic_phys); idx++; } } @@ -869,6 +889,11 @@ fake_ioapic_page: */ /* + * FIXME: Move this to i8253.h. There is no need to keep the access to + * the PIT scattered all around the place -tglx + */ + +/* * The timer chip is already set up at HZ interrupts per second here, * but we do not accept timer interrupts yet. We only allow the BP * to calibrate. 
@@ -926,12 +951,16 @@ void (*wait_timer_tick)(void) __devinitd #define APIC_DIVISOR 16 -static void __setup_APIC_LVTT(unsigned int clocks) +static void __setup_APIC_LVTT(unsigned int clocks, int oneshot) { unsigned int lvtt_value, tmp_value, ver; ver = GET_APIC_VERSION(apic_read(APIC_LVR)); - lvtt_value = APIC_LVT_TIMER_PERIODIC | LOCAL_TIMER_VECTOR; + + lvtt_value = LOCAL_TIMER_VECTOR; + if (!oneshot) + lvtt_value |= APIC_LVT_TIMER_PERIODIC; + if (!APIC_INTEGRATED(ver)) lvtt_value |= SET_APIC_TIMER_BASE(APIC_TIMER_BASE_DIV); apic_write_around(APIC_LVTT, lvtt_value); @@ -944,23 +973,27 @@ static void __setup_APIC_LVTT(unsigned i & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE)) | APIC_TDR_DIV_16); - apic_write_around(APIC_TMICT, clocks/APIC_DIVISOR); + if (!oneshot) + apic_write_around(APIC_TMICT, clocks/APIC_DIVISOR); } -static void __devinit setup_APIC_timer(unsigned int clocks) +static void lapic_next_event(unsigned long evt) { - unsigned long flags; - - local_irq_save(flags); + apic_write_around(APIC_TMICT, evt); +} - /* - * Wait for IRQ0's slice: - */ - wait_timer_tick(); +static void lapic_timer_setup(int mode) +{ + unsigned long flags; - __setup_APIC_LVTT(clocks); + raw_local_irq_save(flags); + __setup_APIC_LVTT(calibration_result, mode == CLOCK_EVT_ONESHOT); + raw_local_irq_restore(flags); +} - local_irq_restore(flags); +static void __devinit setup_APIC_timer(void) +{ + setup_local_clockevent(&lapic_clockevent, CPU_MASK_NONE); } /* @@ -969,6 +1002,8 @@ static void __devinit setup_APIC_timer(u * to calibrate, since some later bootup code depends on getting * the first irq? Ugh. * + * TODO: Fix this rather than saying "Ugh" -tglx + * * We want to do the calibration only once since we * want to have local timer irqs syncron. CPUs connected * by the same APIC bus have the very same bus frequency. @@ -991,7 +1026,7 @@ static int __init calibrate_APIC_clock(v * value into the APIC clock, we just want to get the * counter running for calibration. 
*/ - __setup_APIC_LVTT(1000000000); + __setup_APIC_LVTT(1000000000, 0); /* * The timer chip counts down to zero. Let's wait @@ -1028,6 +1063,13 @@ static int __init calibrate_APIC_clock(v result = (tt1-tt2)*APIC_DIVISOR/LOOPS; + /* Calculate the scaled math multiplication factor */ + lapic_clockevent.mult = div_sc32(tt1-tt2, TICK_NSEC * LOOPS); + lapic_clockevent.max_delta_ns = + clockevent_delta2ns(0x7FFFFF, &lapic_clockevent); + lapic_clockevent.min_delta_ns = + clockevent_delta2ns(0xF, &lapic_clockevent); + if (cpu_has_tsc) apic_printk(APIC_VERBOSE, "..... CPU clock speed is " "%ld.%04ld MHz.\n", @@ -1042,27 +1084,26 @@ static int __init calibrate_APIC_clock(v return result; } -static unsigned int calibration_result; - void __init setup_boot_APIC_clock(void) { + unsigned long flags; apic_printk(APIC_VERBOSE, "Using local APIC timer interrupts.\n"); using_apic_timer = 1; - local_irq_disable(); + raw_local_irq_save(flags); calibration_result = calibrate_APIC_clock(); /* * Now set up the timer for real. */ - setup_APIC_timer(calibration_result); + setup_APIC_timer(); - local_irq_enable(); + raw_local_irq_restore(flags); } void __devinit setup_secondary_APIC_clock(void) { - setup_APIC_timer(calibration_result); + setup_APIC_timer(); } void __devinit disable_APIC_timer(void) @@ -1085,6 +1126,8 @@ void enable_APIC_timer(void) } } +static DEFINE_PER_CPU(int, prof_multiplier) = 1; + /* * the frequency of the profiling timer can be changed * by writing a multiplier value into /proc/profile. @@ -1112,60 +1155,6 @@ int setup_profiling_timer(unsigned int m return 0; } - -#undef APIC_DIVISOR - -/* - * Local timer interrupt handler. It does both profiling and - * process statistics/rescheduling. - * - * We do profiling in every local tick, statistics/rescheduling - * happen only every 'profiling multiplier' ticks. The default - * multiplier is 1 and it can be changed by writing the new multiplier - * value into /proc/profile. 
- */ - -inline void smp_local_timer_interrupt(struct pt_regs * regs) -{ - int cpu = smp_processor_id(); - - profile_tick(CPU_PROFILING, regs); - if (--per_cpu(prof_counter, cpu) <= 0) { - /* - * The multiplier may have changed since the last time we got - * to this point as a result of the user writing to - * /proc/profile. In this case we need to adjust the APIC - * timer accordingly. - * - * Interrupts are already masked off at this point. - */ - per_cpu(prof_counter, cpu) = per_cpu(prof_multiplier, cpu); - if (per_cpu(prof_counter, cpu) != - per_cpu(prof_old_multiplier, cpu)) { - __setup_APIC_LVTT( - calibration_result/ - per_cpu(prof_counter, cpu)); - per_cpu(prof_old_multiplier, cpu) = - per_cpu(prof_counter, cpu); - } - -#ifdef CONFIG_SMP - update_process_times(user_mode_vm(regs)); -#endif - } - - /* - * We take the 'long' return path, and there every subsystem - * grabs the apropriate locks (kernel lock/ irq lock). - * - * we might want to decouple profiling from the 'long path', - * and do the profiling totally in assembly. - * - * Currently this isn't too much of an issue (performance wise), - * we can take more than 100K local irqs per second on a 100 MHz P5. - */ -} - /* * Local APIC timer interrupt. This is the most natural way for doing * local interrupts, but local timer interrupts can be emulated by @@ -1175,7 +1164,7 @@ inline void smp_local_timer_interrupt(st * interrupt as well. Thus we cannot inline the local irq ... ] */ -fastcall void smp_apic_timer_interrupt(struct pt_regs *regs) +fastcall notrace void smp_apic_timer_interrupt(struct pt_regs *regs) { int cpu = smp_processor_id(); @@ -1184,6 +1173,8 @@ fastcall void smp_apic_timer_interrupt(s */ per_cpu(irq_stat, cpu).apic_timer_irqs++; + trace_special(regs->eip, 0, 0); + /* * NOTE! We'd better ACK the irq immediately, * because timer handling can be slow. @@ -1195,7 +1186,17 @@ fastcall void smp_apic_timer_interrupt(s * interrupt lock, which is the WrongThing (tm) to do. 
*/ irq_enter(); - smp_local_timer_interrupt(regs); + /* + * If the task is currently running in user mode, don't + * detect soft lockups. If CONFIG_DETECT_SOFTLOCKUP is not + * configured, this should be optimized out. + */ + if (user_mode(regs)) + touch_light_softlockup_watchdog(); + + if (lapic_clockevent.event_handler) + lapic_clockevent.event_handler(regs); + irq_exit(); } @@ -1250,6 +1251,7 @@ fastcall void smp_error_interrupt(struct */ printk (KERN_DEBUG "APIC error on CPU%d: %02lx(%02lx)\n", smp_processor_id(), v , v1); + dump_stack(); irq_exit(); } Index: linux/arch/i386/kernel/apm.c =================================================================== --- linux.orig/arch/i386/kernel/apm.c +++ linux/arch/i386/kernel/apm.c @@ -552,9 +552,9 @@ static inline void apm_restore_cpus(cpum */ #define APM_DO_CLI \ if (apm_info.allow_ints) \ - local_irq_enable(); \ + raw_local_irq_enable(); \ else \ - local_irq_disable(); + raw_local_irq_disable(); #ifdef APM_ZERO_SEGS # define APM_DECL_SEGS \ @@ -604,12 +604,12 @@ static u8 apm_bios_call(u32 func, u32 eb save_desc_40 = per_cpu(cpu_gdt_table, cpu)[0x40 / 8]; per_cpu(cpu_gdt_table, cpu)[0x40 / 8] = bad_bios_desc; - local_save_flags(flags); + raw_local_save_flags(flags); APM_DO_CLI; APM_DO_SAVE_SEGS; apm_bios_call_asm(func, ebx_in, ecx_in, eax, ebx, ecx, edx, esi); APM_DO_RESTORE_SEGS; - local_irq_restore(flags); + raw_local_irq_restore(flags); per_cpu(cpu_gdt_table, cpu)[0x40 / 8] = save_desc_40; put_cpu(); apm_restore_cpus(cpus); @@ -647,12 +647,12 @@ static u8 apm_bios_call_simple(u32 func, save_desc_40 = per_cpu(cpu_gdt_table, cpu)[0x40 / 8]; per_cpu(cpu_gdt_table, cpu)[0x40 / 8] = bad_bios_desc; - local_save_flags(flags); + raw_local_save_flags(flags); APM_DO_CLI; APM_DO_SAVE_SEGS; error = apm_bios_call_simple_asm(func, ebx_in, ecx_in, eax); APM_DO_RESTORE_SEGS; - local_irq_restore(flags); + raw_local_irq_restore(flags); __get_cpu_var(cpu_gdt_table)[0x40 / 8] = save_desc_40; put_cpu(); apm_restore_cpus(cpus); @@ 
-1194,7 +1194,7 @@ static int suspend(int vetoable) } device_suspend(PMSG_SUSPEND); - local_irq_disable(); + raw_local_irq_disable(); device_power_down(PMSG_SUSPEND); /* serialize with the timer interrupt */ @@ -1210,14 +1210,14 @@ static int suspend(int vetoable) */ spin_unlock(&i8253_lock); write_sequnlock(&xtime_lock); - local_irq_enable(); + raw_local_irq_enable(); save_processor_state(); err = set_system_power_state(APM_STATE_SUSPEND); ignore_normal_resume = 1; restore_processor_state(); - local_irq_disable(); + raw_local_irq_disable(); write_seqlock(&xtime_lock); spin_lock(&i8253_lock); reinit_timer(); @@ -1232,7 +1232,7 @@ static int suspend(int vetoable) apm_error("suspend", err); err = (err == APM_SUCCESS) ? 0 : -EIO; device_power_up(); - local_irq_enable(); + raw_local_irq_enable(); device_resume(); pm_send_all(PM_RESUME, (void *)0); queue_event(APM_NORMAL_RESUME, NULL); @@ -1251,22 +1251,22 @@ static void standby(void) { int err; - local_irq_disable(); + raw_local_irq_disable(); device_power_down(PMSG_SUSPEND); /* serialize with the timer interrupt */ write_seqlock(&xtime_lock); /* If needed, notify drivers here */ get_time_diff(); write_sequnlock(&xtime_lock); - local_irq_enable(); + raw_local_irq_enable(); err = set_system_power_state(APM_STATE_STANDBY); if ((err != APM_SUCCESS) && (err != APM_NO_ERROR)) apm_error("standby", err); - local_irq_disable(); + raw_local_irq_disable(); device_power_up(); - local_irq_enable(); + raw_local_irq_enable(); } static apm_event_t get_event(void) Index: linux/arch/i386/kernel/cpu/cpufreq/longhaul.c =================================================================== --- linux.orig/arch/i386/kernel/cpu/cpufreq/longhaul.c +++ linux/arch/i386/kernel/cpu/cpufreq/longhaul.c @@ -144,7 +144,7 @@ static void do_powersaver(union msr_long longhaul->bits.RevisionKey = 0; preempt_disable(); - local_irq_save(flags); + raw_local_irq_save(flags); /* * get current pci bus master state for all devices @@ -166,11 +166,11 @@ static void 
do_powersaver(union msr_long outb(0xFE,0x21); /* TMR0 only */ outb(0xFF,0x80); /* delay */ - safe_halt(); + raw_safe_halt(); wrmsrl(MSR_VIA_LONGHAUL, longhaul->val); halt(); - local_irq_disable(); + raw_local_irq_disable(); outb(tmp_mask,0x21); /* restore mask */ @@ -184,7 +184,7 @@ static void do_powersaver(union msr_long pci_write_config_byte(dev, PCI_COMMAND, pci_cmd); } } while (dev != NULL); - local_irq_restore(flags); + raw_local_irq_restore(flags); preempt_enable(); /* disable bus ratio bit */ @@ -245,16 +245,16 @@ static void longhaul_setstate(unsigned i /* Enable software clock multiplier */ bcr2.bits.ESOFTBF = 1; bcr2.bits.CLOCKMUL = clock_ratio_index; - local_irq_disable(); + raw_local_irq_disable(); wrmsrl (MSR_VIA_BCR2, bcr2.val); - safe_halt(); + raw_safe_halt(); /* Disable software clock multiplier */ rdmsrl (MSR_VIA_BCR2, bcr2.val); bcr2.bits.ESOFTBF = 0; - local_irq_disable(); + raw_local_irq_disable(); wrmsrl (MSR_VIA_BCR2, bcr2.val); - local_irq_enable(); + raw_local_irq_enable(); break; /* Index: linux/arch/i386/kernel/cpu/mtrr/cyrix.c =================================================================== --- linux.orig/arch/i386/kernel/cpu/mtrr/cyrix.c +++ linux/arch/i386/kernel/cpu/mtrr/cyrix.c @@ -17,7 +17,7 @@ cyrix_get_arr(unsigned int reg, unsigned arr = CX86_ARR_BASE + (reg << 1) + reg; /* avoid multiplication by 3 */ /* Save flags and disable interrupts */ - local_irq_save(flags); + raw_local_irq_save(flags); ccr3 = getCx86(CX86_CCR3); setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ @@ -28,7 +28,7 @@ cyrix_get_arr(unsigned int reg, unsigned setCx86(CX86_CCR3, ccr3); /* disable MAPEN */ /* Enable interrupts if it was enabled previously */ - local_irq_restore(flags); + raw_local_irq_restore(flags); shift = ((unsigned char *) base)[1] & 0x0f; *base >>= PAGE_SHIFT; Index: linux/arch/i386/kernel/cpu/mtrr/generic.c =================================================================== --- linux.orig/arch/i386/kernel/cpu/mtrr/generic.c 
+++ linux/arch/i386/kernel/cpu/mtrr/generic.c @@ -234,7 +234,7 @@ static unsigned long set_mtrr_state(u32 static unsigned long cr4 = 0; static u32 deftype_lo, deftype_hi; -static DEFINE_SPINLOCK(set_atomicity_lock); +static DEFINE_RAW_SPINLOCK(set_atomicity_lock); /* * Since we are disabling the cache don't allow any interrupts - they @@ -296,14 +296,14 @@ static void generic_set_all(void) unsigned long mask, count; unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); prepare_set(); /* Actually set the state */ mask = set_mtrr_state(deftype_lo,deftype_hi); post_set(); - local_irq_restore(flags); + raw_local_irq_restore(flags); /* Use the atomic bitops to update the global mask */ for (count = 0; count < sizeof mask * 8; ++count) { @@ -331,7 +331,7 @@ static void generic_set_mtrr(unsigned in vr = &mtrr_state.var_ranges[reg]; - local_irq_save(flags); + raw_local_irq_save(flags); prepare_set(); if (size == 0) { @@ -350,7 +350,7 @@ static void generic_set_mtrr(unsigned in } post_set(); - local_irq_restore(flags); + raw_local_irq_restore(flags); } int generic_validate_add_page(unsigned long base, unsigned long size, unsigned int type) Index: linux/arch/i386/kernel/cpu/mtrr/main.c =================================================================== --- linux.orig/arch/i386/kernel/cpu/mtrr/main.c +++ linux/arch/i386/kernel/cpu/mtrr/main.c @@ -146,7 +146,7 @@ static void ipi_handler(void *info) struct set_mtrr_data *data = info; unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); atomic_dec(&data->count); while(!atomic_read(&data->gate)) @@ -164,7 +164,7 @@ static void ipi_handler(void *info) cpu_relax(); atomic_dec(&data->count); - local_irq_restore(flags); + raw_local_irq_restore(flags); } #endif @@ -225,7 +225,7 @@ static void set_mtrr(unsigned int reg, u if (smp_call_function(ipi_handler, &data, 1, 0) != 0) panic("mtrr: timed out waiting for other CPUs\n"); - local_irq_save(flags); + raw_local_irq_save(flags); 
while(atomic_read(&data.count)) cpu_relax(); @@ -259,7 +259,7 @@ static void set_mtrr(unsigned int reg, u while(atomic_read(&data.count)) cpu_relax(); - local_irq_restore(flags); + raw_local_irq_restore(flags); } /** @@ -687,11 +687,11 @@ void mtrr_ap_init(void) * 2.cpu hotadd time. We let mtrr_add/del_page hold cpuhotplug lock to * prevent mtrr entry changes */ - local_irq_save(flags); + raw_local_irq_save(flags); mtrr_if->set_all(); - local_irq_restore(flags); + raw_local_irq_restore(flags); } static int __init mtrr_init_finialize(void) Index: linux/arch/i386/kernel/cpu/mtrr/state.c =================================================================== --- linux.orig/arch/i386/kernel/cpu/mtrr/state.c +++ linux/arch/i386/kernel/cpu/mtrr/state.c @@ -12,7 +12,7 @@ void set_mtrr_prepare_save(struct set_mt unsigned int cr0; /* Disable interrupts locally */ - local_irq_save(ctxt->flags); + raw_local_irq_save(ctxt->flags); if (use_intel() || is_cpu(CYRIX)) { @@ -73,6 +73,6 @@ void set_mtrr_done(struct set_mtrr_conte write_cr4(ctxt->cr4val); } /* Re-enable interrupts locally (if enabled previously) */ - local_irq_restore(ctxt->flags); + raw_local_irq_restore(ctxt->flags); } Index: linux/arch/i386/kernel/entry.S =================================================================== --- linux.orig/arch/i386/kernel/entry.S +++ linux/arch/i386/kernel/entry.S @@ -76,10 +76,10 @@ NT_MASK = 0x00004000 VM_MASK = 0x00020000 #ifdef CONFIG_PREEMPT -#define preempt_stop cli +# define preempt_stop cli #else -#define preempt_stop -#define resume_kernel restore_nocheck +# define preempt_stop +# define resume_kernel restore_nocheck #endif #define SAVE_ALL \ @@ -160,14 +160,17 @@ ENTRY(resume_userspace) #ifdef CONFIG_PREEMPT ENTRY(resume_kernel) cli + cmpl $0, kernel_preemption + jz restore_nocheck cmpl $0,TI_preempt_count(%ebp) # non-zero preempt_count ? jnz restore_nocheck need_resched: movl TI_flags(%ebp), %ecx # need_resched set ? 
testb $_TIF_NEED_RESCHED, %cl - jz restore_all + jz restore_nocheck testl $IF_MASK,EFLAGS(%esp) # interrupts off (exception path) ? - jz restore_all + jz restore_nocheck + cli call preempt_schedule_irq jmp need_resched #endif @@ -200,6 +203,11 @@ sysenter_past_esp: pushl %eax SAVE_ALL +#ifdef CONFIG_LATENCY_TRACE + pushl %edx; pushl %ecx; pushl %ebx; pushl %eax + call sys_call + popl %eax; popl %ebx; popl %ecx; popl %edx +#endif GET_THREAD_INFO(%ebp) /* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */ @@ -213,6 +221,11 @@ sysenter_past_esp: movl TI_flags(%ebp), %ecx testw $_TIF_ALLWORK_MASK, %cx jne syscall_exit_work +#ifdef CONFIG_LATENCY_TRACE + pushl %eax + call sys_ret + popl %eax +#endif /* if something modifies registers it must also disable sysexit */ movl EIP(%esp), %edx movl OLDESP(%esp), %ecx @@ -225,6 +238,11 @@ sysenter_past_esp: ENTRY(system_call) pushl %eax # save orig_eax SAVE_ALL +#ifdef CONFIG_LATENCY_TRACE + pushl %edx; pushl %ecx; pushl %ebx; pushl %eax + call sys_call + popl %eax; popl %ebx; popl %ecx; popl %edx +#endif GET_THREAD_INFO(%ebp) # system call tracing in operation / emulation /* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */ @@ -254,6 +272,17 @@ restore_all: cmpl $((4 << 8) | 3), %eax je ldt_ss # returning to user-space with LDT SS restore_nocheck: +#if defined(CONFIG_CRITICAL_IRQSOFF_TIMING) || defined(CONFIG_LATENCY_TRACE) + pushl %eax +#ifdef CONFIG_CRITICAL_IRQSOFF_TIMING + call trace_irqs_on +#endif +#ifdef CONFIG_LATENCY_TRACE + call sys_ret +#endif + popl %eax +#endif +restore_nocheck_nmi: RESTORE_REGS addl $4, %esp 1: iret @@ -297,18 +326,19 @@ ldt_ss: # perform work that needs to be done immediately before resumption ALIGN work_pending: - testb $_TIF_NEED_RESCHED, %cl + testl $(_TIF_NEED_RESCHED|_TIF_NEED_RESCHED_DELAYED), %ecx jz work_notifysig work_resched: - call schedule - cli # make sure we don't miss an interrupt + cli + call __schedule + # make sure we don't miss 
an interrupt # setting need_resched or sigpending # between sampling and the iret movl TI_flags(%ebp), %ecx andl $_TIF_WORK_MASK, %ecx # is there any work to be done other # than syscall tracing? jz restore_all - testb $_TIF_NEED_RESCHED, %cl + testl $(_TIF_NEED_RESCHED|_TIF_NEED_RESCHED_DELAYED), %ecx jnz work_resched work_notifysig: # deal with pending signals and @@ -351,6 +381,11 @@ syscall_trace_entry: syscall_exit_work: testb $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP), %cl jz work_pending +#ifdef CONFIG_CRITICAL_IRQSOFF_TIMING + pushl %eax + call trace_irqs_on + popl %eax +#endif sti # could let do_syscall_trace() call # schedule() instead movl %esp, %eax @@ -412,9 +447,16 @@ ENTRY(irq_entries_start) vector=vector+1 .endr +#ifdef CONFIG_CRITICAL_IRQSOFF_TIMING +# define TRACE_IRQS_OFF call trace_irqs_off_lowlevel; +#else +# define TRACE_IRQS_OFF +#endif + ALIGN common_interrupt: SAVE_ALL + TRACE_IRQS_OFF movl %esp,%eax call do_IRQ jmp ret_from_intr @@ -423,6 +465,7 @@ common_interrupt: ENTRY(name) \ pushl $nr-256; \ SAVE_ALL \ + TRACE_IRQS_OFF \ movl %esp,%eax; \ call smp_/**/name; \ jmp ret_from_intr; @@ -552,7 +595,7 @@ nmi_stack_correct: xorl %edx,%edx # zero error code movl %esp,%eax # pt_regs pointer call do_nmi - jmp restore_all + jmp restore_nocheck_nmi nmi_stack_fixup: FIX_STACK(12,nmi_stack_correct, 1) Index: linux/arch/i386/kernel/hpet.c =================================================================== --- /dev/null +++ linux/arch/i386/kernel/hpet.c @@ -0,0 +1,69 @@ +#include +#include +#include +#include + +#include +#include + +#define HPET_MASK 0xFFFFFFFF +#define HPET_SHIFT 22 + +/* FSEC = 10^-15 NSEC = 10^-9 */ +#define FSEC_PER_NSEC 1000000 + +static void *hpet_ptr; + +static cycle_t read_hpet(void) +{ + return (cycle_t)readl(hpet_ptr); +} + +struct clocksource clocksource_hpet = { + .name = "hpet", + .rating = 250, + .read = read_hpet, + .mask = (cycle_t)HPET_MASK, + .mult = 0, /* set below */ + .shift = HPET_SHIFT, + 
.is_continuous = 1, +}; + +static int __init init_hpet_clocksource(void) +{ + unsigned long hpet_period; + void __iomem* hpet_base; + u64 tmp; + + if (!hpet_address) + return -ENODEV; + + /* calculate the hpet address: */ + hpet_base = + (void __iomem*)ioremap_nocache(hpet_address, HPET_MMAP_SIZE); + hpet_ptr = hpet_base + HPET_COUNTER; + + /* calculate the frequency: */ + hpet_period = readl(hpet_base + HPET_PERIOD); + + /* + * hpet period is in femtoseconds per cycle + * so we need to convert this to ns/cyc units + * approximated by mult/2^shift + * + * fsec/cyc * 1nsec/1000000fsec = nsec/cyc = mult/2^shift + * fsec/cyc * 1ns/1000000fsec * 2^shift = mult + * fsec/cyc * 2^shift * 1nsec/1000000fsec = mult + * (fsec/cyc << shift)/1000000 = mult + * (hpet_period << shift)/FSEC_PER_NSEC = mult + */ + tmp = (u64)hpet_period << HPET_SHIFT; + do_div(tmp, FSEC_PER_NSEC); + clocksource_hpet.mult = (u32)tmp; + + register_clocksource(&clocksource_hpet); + + return 0; +} + +module_init(init_hpet_clocksource); Index: linux/arch/i386/kernel/i386_ksyms.c =================================================================== --- linux.orig/arch/i386/kernel/i386_ksyms.c +++ linux/arch/i386/kernel/i386_ksyms.c @@ -6,10 +6,12 @@ /* This is definitely a GPL-only symbol */ EXPORT_SYMBOL_GPL(cpu_gdt_table); -EXPORT_SYMBOL(__down_failed); -EXPORT_SYMBOL(__down_failed_interruptible); -EXPORT_SYMBOL(__down_failed_trylock); -EXPORT_SYMBOL(__up_wakeup); +#ifdef CONFIG_ASM_SEMAPHORES +EXPORT_SYMBOL(__compat_down_failed); +EXPORT_SYMBOL(__compat_down_failed_interruptible); +EXPORT_SYMBOL(__compat_down_failed_trylock); +EXPORT_SYMBOL(__compat_up_wakeup); +#endif /* Networking helper routines. 
*/ EXPORT_SYMBOL(csum_partial_copy_generic); @@ -25,7 +27,7 @@ EXPORT_SYMBOL(__put_user_8); EXPORT_SYMBOL(strpbrk); EXPORT_SYMBOL(strstr); -#ifdef CONFIG_SMP +#if defined(CONFIG_SMP) && defined(CONFIG_ASM_SEMAPHORES) extern void FASTCALL( __write_lock_failed(rwlock_t *rw)); extern void FASTCALL( __read_lock_failed(rwlock_t *rw)); EXPORT_SYMBOL(__write_lock_failed); Index: linux/arch/i386/kernel/i8253.c =================================================================== --- /dev/null +++ linux/arch/i386/kernel/i8253.c @@ -0,0 +1,138 @@ +/* + * i8253.c 8253/PIT functions + * + */ +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "io_ports.h" + +DEFINE_RAW_SPINLOCK(i8253_lock); +EXPORT_SYMBOL(i8253_lock); + +static void init_pit_timer(int mode) +{ + unsigned long flags; + + spin_lock_irqsave(&i8253_lock, flags); + + if (mode != CLOCK_EVT_ONESHOT) { + /* binary, mode 2, LSB/MSB, ch 0 */ + outb_p(0x34, PIT_MODE); + udelay(10); + outb_p(LATCH & 0xff , PIT_CH0); /* LSB */ + outb(LATCH >> 8 , PIT_CH0); /* MSB */ + } else { + /* One shot setup */ + outb_p(0x38, PIT_MODE); + udelay(10); + } + + spin_unlock_irqrestore(&i8253_lock, flags); +} + +static void pit_next_event(unsigned long evt) +{ + unsigned long flags; + + spin_lock_irqsave(&i8253_lock, flags); + outb_p(evt & 0xff , PIT_CH0); /* LSB */ + outb(evt >> 8 , PIT_CH0); /* MSB */ + spin_unlock_irqrestore(&i8253_lock, flags); +} + +static struct clock_event pit_clockevent = { + .name = "pit", + .capabilities = CLOCK_CAP_TICK +#ifndef CONFIG_SMP + | CLOCK_CAP_NEXTEVT | CLOCK_CAP_PROFILE | + CLOCK_CAP_UPDATE +#endif + , + .set_mode = init_pit_timer, + .set_next_event = pit_next_event, + .start_event = io_apic_timer_ack, + .end_event = mca_timer_ack, + .shift = 32, + .irq = 0, +}; + +void setup_pit_timer(void) +{ + pit_clockevent.mult = div_sc32(CLOCK_TICK_RATE, NSEC_PER_SEC); + pit_clockevent.max_delta_ns = + clockevent_delta2ns(0x7FFF, 
&pit_clockevent); + pit_clockevent.min_delta_ns = + clockevent_delta2ns(0xF, &pit_clockevent); + setup_global_clockevent(&pit_clockevent, CPU_MASK_NONE); +} + +/* + * Since the PIT overflows every tick, it's not very useful + * to just read it by itself. So use jiffies to emulate a free + * running counter: + */ +static cycle_t pit_read(void) +{ + unsigned long flags, seq; + int count; + u64 jifs; + + do { + seq = read_seqbegin(&xtime_lock); + + spin_lock_irqsave(&i8253_lock, flags); + outb_p(0x00, PIT_MODE); /* latch the count ASAP */ + count = inb_p(PIT_CH0); /* read the latched count */ + count |= inb_p(PIT_CH0) << 8; + + /* VIA686a test code... reset the latch if count > max + 1 */ + if (count > LATCH) { + outb_p(0x34, PIT_MODE); + outb_p(LATCH & 0xff, PIT_CH0); + outb(LATCH >> 8, PIT_CH0); + count = LATCH - 1; + } + spin_unlock_irqrestore(&i8253_lock, flags); + + jifs = jiffies_64; + } while (read_seqretry(&xtime_lock, seq)); + + jifs -= INITIAL_JIFFIES; + count = (LATCH-1) - count; + + return (cycle_t)(jifs * LATCH) + count; +} + +static struct clocksource clocksource_pit = { + .name = "pit", + .rating = 110, + .read = pit_read, + .mask = (cycle_t)-1, + .mult = 0, + .shift = 20, +}; + +static int __init init_pit_clocksource(void) +{ + /* TODO: bogus limit of 4 CPUs? --mingo */ + if (num_possible_cpus() > 4) /* PIT does not scale! 
*/ + return 0; + + clocksource_pit.mult = clocksource_hz2mult(CLOCK_TICK_RATE, 20); + register_clocksource(&clocksource_pit); + + return 0; +} +module_init(init_pit_clocksource); Index: linux/arch/i386/kernel/i8259.c =================================================================== --- linux.orig/arch/i386/kernel/i8259.c +++ linux/arch/i386/kernel/i8259.c @@ -35,7 +35,7 @@ * moves to arch independent land */ -DEFINE_SPINLOCK(i8259A_lock); +DEFINE_RAW_SPINLOCK(i8259A_lock); static void end_8259A_irq (unsigned int irq) { @@ -366,7 +366,7 @@ static irqreturn_t math_error_irq(int cp * New motherboards sometimes make IRQ 13 be a PCI interrupt, * so allow interrupt sharing. */ -static struct irqaction fpu_irq = { math_error_irq, 0, CPU_MASK_NONE, "fpu", NULL, NULL }; +static struct irqaction fpu_irq = { math_error_irq, SA_NODELAY, CPU_MASK_NONE, "fpu", NULL, NULL }; void __init init_ISA_irqs (void) { @@ -422,12 +422,6 @@ void __init init_IRQ(void) intr_init_hook(); /* - * Set the clock to HZ Hz, we already have a valid - * vector now: - */ - setup_pit_timer(); - - /* * External FPU? Set up irq13 if so, for * original braindamaged IBM FERR coupling. 
*/ Index: linux/arch/i386/kernel/init_task.c =================================================================== --- linux.orig/arch/i386/kernel/init_task.c +++ linux/arch/i386/kernel/init_task.c @@ -10,8 +10,8 @@ #include #include -static struct fs_struct init_fs = INIT_FS; -static struct files_struct init_files = INIT_FILES; +static struct fs_struct init_fs = INIT_FS(init_fs); +static struct files_struct init_files = INIT_FILES(init_files); static struct signal_struct init_signals = INIT_SIGNALS(init_signals); static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); struct mm_struct init_mm = INIT_MM(init_mm); Index: linux/arch/i386/kernel/io_apic.c =================================================================== --- linux.orig/arch/i386/kernel/io_apic.c +++ linux/arch/i386/kernel/io_apic.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include @@ -46,7 +47,7 @@ int (*ioapic_renumber_irq)(int ioapic, int irq); atomic_t irq_mis_count; -static DEFINE_SPINLOCK(ioapic_lock); +static DEFINE_RAW_SPINLOCK(ioapic_lock); /* * Is the SiS APIC rmw bug present ? @@ -54,11 +55,6 @@ static DEFINE_SPINLOCK(ioapic_lock); */ int sis_apic_bug = -1; -/* - * # of IRQ routing registers - */ -int nr_ioapic_registers[MAX_IO_APICS]; - int disable_timer_pin_1 __initdata; /* @@ -87,6 +83,27 @@ int vector_irq[NR_VECTORS] __read_mostly #define vector_to_irq(vector) (vector) #endif +static int timer_ack; + +void io_apic_timer_ack(void *priv) +{ + unsigned long flags; + + if (timer_ack) { + /* + * Subtle, when I/O APICs are used we have to ack timer IRQ + * manually to reset the IRR bit for do_slow_gettimeoffset(). + * This will also deassert NMI lines for the watchdog if run + * on an 82489DX-based system. + */ + spin_lock_irqsave(&i8259A_lock, flags); + outb(0x0c, PIC_MASTER_OCW3); + /* Ack the IRQ; AEOI will end it automatically. 
*/ + inb(PIC_MASTER_POLL); + spin_unlock_irqrestore(&i8259A_lock, flags); + } +} + /* * The common case is 1:1 IRQ<->pin mappings. Sometimes there are * shared ISA-space IRQs, so we have to support them. We are super @@ -130,19 +147,133 @@ static void __init replace_pin_at_irq(un } } +#ifdef CONFIG_X86_IOAPIC_FAST +# define IOAPIC_CACHE +#endif + +struct ioapic_data_struct { + struct sys_device dev; + int nr_registers; // # of IRQ routing registers + volatile unsigned int *base; + struct IO_APIC_route_entry *entry; +#ifdef IOAPIC_CACHE + unsigned int reg_set; + u32 cached_val[0]; +#endif +}; + +static struct ioapic_data_struct *ioapic_data[MAX_IO_APICS]; + +int nr_ioapic_registers(int apic) +{ + return ioapic_data[apic]->nr_registers; +} + +static inline unsigned int __raw_io_apic_read(struct ioapic_data_struct *ioapic, unsigned int reg) +{ +# ifdef IOAPIC_CACHE + ioapic->reg_set = reg; +# endif + ioapic->base[0] = reg; + return ioapic->base[4]; +} + + +# ifdef IOAPIC_CACHE +static void __init ioapic_cache_init(struct ioapic_data_struct *ioapic) +{ + int reg; + for (reg = 0; reg < (0x10 + 2 * ioapic->nr_registers); reg++) + ioapic->cached_val[reg] = __raw_io_apic_read(ioapic, reg); +} +# endif + + +static unsigned int raw_io_apic_read(struct ioapic_data_struct *ioapic, unsigned int reg) +{ + unsigned int val = __raw_io_apic_read(ioapic, reg); + +# ifdef IOAPIC_CACHE + ioapic->cached_val[reg] = val; +# endif + return val; +} + +static unsigned int io_apic_read(struct ioapic_data_struct *ioapic, unsigned int reg) +{ +# ifdef IOAPIC_CACHE + if (likely(!sis_apic_bug)) { + ioapic->reg_set = -1; + return ioapic->cached_val[reg]; + } +# endif + return raw_io_apic_read(ioapic, reg); +} + +static void io_apic_write(struct ioapic_data_struct *ioapic, unsigned int reg, unsigned int val) +{ +# ifdef IOAPIC_CACHE + ioapic->cached_val[reg] = val; + ioapic->reg_set = reg; +# endif + ioapic->base[0] = reg; + ioapic->base[4] = val; +} + + +/* + * Some systems need a POST flush or 
else level-triggered interrupts + * generate lots of spurious interrupts due to the POST-ed write not + * reaching the IOAPIC before the IRQ is ACK-ed in the local APIC. + * + * It seems most systems need this - disable the optimization for now. + */ +#ifndef CONFIG_X86_IOAPIC_FAST +# define IOAPIC_POSTFLUSH +#endif + +/* + * Re-write a value: to be used for read-modify-write + * cycles where the read already set up the index register. + * + * Older SiS APIC requires we rewrite the index register + */ +static void io_apic_modify(struct ioapic_data_struct *ioapic, unsigned int reg, unsigned int val) +{ +#ifdef IOAPIC_CACHE + ioapic->cached_val[reg] = val; + if (ioapic->reg_set != reg || sis_apic_bug) { + ioapic->reg_set = reg; +#else + if (unlikely(sis_apic_bug)) { +#endif + ioapic->base[0] = reg; + } + ioapic->base[4] = val; +#ifndef IOAPIC_POSTFLUSH + if (unlikely(sis_apic_bug)) +#endif + /* + * Force POST flush by reading: + */ + val = ioapic->base[4]; +} + static void __modify_IO_APIC_irq (unsigned int irq, unsigned long enable, unsigned long disable) { struct irq_pin_list *entry = irq_2_pin + irq; - unsigned int pin, reg; + unsigned int pin, val; + struct ioapic_data_struct *ioapic; for (;;) { pin = entry->pin; if (pin == -1) break; - reg = io_apic_read(entry->apic, 0x10 + pin*2); - reg &= ~disable; - reg |= enable; - io_apic_modify(entry->apic, 0x10 + pin*2, reg); + ioapic = ioapic_data[entry->apic]; + val = io_apic_read(ioapic, 0x10 + pin*2); + val &= ~disable; + val |= enable; + io_apic_modify(ioapic, 0x10 + pin*2, val); if (!entry->next) break; entry = irq_2_pin + entry->next; @@ -150,29 +281,17 @@ static void __modify_IO_APIC_irq (unsign } /* mask = 1 */ -static void __mask_IO_APIC_irq (unsigned int irq) +static inline void __mask_IO_APIC_irq (unsigned int irq) { __modify_IO_APIC_irq(irq, 0x00010000, 0); } /* mask = 0 */ -static void __unmask_IO_APIC_irq (unsigned int irq) +static inline void __unmask_IO_APIC_irq (unsigned int irq) { 
__modify_IO_APIC_irq(irq, 0, 0x00010000); } -/* mask = 1, trigger = 0 */ -static void __mask_and_edge_IO_APIC_irq (unsigned int irq) -{ - __modify_IO_APIC_irq(irq, 0x00010000, 0x00008000); -} - -/* mask = 0, trigger = 1 */ -static void __unmask_and_level_IO_APIC_irq (unsigned int irq) -{ - __modify_IO_APIC_irq(irq, 0x00008000, 0x00010000); -} - static void mask_IO_APIC_irq (unsigned int irq) { unsigned long flags; @@ -191,15 +310,15 @@ static void unmask_IO_APIC_irq (unsigned spin_unlock_irqrestore(&ioapic_lock, flags); } -static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin) +static void clear_IO_APIC_pin(struct ioapic_data_struct *ioapic, unsigned int pin) { struct IO_APIC_route_entry entry; unsigned long flags; /* Check delivery_mode to be sure we're not clearing an SMI pin */ spin_lock_irqsave(&ioapic_lock, flags); - *(((int*)&entry) + 0) = io_apic_read(apic, 0x10 + 2 * pin); - *(((int*)&entry) + 1) = io_apic_read(apic, 0x11 + 2 * pin); + *(((int*)&entry) + 0) = io_apic_read(ioapic, 0x10 + 2 * pin); + *(((int*)&entry) + 1) = io_apic_read(ioapic, 0x11 + 2 * pin); spin_unlock_irqrestore(&ioapic_lock, flags); if (entry.delivery_mode == dest_SMI) return; @@ -210,8 +329,8 @@ static void clear_IO_APIC_pin(unsigned i memset(&entry, 0, sizeof(entry)); entry.mask = 1; spin_lock_irqsave(&ioapic_lock, flags); - io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry) + 0)); - io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry) + 1)); + io_apic_write(ioapic, 0x10 + 2 * pin, *(((int *)&entry) + 0)); + io_apic_write(ioapic, 0x11 + 2 * pin, *(((int *)&entry) + 1)); spin_unlock_irqrestore(&ioapic_lock, flags); } @@ -219,9 +338,14 @@ static void clear_IO_APIC (void) { int apic, pin; - for (apic = 0; apic < nr_ioapics; apic++) - for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) - clear_IO_APIC_pin(apic, pin); + for (apic = 0; apic < nr_ioapics; apic++) { + struct ioapic_data_struct *ioapic = ioapic_data[apic]; +#ifdef IOAPIC_CACHE + ioapic->reg_set = -1; +#endif 
+ for (pin = 0; pin < ioapic->nr_registers; pin++) + clear_IO_APIC_pin(ioapic, pin); + } } #ifdef CONFIG_SMP @@ -247,7 +371,7 @@ static void set_ioapic_affinity_irq(unsi pin = entry->pin; if (pin == -1) break; - io_apic_write(entry->apic, 0x10 + 1 + pin*2, apicid_value); + io_apic_write(ioapic_data[entry->apic], 0x10 + 1 + pin*2, apicid_value); if (!entry->next) break; entry = irq_2_pin + entry->next; @@ -819,7 +943,7 @@ void __init setup_ioapic_dest(void) return; for (ioapic = 0; ioapic < nr_ioapics; ioapic++) { - for (pin = 0; pin < nr_ioapic_registers[ioapic]; pin++) { + for (pin = 0; pin < ioapic_data[ioapic]->nr_registers; pin++) { irq_entry = find_irq_entry(ioapic, pin, mp_INT); if (irq_entry == -1) continue; @@ -1063,7 +1187,7 @@ static int pin_2_irq(int idx, int apic, */ i = irq = 0; while (i < apic) - irq += nr_ioapic_registers[i++]; + irq += ioapic_data[i++]->nr_registers; irq += pin; /* @@ -1106,7 +1230,7 @@ static inline int IO_APIC_irq_trigger(in int apic, idx, pin; for (apic = 0; apic < nr_ioapics; apic++) { - for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { + for (pin = 0; pin < ioapic_data[apic]->nr_registers; pin++) { idx = find_irq_entry(apic,pin,mp_INT); if ((idx != -1) && (irq == pin_2_irq(idx,apic,pin))) return irq_trigger(idx); @@ -1178,11 +1302,13 @@ static void __init setup_IO_APIC_irqs(vo struct IO_APIC_route_entry entry; int apic, pin, idx, irq, first_notcon = 1, vector; unsigned long flags; + struct ioapic_data_struct *ioapic; apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n"); for (apic = 0; apic < nr_ioapics; apic++) { - for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { + ioapic = ioapic_data[apic]; + for (pin = 0; pin < ioapic->nr_registers; pin++) { /* * add it to the IO-APIC irq-routing table: @@ -1239,8 +1365,8 @@ static void __init setup_IO_APIC_irqs(vo disable_8259A_irq(irq); } spin_lock_irqsave(&ioapic_lock, flags); - io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1)); - io_apic_write(apic, 0x10+2*pin, 
*(((int *)&entry)+0)); + io_apic_write(ioapic, 0x11+2*pin, *(((int *)&entry)+1)); + io_apic_write(ioapic, 0x10+2*pin, *(((int *)&entry)+0)); set_native_irq_info(irq, TARGET_CPUS); spin_unlock_irqrestore(&ioapic_lock, flags); } @@ -1287,8 +1413,8 @@ static void __init setup_ExtINT_IRQ0_pin * Add it to the IO-APIC irq-routing table: */ spin_lock_irqsave(&ioapic_lock, flags); - io_apic_write(0, 0x11+2*pin, *(((int *)&entry)+1)); - io_apic_write(0, 0x10+2*pin, *(((int *)&entry)+0)); + io_apic_write(ioapic_data[0], 0x11+2*pin, *(((int *)&entry)+1)); + io_apic_write(ioapic_data[0], 0x10+2*pin, *(((int *)&entry)+0)); spin_unlock_irqrestore(&ioapic_lock, flags); enable_8259A_irq(0); @@ -1298,7 +1424,7 @@ static inline void UNEXPECTED_IO_APIC(vo { } -void __init print_IO_APIC(void) +void /*__init*/ print_IO_APIC(void) { int apic, i; union IO_APIC_reg_00 reg_00; @@ -1306,6 +1432,7 @@ void __init print_IO_APIC(void) union IO_APIC_reg_02 reg_02; union IO_APIC_reg_03 reg_03; unsigned long flags; + struct ioapic_data_struct *ioapic; if (apic_verbosity == APIC_QUIET) return; @@ -1313,7 +1440,7 @@ void __init print_IO_APIC(void) printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries); for (i = 0; i < nr_ioapics; i++) printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n", - mp_ioapics[i].mpc_apicid, nr_ioapic_registers[i]); + mp_ioapics[i].mpc_apicid, ioapic_data[i]->nr_registers); /* * We are a bit conservative about what we expect. 
We have to @@ -1322,14 +1449,14 @@ void __init print_IO_APIC(void) printk(KERN_INFO "testing the IO APIC.......................\n"); for (apic = 0; apic < nr_ioapics; apic++) { - + ioapic = ioapic_data[apic]; spin_lock_irqsave(&ioapic_lock, flags); - reg_00.raw = io_apic_read(apic, 0); - reg_01.raw = io_apic_read(apic, 1); + reg_00.raw = io_apic_read(ioapic, 0); + reg_01.raw = io_apic_read(ioapic, 1); if (reg_01.bits.version >= 0x10) - reg_02.raw = io_apic_read(apic, 2); + reg_02.raw = io_apic_read(ioapic, 2); if (reg_01.bits.version >= 0x20) - reg_03.raw = io_apic_read(apic, 3); + reg_03.raw = io_apic_read(ioapic, 3); spin_unlock_irqrestore(&ioapic_lock, flags); printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mpc_apicid); @@ -1400,8 +1527,8 @@ void __init print_IO_APIC(void) struct IO_APIC_route_entry entry; spin_lock_irqsave(&ioapic_lock, flags); - *(((int *)&entry)+0) = io_apic_read(apic, 0x10+i*2); - *(((int *)&entry)+1) = io_apic_read(apic, 0x11+i*2); + *(((int *)&entry)+0) = raw_io_apic_read(ioapic, 0x10+i*2); + *(((int *)&entry)+1) = raw_io_apic_read(ioapic, 0x11+i*2); spin_unlock_irqrestore(&ioapic_lock, flags); printk(KERN_DEBUG " %02x %03X %02X ", @@ -1447,7 +1574,7 @@ void __init print_IO_APIC(void) return; } -#if 0 +#if 1 static void print_APIC_bitfield (int base) { @@ -1594,9 +1721,7 @@ void /*__init*/ print_PIC(void) static void __init enable_IO_APIC(void) { - union IO_APIC_reg_01 reg_01; int i; - unsigned long flags; for (i = 0; i < PIN_MAP_SIZE; i++) { irq_2_pin[i].pin = -1; @@ -1607,16 +1732,6 @@ static void __init enable_IO_APIC(void) pirq_entries[i] = -1; /* - * The number of IO-APIC IRQ registers (== #pins): - */ - for (i = 0; i < nr_ioapics; i++) { - spin_lock_irqsave(&ioapic_lock, flags); - reg_01.raw = io_apic_read(i, 1); - spin_unlock_irqrestore(&ioapic_lock, flags); - nr_ioapic_registers[i] = reg_01.bits.entries+1; - } - - /* * Do not trust the IO-APIC being empty at bootup */ clear_IO_APIC(); @@ -1659,8 +1774,7 @@ void 
disable_IO_APIC(void) * Add it to the IO-APIC irq-routing table: */ spin_lock_irqsave(&ioapic_lock, flags); - io_apic_write(0, 0x11+2*pin, *(((int *)&entry)+1)); - io_apic_write(0, 0x10+2*pin, *(((int *)&entry)+0)); + io_apic_write(ioapic_data[0], 0x11+2*pin, *(((int *)&entry)+1)); io_apic_write(ioapic_data[0], 0x10+2*pin, *(((int *)&entry)+0)); spin_unlock_irqrestore(&ioapic_lock, flags); } disconnect_bsp_APIC(pin != -1); @@ -1682,6 +1796,7 @@ static void __init setup_ioapic_ids_from int i; unsigned char old_id; unsigned long flags; + struct ioapic_data_struct *ioapic; /* * Don't check I/O APIC IDs for xAPIC systems. They have @@ -1699,10 +1814,10 @@ static void __init setup_ioapic_ids_from * Set the IOAPIC ID to the value stored in the MPC table. */ for (apic = 0; apic < nr_ioapics; apic++) { - + ioapic = ioapic_data[apic]; /* Read the register 0 value */ spin_lock_irqsave(&ioapic_lock, flags); - reg_00.raw = io_apic_read(apic, 0); + reg_00.raw = io_apic_read(ioapic, 0); spin_unlock_irqrestore(&ioapic_lock, flags); old_id = mp_ioapics[apic].mpc_apicid; @@ -1763,14 +1878,14 @@ static void __init setup_ioapic_ids_from reg_00.bits.ID = mp_ioapics[apic].mpc_apicid; spin_lock_irqsave(&ioapic_lock, flags); - io_apic_write(apic, 0, reg_00.raw); + io_apic_write(ioapic, 0, reg_00.raw); spin_unlock_irqrestore(&ioapic_lock, flags); /* * Sanity check */ spin_lock_irqsave(&ioapic_lock, flags); - reg_00.raw = io_apic_read(apic, 0); + reg_00.raw = io_apic_read(ioapic, 0); spin_unlock_irqrestore(&ioapic_lock, flags); if (reg_00.bits.ID != mp_ioapics[apic].mpc_apicid) printk("could not set ID!\n"); @@ -1794,7 +1909,7 @@ static int __init timer_irq_works(void) { unsigned long t1 = jiffies; - local_irq_enable(); + raw_local_irq_enable(); /* Let ten ticks pass... */ mdelay((10 * 1000) / HZ); @@ -1805,7 +1920,7 @@ static int __init timer_irq_works(void) * might have cached one ExtINT interrupt. Finally, at * least one tick may be lost due to delays. 
*/ - if (jiffies - t1 > 4) + if (jiffies - t1 > 4 && jiffies - t1 < 16) return 1; return 0; @@ -1858,9 +1973,11 @@ static unsigned int startup_edge_ioapic_ static void ack_edge_ioapic_irq(unsigned int irq) { move_irq(irq); +#if 0 if ((irq_desc[irq].status & (IRQ_PENDING | IRQ_DISABLED)) == (IRQ_PENDING | IRQ_DISABLED)) mask_IO_APIC_irq(irq); +#endif ack_APIC_irq(); } @@ -1885,6 +2002,30 @@ static unsigned int startup_level_ioapic return 0; /* don't check for pending */ } +#ifdef CONFIG_PREEMPT_HARDIRQS + +/* + * in the PREEMPT_HARDIRQS case we don't want to keep the local + * APIC unacked, because that prevents further interrupts from + * being handled - and with IRQ threads being delayed arbitrarily, + * that's unacceptable. So we first mask the IRQ, then ack it. + * The hardirq thread will then unmask it. + */ +static void mask_and_ack_level_ioapic_irq(unsigned int irq) +{ + move_irq(irq); + mask_IO_APIC_irq(irq); + ack_APIC_irq(); +} + +#else + +static void mask_and_ack_level_ioapic_irq(unsigned int irq) +{ +} + +#endif + static void end_level_ioapic_irq (unsigned int irq) { unsigned long v; @@ -1919,8 +2060,10 @@ static void end_level_ioapic_irq (unsign if (!(v & (1 << (i & 0x1f)))) { atomic_inc(&irq_mis_count); spin_lock(&ioapic_lock); - __mask_and_edge_IO_APIC_irq(irq); - __unmask_and_level_IO_APIC_irq(irq); + /* mask = 1, trigger = 0 */ + __modify_IO_APIC_irq(irq, 0x00010000, 0x00008000); + /* mask = 0, trigger = 1 */ + __modify_IO_APIC_irq(irq, 0x00008000, 0x00010000); spin_unlock(&ioapic_lock); } } @@ -1948,6 +2091,13 @@ static unsigned int startup_level_ioapic return startup_level_ioapic_irq (irq); } +static void mask_and_ack_level_ioapic_vector (unsigned int vector) +{ + int irq = vector_to_irq(vector); + + mask_and_ack_level_ioapic_irq(irq); +} + static void end_level_ioapic_vector (unsigned int vector) { int irq = vector_to_irq(vector); @@ -2111,22 +2261,23 @@ static void setup_nmi (void) * cycles as some i82489DX-based boards have glue logic that keeps 
the * 8259A interrupt line asserted until INTA. --macro */ -static inline void unlock_ExtINT_logic(void) +static void __init unlock_ExtINT_logic(void) { int pin, i; struct IO_APIC_route_entry entry0, entry1; unsigned char save_control, save_freq_select; unsigned long flags; + struct ioapic_data_struct *ioapic0 = ioapic_data[0]; pin = find_isa_irq_pin(8, mp_INT); if (pin == -1) return; spin_lock_irqsave(&ioapic_lock, flags); - *(((int *)&entry0) + 1) = io_apic_read(0, 0x11 + 2 * pin); - *(((int *)&entry0) + 0) = io_apic_read(0, 0x10 + 2 * pin); + *(((int *)&entry0) + 1) = io_apic_read(ioapic0, 0x11 + 2 * pin); + *(((int *)&entry0) + 0) = io_apic_read(ioapic0, 0x10 + 2 * pin); spin_unlock_irqrestore(&ioapic_lock, flags); - clear_IO_APIC_pin(0, pin); + clear_IO_APIC_pin(ioapic0, pin); memset(&entry1, 0, sizeof(entry1)); @@ -2139,8 +2290,8 @@ static inline void unlock_ExtINT_logic(v entry1.vector = 0; spin_lock_irqsave(&ioapic_lock, flags); - io_apic_write(0, 0x11 + 2 * pin, *(((int *)&entry1) + 1)); - io_apic_write(0, 0x10 + 2 * pin, *(((int *)&entry1) + 0)); + io_apic_write(ioapic0, 0x11 + 2 * pin, *(((int *)&entry1) + 1)); + io_apic_write(ioapic0, 0x10 + 2 * pin, *(((int *)&entry1) + 0)); spin_unlock_irqrestore(&ioapic_lock, flags); save_control = CMOS_READ(RTC_CONTROL); @@ -2158,11 +2309,11 @@ static inline void unlock_ExtINT_logic(v CMOS_WRITE(save_control, RTC_CONTROL); CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT); - clear_IO_APIC_pin(0, pin); + clear_IO_APIC_pin(ioapic0, pin); spin_lock_irqsave(&ioapic_lock, flags); - io_apic_write(0, 0x11 + 2 * pin, *(((int *)&entry0) + 1)); - io_apic_write(0, 0x10 + 2 * pin, *(((int *)&entry0) + 0)); + io_apic_write(ioapic0, 0x11 + 2 * pin, *(((int *)&entry0) + 1)); + io_apic_write(ioapic0, 0x10 + 2 * pin, *(((int *)&entry0) + 0)); spin_unlock_irqrestore(&ioapic_lock, flags); } @@ -2172,10 +2323,11 @@ static inline void unlock_ExtINT_logic(v * is so screwy. 
Thanks to Brian Perkins for testing/hacking this beast * fanatically on his truly buggy board. */ -static inline void check_timer(void) +static void __init check_timer(void) { int pin1, pin2; int vector; + struct ioapic_data_struct *ioapic0 = ioapic_data[0]; /* * get/set the timer IRQ vector: @@ -2193,7 +2345,10 @@ static inline void check_timer(void) */ apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); init_8259A(1); - timer_ack = 1; +#ifdef CONFIG_PREEMPT_RT + if (nmi_watchdog) +#endif + timer_ack = 1; enable_8259A_irq(0); pin1 = find_isa_irq_pin(0, mp_INT); @@ -2216,7 +2371,7 @@ static inline void check_timer(void) - clear_IO_APIC_pin(0, pin1); + clear_IO_APIC_pin(ioapic0, pin1); return; } - clear_IO_APIC_pin(0, pin1); + clear_IO_APIC_pin(ioapic0, pin1); printk(KERN_ERR "..MP-BIOS bug: 8254 timer not connected to IO-APIC\n"); } @@ -2241,7 +2396,7 @@ static inline void check_timer(void) /* * Cleanup, just in case ... */ - clear_IO_APIC_pin(0, pin2); + clear_IO_APIC_pin(ioapic0, pin2); } printk(" failed.\n"); @@ -2282,6 +2437,46 @@ static inline void check_timer(void) "report. 
Then try booting with the 'noapic' option"); } +void __init setup_IO_APIC_early(int _ioapic) +{ + union IO_APIC_reg_01 reg_01; + unsigned long flags; + int size, nr_ioapic_registers; + volatile int *ioapic; + if (ioapic_data[_ioapic]) { + printk("been in %s before !!!!!\n", __FUNCTION__); + return; + } + + set_fixmap_nocache(FIX_IO_APIC_BASE_0 + _ioapic, mp_ioapics[_ioapic].mpc_apicaddr); + printk(KERN_DEBUG "mapped IOAPIC to %08lx (%08lx)\n", + __fix_to_virt(FIX_IO_APIC_BASE_0 + _ioapic), mp_ioapics[_ioapic].mpc_apicaddr); + /* + * The number of IO-APIC IRQ registers (== #pins): + */ + ioapic = IO_APIC_BASE(_ioapic); + spin_lock_irqsave(&ioapic_lock, flags); + ioapic[0] = 1; + reg_01.raw = ioapic[4]; + spin_unlock_irqrestore(&ioapic_lock, flags); + nr_ioapic_registers = reg_01.bits.entries+1; + + /* + * Initialize ioapic_data struct: + */ + size = sizeof(struct ioapic_data_struct); +#ifdef IOAPIC_CACHE + size += 0x10 * sizeof(u32) + nr_ioapic_registers * sizeof(struct IO_APIC_route_entry); +#endif + ioapic_data[_ioapic] = alloc_bootmem(size); + memset(ioapic_data[_ioapic], 0, size); + ioapic_data[_ioapic]->nr_registers = nr_ioapic_registers; + ioapic_data[_ioapic]->base = ioapic; +#ifdef IOAPIC_CACHE + ioapic_cache_init(ioapic_data[_ioapic]); +#endif +} + /* * * IRQ's that are handled by the PIC in the MPS IOAPIC case. 
@@ -2329,25 +2524,22 @@ static int __init io_apic_bug_finalize(v late_initcall(io_apic_bug_finalize); -struct sysfs_ioapic_data { - struct sys_device dev; - struct IO_APIC_route_entry entry[0]; -}; -static struct sysfs_ioapic_data * mp_ioapic_data[MAX_IO_APICS]; - static int ioapic_suspend(struct sys_device *dev, pm_message_t state) { struct IO_APIC_route_entry *entry; - struct sysfs_ioapic_data *data; + struct ioapic_data_struct *data; unsigned long flags; int i; + struct ioapic_data_struct *ioapic; - data = container_of(dev, struct sysfs_ioapic_data, dev); + data = container_of(dev, struct ioapic_data_struct, dev); entry = data->entry; + + ioapic = ioapic_data[dev->id]; spin_lock_irqsave(&ioapic_lock, flags); - for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ ) { - *(((int *)entry) + 1) = io_apic_read(dev->id, 0x11 + 2 * i); - *(((int *)entry) + 0) = io_apic_read(dev->id, 0x10 + 2 * i); + for (i = 0; i < ioapic_data[dev->id]->nr_registers; i ++, entry ++) { + *(((int *)entry) + 1) = io_apic_read(ioapic, 0x11 + 2 * i); + *(((int *)entry) + 0) = io_apic_read(ioapic, 0x10 + 2 * i); } spin_unlock_irqrestore(&ioapic_lock, flags); @@ -2357,23 +2549,25 @@ static int ioapic_suspend(struct sys_dev static int ioapic_resume(struct sys_device *dev) { struct IO_APIC_route_entry *entry; - struct sysfs_ioapic_data *data; + struct ioapic_data_struct *data; unsigned long flags; union IO_APIC_reg_00 reg_00; int i; - - data = container_of(dev, struct sysfs_ioapic_data, dev); + struct ioapic_data_struct *ioapic; + + data = container_of(dev, struct ioapic_data_struct, dev); entry = data->entry; + ioapic = ioapic_data[dev->id]; spin_lock_irqsave(&ioapic_lock, flags); - reg_00.raw = io_apic_read(dev->id, 0); + reg_00.raw = io_apic_read(ioapic, 0); if (reg_00.bits.ID != mp_ioapics[dev->id].mpc_apicid) { reg_00.bits.ID = mp_ioapics[dev->id].mpc_apicid; - io_apic_write(dev->id, 0, reg_00.raw); + io_apic_write(ioapic, 0, reg_00.raw); } - for (i = 0; i < 
nr_ioapic_registers[dev->id]; i ++, entry ++ ) { - io_apic_write(dev->id, 0x11+2*i, *(((int *)entry)+1)); - io_apic_write(dev->id, 0x10+2*i, *(((int *)entry)+0)); + for (i = 0; i < ioapic_data[dev->id]->nr_registers; i ++, entry ++) { + io_apic_write(ioapic, 0x11+2*i, *(((int *)entry)+1)); + io_apic_write(ioapic, 0x10+2*i, *(((int *)entry)+0)); } spin_unlock_irqrestore(&ioapic_lock, flags); @@ -2396,21 +2590,20 @@ static int __init ioapic_init_sysfs(void return error; for (i = 0; i < nr_ioapics; i++ ) { - size = sizeof(struct sys_device) + nr_ioapic_registers[i] - * sizeof(struct IO_APIC_route_entry); - mp_ioapic_data[i] = kmalloc(size, GFP_KERNEL); - if (!mp_ioapic_data[i]) { + size = ioapic_data[i]->nr_registers * sizeof(struct IO_APIC_route_entry); + ioapic_data[i]->entry = kmalloc(size, GFP_KERNEL); + if (!ioapic_data[i]->entry) { printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i); continue; } - memset(mp_ioapic_data[i], 0, size); - dev = &mp_ioapic_data[i]->dev; + memset(ioapic_data[i]->entry, 0, size); + dev = &ioapic_data[i]->dev; dev->id = i; dev->cls = &ioapic_sysdev_class; error = sysdev_register(dev); if (error) { - kfree(mp_ioapic_data[i]); - mp_ioapic_data[i] = NULL; + kfree(ioapic_data[i]->entry); + ioapic_data[i]->entry = NULL; printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i); continue; } @@ -2427,13 +2620,14 @@ device_initcall(ioapic_init_sysfs); #ifdef CONFIG_ACPI -int __init io_apic_get_unique_id (int ioapic, int apic_id) +int __init io_apic_get_unique_id (int apic, int apic_id) { union IO_APIC_reg_00 reg_00; static physid_mask_t apic_id_map = PHYSID_MASK_NONE; physid_mask_t tmp; unsigned long flags; int i = 0; + struct ioapic_data_struct *ioapic = ioapic_data[apic]; /* * The P4 platform supports up to 256 APIC IDs on two separate APIC @@ -2453,7 +2647,7 @@ int __init io_apic_get_unique_id (int io if (apic_id >= get_physical_broadcast()) { printk(KERN_WARNING "IOAPIC[%d]: Invalid apic_id %d, trying " - "%d\n", ioapic, apic_id, 
reg_00.bits.ID); + "%d\n", apic, apic_id, reg_00.bits.ID); apic_id = reg_00.bits.ID; } @@ -2472,7 +2666,7 @@ int __init io_apic_get_unique_id (int io panic("Max apic_id exceeded!\n"); printk(KERN_WARNING "IOAPIC[%d]: apic_id %d already used, " - "trying %d\n", ioapic, apic_id, i); + "trying %d\n", apic, apic_id, i); apic_id = i; } @@ -2490,50 +2684,50 @@ int __init io_apic_get_unique_id (int io /* Sanity check */ if (reg_00.bits.ID != apic_id) - panic("IOAPIC[%d]: Unable change apic_id!\n", ioapic); + panic("IOAPIC[%d]: Unable change apic_id!\n", apic); } apic_printk(APIC_VERBOSE, KERN_INFO - "IOAPIC[%d]: Assigned apic_id %d\n", ioapic, apic_id); + "IOAPIC[%d]: Assigned apic_id %d\n", apic, apic_id); return apic_id; } -int __init io_apic_get_version (int ioapic) +int __init io_apic_get_version (int apic) { union IO_APIC_reg_01 reg_01; unsigned long flags; spin_lock_irqsave(&ioapic_lock, flags); - reg_01.raw = io_apic_read(ioapic, 1); + reg_01.raw = io_apic_read(ioapic_data[apic], 1); spin_unlock_irqrestore(&ioapic_lock, flags); return reg_01.bits.version; } -int __init io_apic_get_redir_entries (int ioapic) +int __init io_apic_get_redir_entries (int apic) { union IO_APIC_reg_01 reg_01; unsigned long flags; spin_lock_irqsave(&ioapic_lock, flags); - reg_01.raw = io_apic_read(ioapic, 1); + reg_01.raw = io_apic_read(ioapic_data[apic], 1); spin_unlock_irqrestore(&ioapic_lock, flags); return reg_01.bits.entries; } -int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int active_high_low) +int io_apic_set_pci_routing (int apic, int pin, int irq, int edge_level, int active_high_low) { struct IO_APIC_route_entry entry; unsigned long flags; - + struct ioapic_data_struct *ioapic = ioapic_data[apic]; if (!IO_APIC_IRQ(irq)) { printk(KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n", - ioapic); + apic); return -EINVAL; } @@ -2556,18 +2750,18 @@ int io_apic_set_pci_routing (int ioapic, * IRQs < 16 are already in the irq_2_pin[] map */ if (irq >= 16) - 
add_pin_to_irq(irq, ioapic, pin); + add_pin_to_irq(irq, apic, pin); entry.vector = assign_irq_vector(irq); apic_printk(APIC_DEBUG, KERN_DEBUG "IOAPIC[%d]: Set PCI routing entry " - "(%d-%d -> 0x%x -> IRQ %d Mode:%i Active:%i)\n", ioapic, - mp_ioapics[ioapic].mpc_apicid, pin, entry.vector, irq, + "(%d-%d -> 0x%x -> IRQ %d Mode:%i Active:%i)\n", apic, + mp_ioapics[apic].mpc_apicid, pin, entry.vector, irq, edge_level, active_high_low); ioapic_register_intr(irq, entry.vector, edge_level); - if (!ioapic && (irq < 16)) + if (!apic && (irq < 16)) disable_8259A_irq(irq); spin_lock_irqsave(&ioapic_lock, flags); Index: linux/arch/i386/kernel/irq.c =================================================================== --- linux.orig/arch/i386/kernel/irq.c +++ linux/arch/i386/kernel/irq.c @@ -51,7 +51,7 @@ static union irq_ctx *softirq_ctx[NR_CPU * SMP cross-CPU interrupts have their own specific * handlers). */ -fastcall unsigned int do_IRQ(struct pt_regs *regs) +fastcall notrace unsigned int do_IRQ(struct pt_regs *regs) { /* high bits used in ret_from_ code */ int irq = regs->orig_eax & 0xff; @@ -59,8 +59,12 @@ fastcall unsigned int do_IRQ(struct pt_r union irq_ctx *curctx, *irqctx; u32 *isp; #endif - irq_enter(); +#ifdef CONFIG_LATENCY_TRACE + if (irq == trace_user_trigger_irq) + user_trace_start(); +#endif + trace_special(regs->eip, irq, 0); #ifdef CONFIG_DEBUG_STACKOVERFLOW /* Debugging check for stack overflow: is there less than 1KB free? 
*/ { @@ -69,7 +73,7 @@ fastcall unsigned int do_IRQ(struct pt_r __asm__ __volatile__("andl %%esp,%0" : "=r" (esp) : "0" (THREAD_SIZE - 1)); if (unlikely(esp < (sizeof(struct thread_info) + STACK_WARN))) { - printk("do_IRQ: stack overflow: %ld\n", + printk("BUG: do_IRQ: stack overflow: %ld\n", esp - sizeof(struct thread_info)); dump_stack(); } @@ -173,7 +177,7 @@ asmlinkage void do_softirq(void) if (in_interrupt()) return; - local_irq_save(flags); + raw_local_irq_save(flags); if (local_softirq_pending()) { curctx = current_thread_info(); @@ -194,7 +198,7 @@ asmlinkage void do_softirq(void) ); } - local_irq_restore(flags); + raw_local_irq_restore(flags); } EXPORT_SYMBOL(do_softirq); @@ -224,8 +228,10 @@ int show_interrupts(struct seq_file *p, } if (i < NR_IRQS) { - spin_lock_irqsave(&irq_desc[i].lock, flags); - action = irq_desc[i].action; + irq_desc_t *desc = irq_desc + i; + + spin_lock_irqsave(&desc->lock, flags); + action = desc->action; if (!action) goto skip; seq_printf(p, "%3d: ",i); @@ -235,15 +241,27 @@ int show_interrupts(struct seq_file *p, for_each_cpu(j) seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]); #endif - seq_printf(p, " %14s", irq_desc[i].handler->typename); + seq_printf(p, " %-14s", desc->handler->typename); +#define F(x,c) ((desc->status & x) ? 
c : '.') + seq_printf(p, " [%c%c%c%c%c%c%c%c%c/", + F(IRQ_INPROGRESS, 'I'), + F(IRQ_DISABLED, 'D'), + F(IRQ_PENDING, 'P'), + F(IRQ_REPLAY, 'R'), + F(IRQ_AUTODETECT, 'A'), + F(IRQ_WAITING, 'W'), + F(IRQ_LEVEL, 'L'), + F(IRQ_MASKED, 'M'), + F(IRQ_NODELAY, 'N')); +#undef F + seq_printf(p, "%3d]", desc->irqs_unhandled); seq_printf(p, " %s", action->name); - for (action=action->next; action; action = action->next) seq_printf(p, ", %s", action->name); seq_putc(p, '\n'); skip: - spin_unlock_irqrestore(&irq_desc[i].lock, flags); + spin_unlock_irqrestore(&desc->lock, flags); } else if (i == NR_IRQS) { seq_printf(p, "NMI: "); for_each_cpu(j) @@ -298,9 +316,9 @@ void fixup_irqs(cpumask_t map) barrier(); #else /* That doesn't seem sufficient. Give it 1ms. */ - local_irq_enable(); + raw_local_irq_enable(); mdelay(1); - local_irq_disable(); + raw_local_irq_disable(); #endif } #endif Index: linux/arch/i386/kernel/mca.c =================================================================== --- linux.orig/arch/i386/kernel/mca.c +++ linux/arch/i386/kernel/mca.c @@ -472,3 +472,22 @@ void mca_handle_nmi(void) mca_nmi_hook(); } /* mca_handle_nmi */ + +void mca_timer_ack(void *priv) +{ + int irq; + + if (MCA_bus) { + /* The PS/2 uses level-triggered interrupts. You can't + turn them off, nor would you want to (any attempt to + enable edge-triggered interrupts usually gets intercepted by a + special hardware circuit). Hence we have to acknowledge + the timer interrupt. Through some incredibly stupid + design idea, the reset for IRQ 0 is done by setting the + high bit of the PPI port B (0x61). Note that some PS/2s, + notably the 55SX, work fine if this is removed. 
*/ + + irq = inb_p( 0x61 ); /* read the current state */ + outb_p( irq|0x80, 0x61 ); /* reset the IRQ */ + } +} Index: linux/arch/i386/kernel/mcount-wrapper.S =================================================================== --- /dev/null +++ linux/arch/i386/kernel/mcount-wrapper.S @@ -0,0 +1,27 @@ +/* + * linux/arch/i386/mcount-wrapper.S + * + * Copyright (C) 2004 Ingo Molnar + */ + +.globl mcount +mcount: + + cmpl $0, mcount_enabled + jz out + + push %ebp + mov %esp, %ebp + pushl %eax + pushl %ecx + pushl %edx + + call __mcount + + popl %edx + popl %ecx + popl %eax + popl %ebp +out: + ret + Index: linux/arch/i386/kernel/microcode.c =================================================================== --- linux.orig/arch/i386/kernel/microcode.c +++ linux/arch/i386/kernel/microcode.c @@ -109,7 +109,7 @@ MODULE_LICENSE("GPL"); #define exttable_size(et) ((et)->count * EXT_SIGNATURE_SIZE + EXT_HEADER_SIZE) /* serialize access to the physical write to MSR 0x79 */ -static DEFINE_SPINLOCK(microcode_update_lock); +static DEFINE_RAW_SPINLOCK(microcode_update_lock); /* no concurrent ->write()s are allowed on /dev/cpu/microcode */ static DECLARE_MUTEX(microcode_sem); Index: linux/arch/i386/kernel/mpparse.c =================================================================== --- linux.orig/arch/i386/kernel/mpparse.c +++ linux/arch/i386/kernel/mpparse.c @@ -271,6 +271,7 @@ static void __init MP_ioapic_info (struc return; } mp_ioapics[nr_ioapics] = *m; + setup_IO_APIC_early(nr_ioapics); nr_ioapics++; } @@ -919,7 +920,7 @@ void __init mp_register_ioapic ( mp_ioapics[idx].mpc_flags = MPC_APIC_USABLE; mp_ioapics[idx].mpc_apicaddr = address; - set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address); + setup_IO_APIC_early(idx); if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && (boot_cpu_data.x86 < 15)) mp_ioapics[idx].mpc_apicid = io_apic_get_unique_id(idx, id); else Index: linux/arch/i386/kernel/nmi.c =================================================================== --- 
linux.orig/arch/i386/kernel/nmi.c +++ linux/arch/i386/kernel/nmi.c @@ -34,7 +34,7 @@ unsigned int nmi_watchdog = NMI_NONE; extern int unknown_nmi_panic; -static unsigned int nmi_hz = HZ; +static unsigned int nmi_hz = 1000; static unsigned int nmi_perfctr_msr; /* the MSR to reset in NMI handler */ static unsigned int nmi_p4_cccr_val; extern void show_registers(struct pt_regs *regs); @@ -112,8 +112,8 @@ static int __init check_nmi_watchdog(voi for (cpu = 0; cpu < NR_CPUS; cpu++) prev_nmi_count[cpu] = per_cpu(irq_stat, cpu).__nmi_count; - local_irq_enable(); - mdelay((10*1000)/nmi_hz); // wait 10 ticks + raw_local_irq_enable(); + mdelay((100*1000)/nmi_hz); // wait 100 ticks for (cpu = 0; cpu < NR_CPUS; cpu++) { #ifdef CONFIG_SMP @@ -134,7 +134,7 @@ static int __init check_nmi_watchdog(voi /* now that we know it works we can reduce NMI frequency to something more reasonable; makes a difference in some configs */ if (nmi_watchdog == NMI_LOCAL_APIC) - nmi_hz = 1; + nmi_hz = 10000; return 0; } @@ -486,9 +486,34 @@ void touch_nmi_watchdog (void) extern void die_nmi(struct pt_regs *, const char *msg); -void nmi_watchdog_tick (struct pt_regs * regs) +int nmi_show_regs[NR_CPUS]; + +void nmi_show_all_regs(void) { + int i; + + if (nmi_watchdog == NMI_NONE) + return; + if (system_state != SYSTEM_RUNNING) { + printk("nmi_show_all_regs(): system state %d, not doing.\n", + system_state); + return; + } + printk("nmi_show_all_regs(): start at CPU#%d.\n", + raw_smp_processor_id()); + dump_stack(); + + for_each_online_cpu(i) + nmi_show_regs[i] = 1; + for_each_online_cpu(i) + while (nmi_show_regs[i] == 1) + barrier(); +} + +static DEFINE_RAW_SPINLOCK(nmi_print_lock); +void notrace nmi_watchdog_tick (struct pt_regs * regs) +{ /* * Since current_thread_info()-> is always on the stack, and we * always switch the stack NMI-atomically, it's safe to use @@ -496,7 +521,16 @@ void nmi_watchdog_tick (struct pt_regs * */ int sum, cpu = smp_processor_id(); - sum = per_cpu(irq_stat, 
cpu).apic_timer_irqs; + sum = per_cpu(irq_stat, cpu).apic_timer_irqs + kstat_irqs(0); + + profile_tick(CPU_PROFILING, regs); + if (nmi_show_regs[cpu]) { + nmi_show_regs[cpu] = 0; + spin_lock(&nmi_print_lock); + printk("NMI show regs on CPU#%d:\n", cpu); + show_regs(regs); + spin_unlock(&nmi_print_lock); + } if (last_irq_sums[cpu] == sum) { /* @@ -504,12 +538,25 @@ void nmi_watchdog_tick (struct pt_regs * * wait a few IRQs (5 seconds) before doing the oops ... */ alert_counter[cpu]++; - if (alert_counter[cpu] == 5*nmi_hz) - /* - * die_nmi will return ONLY if NOTIFY_STOP happens.. - */ - die_nmi(regs, "NMI Watchdog detected LOCKUP"); + if (alert_counter[cpu] && !(alert_counter[cpu] % (5*nmi_hz))) { + int i; + + bust_spinlocks(1); + spin_lock(&nmi_print_lock); + printk("NMI watchdog detected lockup on CPU#%d (%d/%d)\n", cpu, alert_counter[cpu], 5*nmi_hz); + show_regs(regs); + spin_unlock(&nmi_print_lock); + + for_each_online_cpu(i) + if (i != cpu) + nmi_show_regs[i] = 1; + for_each_online_cpu(i) + while (nmi_show_regs[i] == 1) + barrier(); + die_nmi(regs, "NMI Watchdog detected LOCKUP"); + } + } else { last_irq_sums[cpu] = sum; alert_counter[cpu] = 0; } Index: linux/arch/i386/kernel/process.c =================================================================== --- linux.orig/arch/i386/kernel/process.c +++ linux/arch/i386/kernel/process.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include @@ -64,6 +65,12 @@ static int hlt_counter; unsigned long boot_option_idle_override = 0; EXPORT_SYMBOL(boot_option_idle_override); +DEFINE_SPINLOCK(pm_idle_switch_lock); +EXPORT_SYMBOL_GPL(pm_idle_switch_lock); + +int pm_idle_locked = 0; +EXPORT_SYMBOL_GPL(pm_idle_locked); + /* * Return saved PC of a blocked thread. 
*/ @@ -100,12 +107,13 @@ EXPORT_SYMBOL(enable_hlt); void default_idle(void) { if (!hlt_counter && boot_cpu_data.hlt_works_ok) { - local_irq_disable(); - if (!need_resched()) - safe_halt(); + raw_local_irq_disable(); + if (!need_resched() && !need_resched_delayed()) + raw_safe_halt(); else - local_irq_enable(); + raw_local_irq_enable(); } else { + raw_local_irq_enable(); cpu_relax(); } } @@ -118,11 +126,11 @@ EXPORT_SYMBOL(default_idle); * to poll the ->work.need_resched flag instead of waiting for the * cross-CPU IPI to arrive. Use this option with caution. */ -static void poll_idle (void) +void poll_idle (void) { int oldval; - local_irq_enable(); + raw_local_irq_enable(); /* * Deal with another CPU just having chosen a thread to @@ -137,7 +145,7 @@ static void poll_idle (void) "testl %0, %1;" "rep; nop;" "je 2b;" - : : "i"(_TIF_NEED_RESCHED), "m" (current_thread_info()->flags)); + : : "i"(_TIF_NEED_RESCHED|_TIF_NEED_RESCHED_DELAYED), "m" (current_thread_info()->flags)); clear_thread_flag(TIF_POLLING_NRFLAG); } else { @@ -160,7 +168,7 @@ static inline void play_dead(void) /* * With physical CPU hotplug, we should halt the cpu */ - local_irq_disable(); + raw_local_irq_disable(); while (1) halt(); } @@ -183,7 +191,9 @@ void cpu_idle(void) /* endless idle loop with no priority at all */ while (1) { - while (!need_resched()) { + BUG_ON(raw_irqs_disabled()); + + while (!need_resched() && !need_resched_delayed()) { void (*idle)(void); if (__get_cpu_var(cpu_idle_state)) @@ -199,9 +209,13 @@ void cpu_idle(void) play_dead(); __get_cpu_var(irq_stat).idle_timestamp = jiffies; + stop_critical_timing(); + propagate_preempt_locks_value(); idle(); } - schedule(); + raw_local_irq_disable(); + __schedule(); + raw_local_irq_enable(); } } @@ -242,16 +256,16 @@ EXPORT_SYMBOL_GPL(cpu_idle_wait); */ static void mwait_idle(void) { - local_irq_enable(); + raw_local_irq_enable(); - if (!need_resched()) { + if (!need_resched() && !need_resched_delayed()) { 
set_thread_flag(TIF_POLLING_NRFLAG); do { __monitor((void *)&current_thread_info()->flags, 0, 0); - if (need_resched()) + if (need_resched() || need_resched_delayed()) break; __mwait(0, 0); - } while (!need_resched()); + } while (!need_resched() && !need_resched_delayed()); clear_thread_flag(TIF_POLLING_NRFLAG); } } @@ -378,11 +392,16 @@ void exit_thread(void) /* The process may have allocated an io port bitmap... nuke it. */ if (unlikely(NULL != t->io_bitmap_ptr)) { - int cpu = get_cpu(); - struct tss_struct *tss = &per_cpu(init_tss, cpu); + int cpu; + struct tss_struct *tss; + void *io_bitmap_ptr = t->io_bitmap_ptr; - kfree(t->io_bitmap_ptr); t->io_bitmap_ptr = NULL; + mb(); + kfree(io_bitmap_ptr); + + cpu = get_cpu(); + tss = &per_cpu(init_tss, cpu); /* * Careful, clear this in the TSS too: */ Index: linux/arch/i386/kernel/reboot.c =================================================================== --- linux.orig/arch/i386/kernel/reboot.c +++ linux/arch/i386/kernel/reboot.c @@ -194,7 +194,7 @@ void machine_real_restart(unsigned char { unsigned long flags; - local_irq_disable(); + raw_local_irq_disable(); /* Write zero to CMOS register number 0x0f, which the BIOS POST routine will recognize as telling it to do a proper reboot. 
(Well Index: linux/arch/i386/kernel/semaphore.c =================================================================== --- linux.orig/arch/i386/kernel/semaphore.c +++ linux/arch/i386/kernel/semaphore.c @@ -13,6 +13,7 @@ * rw semaphores implemented November 1999 by Benjamin LaHaise */ #include +#include #include /* @@ -28,15 +29,15 @@ asm( ".section .sched.text\n" ".align 4\n" -".globl __down_failed\n" -"__down_failed:\n\t" +".globl __compat_down_failed\n" +"__compat_down_failed:\n\t" #if defined(CONFIG_FRAME_POINTER) "pushl %ebp\n\t" "movl %esp,%ebp\n\t" #endif "pushl %edx\n\t" "pushl %ecx\n\t" - "call __down\n\t" + "call __compat_down\n\t" "popl %ecx\n\t" "popl %edx\n\t" #if defined(CONFIG_FRAME_POINTER) @@ -49,15 +50,15 @@ asm( asm( ".section .sched.text\n" ".align 4\n" -".globl __down_failed_interruptible\n" -"__down_failed_interruptible:\n\t" +".globl __compat_down_failed_interruptible\n" +"__compat_down_failed_interruptible:\n\t" #if defined(CONFIG_FRAME_POINTER) "pushl %ebp\n\t" "movl %esp,%ebp\n\t" #endif "pushl %edx\n\t" "pushl %ecx\n\t" - "call __down_interruptible\n\t" + "call __compat_down_interruptible\n\t" "popl %ecx\n\t" "popl %edx\n\t" #if defined(CONFIG_FRAME_POINTER) @@ -70,15 +71,15 @@ asm( asm( ".section .sched.text\n" ".align 4\n" -".globl __down_failed_trylock\n" -"__down_failed_trylock:\n\t" +".globl __compat_down_failed_trylock\n" +"__compat_down_failed_trylock:\n\t" #if defined(CONFIG_FRAME_POINTER) "pushl %ebp\n\t" "movl %esp,%ebp\n\t" #endif "pushl %edx\n\t" "pushl %ecx\n\t" - "call __down_trylock\n\t" + "call __compat_down_trylock\n\t" "popl %ecx\n\t" "popl %edx\n\t" #if defined(CONFIG_FRAME_POINTER) @@ -91,45 +92,13 @@ asm( asm( ".section .sched.text\n" ".align 4\n" -".globl __up_wakeup\n" -"__up_wakeup:\n\t" +".globl __compat_up_wakeup\n" +"__compat_up_wakeup:\n\t" "pushl %edx\n\t" "pushl %ecx\n\t" - "call __up\n\t" + "call __compat_up\n\t" "popl %ecx\n\t" "popl %edx\n\t" "ret" ); -/* - * rw spinlock fallbacks - */ -#if defined(CONFIG_SMP) 
-asm( -".section .sched.text\n" -".align 4\n" -".globl __write_lock_failed\n" -"__write_lock_failed:\n\t" - LOCK "addl $" RW_LOCK_BIAS_STR ",(%eax)\n" -"1: rep; nop\n\t" - "cmpl $" RW_LOCK_BIAS_STR ",(%eax)\n\t" - "jne 1b\n\t" - LOCK "subl $" RW_LOCK_BIAS_STR ",(%eax)\n\t" - "jnz __write_lock_failed\n\t" - "ret" -); - -asm( -".section .sched.text\n" -".align 4\n" -".globl __read_lock_failed\n" -"__read_lock_failed:\n\t" - LOCK "incl (%eax)\n" -"1: rep; nop\n\t" - "cmpl $1,(%eax)\n\t" - "js 1b\n\t" - LOCK "decl (%eax)\n\t" - "js __read_lock_failed\n\t" - "ret" -); -#endif Index: linux/arch/i386/kernel/setup.c =================================================================== --- linux.orig/arch/i386/kernel/setup.c +++ linux/arch/i386/kernel/setup.c @@ -1612,6 +1612,7 @@ void __init setup_arch(char **cmdline_p) conswitchp = &dummy_con; #endif #endif + tsc_init(); } #include "setup_arch_post.h" Index: linux/arch/i386/kernel/signal.c =================================================================== --- linux.orig/arch/i386/kernel/signal.c +++ linux/arch/i386/kernel/signal.c @@ -604,6 +604,13 @@ int fastcall do_signal(struct pt_regs *r int signr; struct k_sigaction ka; +#ifdef CONFIG_PREEMPT_RT + /* + * Fully-preemptible kernel does not need interrupts disabled: + */ + raw_local_irq_enable(); + preempt_check_resched(); +#endif /* * We want the common case to go fast, which * is why we may in certain cases get here from Index: linux/arch/i386/kernel/smp.c =================================================================== --- linux.orig/arch/i386/kernel/smp.c +++ linux/arch/i386/kernel/smp.c @@ -163,7 +163,7 @@ void send_IPI_mask_bitmask(cpumask_t cpu unsigned long cfg; unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); WARN_ON(mask & ~cpus_addr(cpu_online_map)[0]); /* * Wait for idle. 
@@ -186,7 +186,7 @@ void send_IPI_mask_bitmask(cpumask_t cpu */ apic_write_around(APIC_ICR, cfg); - local_irq_restore(flags); + raw_local_irq_restore(flags); } void send_IPI_mask_sequence(cpumask_t mask, int vector) @@ -200,7 +200,7 @@ void send_IPI_mask_sequence(cpumask_t ma * should be modified to do 1 message per cluster ID - mbligh */ - local_irq_save(flags); + raw_local_irq_save(flags); for (query_cpu = 0; query_cpu < NR_CPUS; ++query_cpu) { if (cpu_isset(query_cpu, mask)) { @@ -227,7 +227,7 @@ void send_IPI_mask_sequence(cpumask_t ma apic_write_around(APIC_ICR, cfg); } } - local_irq_restore(flags); + raw_local_irq_restore(flags); } #include /* must come after the send_IPI functions above for inlining */ @@ -245,7 +245,7 @@ void send_IPI_mask_sequence(cpumask_t ma static cpumask_t flush_cpumask; static struct mm_struct * flush_mm; static unsigned long flush_va; -static DEFINE_SPINLOCK(tlbstate_lock); +static DEFINE_RAW_SPINLOCK(tlbstate_lock); #define FLUSH_ALL 0xffffffff /* @@ -390,7 +390,7 @@ static void flush_tlb_others(cpumask_t c while (!cpus_empty(flush_cpumask)) /* nothing. lockup detection does not belong here */ - mb(); + cpu_relax(); flush_mm = NULL; flush_va = 0; @@ -481,10 +481,20 @@ void smp_send_reschedule(int cpu) } /* + * this function sends a 'reschedule' IPI to all other CPUs. + * This is used when RT tasks are starving and other CPUs + * might be able to run them: + */ +void smp_send_reschedule_allbutself(void) +{ + send_IPI_allbutself(RESCHEDULE_VECTOR); +} + +/* * Structure and data for smp_call_function(). This is designed to minimise * static memory requirements. It also looks cleaner. 
*/ -static DEFINE_SPINLOCK(call_lock); +static DEFINE_RAW_SPINLOCK(call_lock); struct call_data_struct { void (*func) (void *info); @@ -538,7 +548,7 @@ int smp_call_function (void (*func) (voi } /* Can deadlock when called with interrupts disabled */ - WARN_ON(irqs_disabled()); + WARN_ON(raw_irqs_disabled()); data.func = func; data.info = info; @@ -572,7 +582,7 @@ static void stop_this_cpu (void * dummy) * Remove this CPU: */ cpu_clear(smp_processor_id(), cpu_online_map); - local_irq_disable(); + raw_local_irq_disable(); disable_local_APIC(); if (cpu_data[smp_processor_id()].hlt_works_ok) for(;;) halt(); @@ -587,19 +597,20 @@ void smp_send_stop(void) { smp_call_function(stop_this_cpu, NULL, 1, 0); - local_irq_disable(); + raw_local_irq_disable(); disable_local_APIC(); - local_irq_enable(); + raw_local_irq_enable(); } /* - * Reschedule call back. Nothing to do, - * all the work is done automatically when - * we return from the interrupt. + * Reschedule call back. Trigger a reschedule pass so that + * RT-overload balancing can pass tasks around. */ -fastcall void smp_reschedule_interrupt(struct pt_regs *regs) +fastcall notrace void smp_reschedule_interrupt(struct pt_regs *regs) { + trace_special(regs->eip, 0, 0); ack_APIC_irq(); + set_tsk_need_resched(current); } fastcall void smp_call_function_interrupt(struct pt_regs *regs) Index: linux/arch/i386/kernel/smpboot.c =================================================================== --- linux.orig/arch/i386/kernel/smpboot.c +++ linux/arch/i386/kernel/smpboot.c @@ -208,142 +208,299 @@ valid_k7: ; } -/* - * TSC synchronization. - * - * We first check whether all CPUs have their TSC's synchronized, - * then we print a warning if not, and always resync. 
- */ +static atomic_t tsc_start_flag, tsc_check_start, tsc_check_stop; -static atomic_t tsc_start_flag = ATOMIC_INIT(0); -static atomic_t tsc_count_start = ATOMIC_INIT(0); -static atomic_t tsc_count_stop = ATOMIC_INIT(0); -static unsigned long long tsc_values[NR_CPUS]; - -#define NR_LOOPS 5 - -static void __init synchronize_tsc_bp (void) +static int __init check_tsc_warp(void) { - int i; - unsigned long long t0; - unsigned long long sum, avg; - long long delta; - unsigned int one_usec; - int buggy = 0; - - printk(KERN_INFO "checking TSC synchronization across %u CPUs: ", num_booting_cpus()); - - /* convert from kcyc/sec to cyc/usec */ - one_usec = cpu_khz / 1000; + static DEFINE_RAW_SPINLOCK(warp_lock); + static long long prev; + static unsigned int error; - atomic_set(&tsc_start_flag, 1); - wmb(); + int cpus = num_booting_cpus(), nr = 0; + long long start, now, end, delta; + atomic_inc(&tsc_check_start); + while (atomic_read(&tsc_check_start) != cpus) + cpu_relax(); /* - * We loop a few times to get a primed instruction cache, - * then the last pass is more or less synchronized and - * the BP and APs set their cycle counters to zero all at - * once. This reduces the chance of having random offsets - * between the processors, and guarantees that the maximum - * delay between the cycle counters is never bigger than - * the latency of information-passing (cachelines) between - * two CPUs. 
+ * Run the check for 500 msecs: */ - for (i = 0; i < NR_LOOPS; i++) { - /* - * all APs synchronize but they loop on '== num_cpus' - */ - while (atomic_read(&tsc_count_start) != num_booting_cpus()-1) - mb(); - atomic_set(&tsc_count_stop, 0); - wmb(); - /* - * this lets the APs save their current TSC: - */ - atomic_inc(&tsc_count_start); + rdtscll(start); + end = start + cpu_khz*500; - rdtscll(tsc_values[smp_processor_id()]); + for (;;) { /* - * We clear the TSC in the last loop: + * Check for the TSC going backwards (between CPUs): */ - if (i == NR_LOOPS-1) - write_tsc(0, 0); + spin_lock(&warp_lock); + rdtscll(now); + delta = now - prev; + prev = now; + spin_unlock(&warp_lock); + if (unlikely(delta < 0)) + error = 1; + if (now > end) + break; /* - * Wait for all APs to leave the synchronization point: + * Take it easy every couple of iterations, + * to not starve other CPUs: */ - while (atomic_read(&tsc_count_stop) != num_booting_cpus()-1) - mb(); - atomic_set(&tsc_count_start, 0); - wmb(); - atomic_inc(&tsc_count_stop); + nr++; + if (!(nr % 31)) + cpu_relax(); } - sum = 0; - for (i = 0; i < NR_CPUS; i++) { - if (cpu_isset(i, cpu_callout_map)) { - t0 = tsc_values[i]; - sum += t0; - } - } - avg = sum; - do_div(avg, num_booting_cpus()); + atomic_inc(&tsc_check_stop); + while (atomic_read(&tsc_check_stop) != cpus) + cpu_relax(); - sum = 0; - for (i = 0; i < NR_CPUS; i++) { - if (!cpu_isset(i, cpu_callout_map)) - continue; - delta = tsc_values[i] - avg; - if (delta < 0) - delta = -delta; - /* - * We report bigger than 2 microseconds clock differences. - */ - if (delta > 2*one_usec) { - long realdelta; - if (!buggy) { - buggy = 1; - printk("\n"); - } - realdelta = delta; - do_div(realdelta, one_usec); - if (tsc_values[i] < avg) - realdelta = -realdelta; + return error; +} - printk(KERN_INFO "CPU#%d had %ld usecs TSC skew, fixed it up.\n", i, realdelta); - } +/* + * TSC synchronization based on ia64 itc synchronization code. 
Synchronize + * pairs of processors rather than trying to synchronize all of the processors + * with a single event. When several processors are all waiting for an + * event they don't all see it at the same time. The write will cause + * an invalidate on each processor's cache and then they all scramble to + * re-read that cache line. + * + * Writing the TSC resets the upper 32-bits, so we need to be careful + * that all of the cpus can be synchronized before we overflow the + * 32-bit count. + */ - sum += delta; +#define MASTER 0 +#define SLAVE (SMP_CACHE_BYTES/sizeof(long)) + +#define NUM_ROUNDS 64 /* magic value */ +#define NUM_ITERS 5 /* likewise */ + +static volatile unsigned long go[2*SLAVE] __cacheline_aligned; +static volatile int current_slave = -1; +static volatile int tsc_sync_complete = 0; +static volatile int tsc_adj_latency = 0; +static unsigned int max_rt = 0; +static unsigned int max_delta = 0; + +#define DEBUG_TSC_SYNC 0 +#if DEBUG_TSC_SYNC +struct tsc_sync_debug { + long rt; /* roundtrip time */ + long master; /* master's timestamp */ + long diff; /* difference between midpoint and master's timestamp */ + long lat; /* estimate of tsc adjustment latency */ +} tsc_sync_debug[NUM_ROUNDS*NR_CPUS]; +#endif + +void +sync_master(void) +{ + unsigned long n, tsc, last_go_master; + + last_go_master = 0; + while (1) { + while ((n = go[MASTER]) == last_go_master) + rep_nop(); + if (n == ~0) + break; + rdtscl(tsc); + if (unlikely(!tsc)) + tsc = 1; + go[SLAVE] = tsc; + last_go_master = n; } - if (!buggy) - printk("passed.\n"); } -static void __init synchronize_tsc_ap (void) +/* + * Return the number of cycles by which our TSC differs from the TSC on + * the master (time-keeper) CPU. A positive number indicates our TSC is + * ahead of the master, negative that it is behind. 
+ */ +static inline long +get_delta (long *rt, long *master) { - int i; + unsigned long best_t0 = 0, best_t1 = ~0UL, best_tm = 0; + unsigned long tcenter, t0, t1, tm, last_go_slave; + long i; + + last_go_slave = go[SLAVE]; + for (i = 0; i < NUM_ITERS; ++i) { + rdtscl(t0); + go[MASTER] = i+1; + while ((tm = go[SLAVE]) == last_go_slave) + rep_nop(); + rdtscl(t1); + + if (t1 - t0 < best_t1 - best_t0) + best_t0 = t0, best_t1 = t1, best_tm = tm; + last_go_slave = tm; + } + + *rt = best_t1 - best_t0; + *master = best_tm - best_t0; + + /* average best_t0 and best_t1 without overflow: */ + tcenter = (best_t0/2 + best_t1/2); + if (best_t0 % 2 + best_t1 % 2 == 2) + ++tcenter; + return tcenter - best_tm; +} + +/* + * Synchronize TSC of the current (slave) CPU with the TSC of the MASTER CPU + * (normally the time-keeper CPU). We use a closed loop to eliminate the + * possibility of unaccounted-for errors (such as getting a machine check in + * the middle of a calibration step). The basic idea is for the slave to ask + * the master what TSC value it has and to read its own TSC before and after + * the master responds. Each iteration gives us three + * timestamps: + * + * slave master + * + * t0 ---\ + * ---\ + * ---> + * tm + * /--- + * /--- + * t1 <--- + * + * + * The goal is to adjust the slave's TSC such that tm falls exactly half-way + * between t0 and t1. If we achieve this, the clocks are synchronized provided + * the interconnect between the slave and the master is symmetric. Even if the + * interconnect were asymmetric, we would still know that the synchronization + * error is smaller than the roundtrip latency (t1 - t0). + * + * When the interconnect is quiet and symmetric, this lets us synchronize the + * TSC to within one or two cycles. However, we can only *guarantee* that the + * synchronization is accurate to within a round-trip time, which is typically + * in the range of several hundred cycles (e.g., ~500 cycles). 
In practice, + * this means that the TSC's are usually almost perfectly synchronized, but we + * shouldn't assume that the accuracy is much better than half a microsecond + * or so. + */ + +static void __init +synchronize_tsc_ap (void) +{ + long i, delta, adj, adjust_latency, n_rounds; + unsigned long rt, master_time_stamp, tsc; +#if DEBUG_TSC_SYNC + struct tsc_sync_debug *t = + &tsc_sync_debug[smp_processor_id() * NUM_ROUNDS]; +#endif + + while (!atomic_read(&tsc_start_flag)) + mb(); + + if (!check_tsc_warp()) + return; /* - * Not every cpu is online at the time - * this gets called, so we first wait for the BP to - * finish SMP initialization: + * Wait for our turn to synchronize with the boot processor. */ - while (!atomic_read(&tsc_start_flag)) mb(); + while (current_slave != smp_processor_id()) + rep_nop(); + adjust_latency = tsc_adj_latency; + + go[SLAVE] = 0; + go[MASTER] = 0; + write_tsc(0,0); + for (i = 0; i < NUM_ROUNDS; ++i) { + delta = get_delta(&rt, &master_time_stamp); + if (delta == 0) + break; + + if (i > 0) + adjust_latency += -delta; + adj = -delta + adjust_latency/8; + rdtscl(tsc); + write_tsc(tsc + adj, 0); +#if DEBUG_TSC_SYNC + t[i].rt = rt; + t[i].master = master_time_stamp; + t[i].diff = delta; + t[i].lat = adjust_latency/8; +#endif + } + n_rounds = i; + go[MASTER] = ~0; + +#if (DEBUG_TSC_SYNC == 2) + for (i = 0; i < n_rounds; ++i) + printk("rt=%5ld master=%5ld diff=%5ld adjlat=%5ld\n", + t[i].rt, t[i].master, t[i].diff, t[i].lat); + + printk("CPU %d: synchronized TSC (last diff %ld cycles, maxerr %lu cycles)\n", + smp_processor_id(), delta, rt); + + printk("It took %ld rounds\n", n_rounds); +#endif + if (rt > max_rt) + max_rt = rt; + if (delta < 0) + delta = -delta; + if (delta > max_delta) + max_delta = delta; + tsc_adj_latency = adjust_latency; + current_slave = -1; + while (!tsc_sync_complete) + rep_nop(); +} + +/* + * The boot processor sets its own TSC to zero and then gives each + * slave processor the chance to synchronize itself. 
+ */ - for (i = 0; i < NR_LOOPS; i++) { - atomic_inc(&tsc_count_start); - while (atomic_read(&tsc_count_start) != num_booting_cpus()) - mb(); +static void __init synchronize_tsc_bp (void) +{ + unsigned int tsc_low, tsc_high, error; + int cpu; + + atomic_set(&tsc_start_flag, 1); - rdtscll(tsc_values[smp_processor_id()]); - if (i == NR_LOOPS-1) - write_tsc(0, 0); + printk(KERN_INFO "checking TSC synchronization across %u CPUs: ", + num_booting_cpus()); - atomic_inc(&tsc_count_stop); - while (atomic_read(&tsc_count_stop) != num_booting_cpus()) mb(); + if (!check_tsc_warp()) { + printk("passed.\n"); + return; + } + printk("failed.\n"); + + printk(KERN_INFO "starting TSC synchronization\n"); + write_tsc(0, 0); + + for (cpu = 0; cpu < NR_CPUS; cpu++) { + if (!cpu_isset(cpu, cpu_callout_map)) + continue; + if (cpu == smp_processor_id()) + continue; + go[MASTER] = 0; + current_slave = cpu; + sync_master(); + while (current_slave != -1) + rep_nop(); + } + rdtsc(tsc_low, tsc_high); + if (tsc_high) + printk("TSC overflowed during synchronization\n"); + else + printk("TSC synchronization complete max_delta=%d cycles\n", + max_delta); + if (max_rt < 4293) { + error = (max_rt * 1000000)/cpu_khz; + printk("TSC sync round-trip time %d.%03d microseconds\n", + error/1000, error%1000); + } else { + printk("TSC sync round-trip time %d cycles\n", max_rt); } + tsc_sync_complete = 1; } -#undef NR_LOOPS extern void calibrate_delay(void); @@ -517,7 +674,7 @@ static void __devinit start_secondary(vo per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE; /* We can take interrupts now: we're officially "up". 
*/ - local_irq_enable(); + raw_local_irq_enable(); wmb(); cpu_idle(); @@ -1305,9 +1462,9 @@ int __cpu_disable(void) /* We enable the timer again on the exit path of the death loop */ disable_APIC_timer(); /* Allow any queued timer interrupts to get serviced */ - local_irq_enable(); + raw_local_irq_enable(); mdelay(1); - local_irq_disable(); + raw_local_irq_disable(); remove_siblinginfo(cpu); @@ -1351,11 +1508,11 @@ int __devinit __cpu_up(unsigned int cpu) /* In case one didn't come up */ if (!cpu_isset(cpu, cpu_callin_map)) { printk(KERN_DEBUG "skipping cpu%d, didn't come online\n", cpu); - local_irq_enable(); + raw_local_irq_enable(); return -EIO; } - local_irq_enable(); + raw_local_irq_enable(); per_cpu(cpu_state, cpu) = CPU_UP_PREPARE; /* Unleash the CPU! */ cpu_set(cpu, smp_commenced_mask); Index: linux/arch/i386/kernel/switch2poll.c =================================================================== --- /dev/null +++ linux/arch/i386/kernel/switch2poll.c @@ -0,0 +1,5 @@ +/* + * Same type of hack used for early_printk. This keeps the code + * in one place. 
+ */ +#include "../../x86_64/kernel/switch2poll.c" Index: linux/arch/i386/kernel/time.c =================================================================== --- linux.orig/arch/i386/kernel/time.c +++ linux/arch/i386/kernel/time.c @@ -46,6 +46,7 @@ #include #include #include +#include #include #include @@ -56,6 +57,7 @@ #include #include #include +#include #include "mach_time.h" @@ -74,25 +76,14 @@ int pit_latch_buggy; /* ext #include "do_timer.h" -u64 jiffies_64 = INITIAL_JIFFIES; - -EXPORT_SYMBOL(jiffies_64); - unsigned int cpu_khz; /* Detected as we calibrate the TSC */ EXPORT_SYMBOL(cpu_khz); extern unsigned long wall_jiffies; -DEFINE_SPINLOCK(rtc_lock); +DEFINE_RAW_SPINLOCK(rtc_lock); EXPORT_SYMBOL(rtc_lock); -#include - -DEFINE_SPINLOCK(i8253_lock); -EXPORT_SYMBOL(i8253_lock); - -struct timer_opts *cur_timer __read_mostly = &timer_none; - /* * This is a special lock that is owned by the CPU and holds the index * register we are working with. It is required for NMI access to the @@ -122,118 +113,25 @@ void rtc_cmos_write(unsigned char val, u } EXPORT_SYMBOL(rtc_cmos_write); -/* - * This version of gettimeofday has microsecond resolution - * and better than microsecond precision on fast x86 machines with TSC. - */ -void do_gettimeofday(struct timeval *tv) -{ - unsigned long seq; - unsigned long usec, sec; - unsigned long max_ntp_tick; - - do { - unsigned long lost; - - seq = read_seqbegin(&xtime_lock); - - usec = cur_timer->get_offset(); - lost = jiffies - wall_jiffies; - - /* - * If time_adjust is negative then NTP is slowing the clock - * so make sure not to go into next possible interval. - * Better to lose some accuracy than have time go backwards.. 
- */ - if (unlikely(time_adjust < 0)) { - max_ntp_tick = (USEC_PER_SEC / HZ) - tickadj; - usec = min(usec, max_ntp_tick); - - if (lost) - usec += lost * max_ntp_tick; - } - else if (unlikely(lost)) - usec += lost * (USEC_PER_SEC / HZ); - - sec = xtime.tv_sec; - usec += (xtime.tv_nsec / 1000); - } while (read_seqretry(&xtime_lock, seq)); - - while (usec >= 1000000) { - usec -= 1000000; - sec++; - } - - tv->tv_sec = sec; - tv->tv_usec = usec; -} - -EXPORT_SYMBOL(do_gettimeofday); - -int do_settimeofday(struct timespec *tv) -{ - time_t wtm_sec, sec = tv->tv_sec; - long wtm_nsec, nsec = tv->tv_nsec; - - if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC) - return -EINVAL; - - write_seqlock_irq(&xtime_lock); - /* - * This is revolting. We need to set "xtime" correctly. However, the - * value in this location is the value at the most recent update of - * wall time. Discover what correction gettimeofday() would have - * made, and then undo it! - */ - nsec -= cur_timer->get_offset() * NSEC_PER_USEC; - nsec -= (jiffies - wall_jiffies) * TICK_NSEC; - - wtm_sec = wall_to_monotonic.tv_sec + (xtime.tv_sec - sec); - wtm_nsec = wall_to_monotonic.tv_nsec + (xtime.tv_nsec - nsec); - - set_normalized_timespec(&xtime, sec, nsec); - set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec); - - ntp_clear(); - write_sequnlock_irq(&xtime_lock); - clock_was_set(); - return 0; -} - -EXPORT_SYMBOL(do_settimeofday); - static int set_rtc_mmss(unsigned long nowtime) { int retval; - - WARN_ON(irqs_disabled()); + unsigned long flags; /* gets recalled with irq locally disabled */ - spin_lock_irq(&rtc_lock); + /* XXX - does irqsave resolve this? 
-johnstul */ + spin_lock_irqsave(&rtc_lock, flags); if (efi_enabled) retval = efi_set_rtc_mmss(nowtime); else retval = mach_set_rtc_mmss(nowtime); - spin_unlock_irq(&rtc_lock); + spin_unlock_irqrestore(&rtc_lock, flags); return retval; } - -int timer_ack; - -/* monotonic_clock(): returns # of nanoseconds passed since time_init() - * Note: This function is required to return accurate - * time even in the absence of multiple timer ticks. - */ -unsigned long long monotonic_clock(void) -{ - return cur_timer->monotonic_clock(); -} -EXPORT_SYMBOL(monotonic_clock); - #if defined(CONFIG_SMP) && defined(CONFIG_FRAME_POINTER) -unsigned long profile_pc(struct pt_regs *regs) +unsigned long notrace profile_pc(struct pt_regs *regs) { unsigned long pc = instruction_pointer(regs); @@ -245,70 +143,6 @@ unsigned long profile_pc(struct pt_regs EXPORT_SYMBOL(profile_pc); #endif -/* - * timer_interrupt() needs to keep up the real-time clock, - * as well as call the "do_timer()" routine every clocktick - */ -static inline void do_timer_interrupt(int irq, struct pt_regs *regs) -{ -#ifdef CONFIG_X86_IO_APIC - if (timer_ack) { - /* - * Subtle, when I/O APICs are used we have to ack timer IRQ - * manually to reset the IRR bit for do_slow_gettimeoffset(). - * This will also deassert NMI lines for the watchdog if run - * on an 82489DX-based system. - */ - spin_lock(&i8259A_lock); - outb(0x0c, PIC_MASTER_OCW3); - /* Ack the IRQ; AEOI will end it automatically. */ - inb(PIC_MASTER_POLL); - spin_unlock(&i8259A_lock); - } -#endif - - do_timer_interrupt_hook(regs); - - - if (MCA_bus) { - /* The PS/2 uses level-triggered interrupts. You can't - turn them off, nor would you want to (any attempt to - enable edge-triggered interrupts usually gets intercepted by a - special hardware circuit). Hence we have to acknowledge - the timer interrupt. Through some incredibly stupid - design idea, the reset for IRQ 0 is done by setting the - high bit of the PPI port B (0x61). 
Note that some PS/2s, - notably the 55SX, work fine if this is removed. */ - - irq = inb_p( 0x61 ); /* read the current state */ - outb_p( irq|0x80, 0x61 ); /* reset the IRQ */ - } -} - -/* - * This is the same as the above, except we _also_ save the current - * Time Stamp Counter value at the time of the timer interrupt, so that - * we later on can estimate the time of day more exactly. - */ -irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) -{ - /* - * Here we are in the timer irq handler. We just have irqs locally - * disabled but we don't know if the timer_bh is running on the other - * CPU. We need to avoid to SMP race with it. NOTE: we don' t need - * the irq version of write_lock because as just said we have irq - * locally disabled. -arca - */ - write_seqlock(&xtime_lock); - - cur_timer->mark_offset(); - - do_timer_interrupt(irq, regs); - - write_sequnlock(&xtime_lock); - return IRQ_HANDLED; -} - /* not static: needed by APM */ unsigned long get_cmos_time(void) { @@ -327,139 +161,42 @@ unsigned long get_cmos_time(void) } EXPORT_SYMBOL(get_cmos_time); -static void sync_cmos_clock(unsigned long dummy); - -static DEFINE_TIMER(sync_cmos_timer, sync_cmos_clock, 0, 0); - -static void sync_cmos_clock(unsigned long dummy) +/* arch specific timeofday hooks */ +nsec_t read_persistent_clock(void) { - struct timeval now, next; - int fail = 1; + return (nsec_t)get_cmos_time() * NSEC_PER_SEC; +} +void sync_persistent_clock(struct timespec ts) +{ + static unsigned long last_rtc_update; /* * If we have an externally synchronized Linux clock, then update * CMOS clock accordingly every ~11 minutes. Set_rtc_mmss() has to be * called as close as possible to 500 ms before the new second starts. - * This code is run on a timer. If the clock is set, that timer - * may not expire at the correct time. Thus, we adjust... */ - if (!ntp_synced()) - /* - * Not synced, exit, do not restart a timer (if one is - * running, let it run out). 
- */ + if (ts.tv_sec <= last_rtc_update + 660) return; - do_gettimeofday(&now); - if (now.tv_usec >= USEC_AFTER - ((unsigned) TICK_SIZE) / 2 && - now.tv_usec <= USEC_BEFORE + ((unsigned) TICK_SIZE) / 2) - fail = set_rtc_mmss(now.tv_sec); - - next.tv_usec = USEC_AFTER - now.tv_usec; - if (next.tv_usec <= 0) - next.tv_usec += USEC_PER_SEC; - - if (!fail) - next.tv_sec = 659; - else - next.tv_sec = 0; - - if (next.tv_usec >= USEC_PER_SEC) { - next.tv_sec++; - next.tv_usec -= USEC_PER_SEC; + if((ts.tv_nsec / 1000) >= USEC_AFTER - ((unsigned) TICK_SIZE) / 2 && + (ts.tv_nsec / 1000) <= USEC_BEFORE + ((unsigned) TICK_SIZE) / 2) { + /* horrible...FIXME */ + if (set_rtc_mmss(ts.tv_sec) == 0) + last_rtc_update = ts.tv_sec; + else + last_rtc_update = ts.tv_sec - 600; /* do it again in 60 s */ } - mod_timer(&sync_cmos_timer, jiffies + timeval_to_jiffies(&next)); -} - -void notify_arch_cmos_timer(void) -{ - mod_timer(&sync_cmos_timer, jiffies + 1); } -static long clock_cmos_diff, sleep_start; - -static struct timer_opts *last_timer; -static int timer_suspend(struct sys_device *dev, pm_message_t state) -{ - /* - * Estimate time zone so that set_time can update the clock - */ - clock_cmos_diff = -get_cmos_time(); - clock_cmos_diff += get_seconds(); - sleep_start = get_cmos_time(); - last_timer = cur_timer; - cur_timer = &timer_none; - if (last_timer->suspend) - last_timer->suspend(state); - return 0; -} - -static int timer_resume(struct sys_device *dev) -{ - unsigned long flags; - unsigned long sec; - unsigned long sleep_length; - -#ifdef CONFIG_HPET_TIMER - if (is_hpet_enabled()) - hpet_reenable(); -#endif - setup_pit_timer(); - sec = get_cmos_time() + clock_cmos_diff; - sleep_length = (get_cmos_time() - sleep_start) * HZ; - write_seqlock_irqsave(&xtime_lock, flags); - xtime.tv_sec = sec; - xtime.tv_nsec = 0; - write_sequnlock_irqrestore(&xtime_lock, flags); - jiffies += sleep_length; - wall_jiffies += sleep_length; - if (last_timer->resume) - last_timer->resume(); - cur_timer = 
last_timer; - last_timer = NULL; - touch_softlockup_watchdog(); - return 0; -} - -static struct sysdev_class timer_sysclass = { - .resume = timer_resume, - .suspend = timer_suspend, - set_kset_name("timer"), -}; - - -/* XXX this driverfs stuff should probably go elsewhere later -john */ -static struct sys_device device_timer = { - .id = 0, - .cls = &timer_sysclass, -}; - -static int time_init_device(void) -{ - int error = sysdev_class_register(&timer_sysclass); - if (!error) - error = sysdev_register(&device_timer); - return error; -} - -device_initcall(time_init_device); - #ifdef CONFIG_HPET_TIMER extern void (*late_time_init)(void); /* Duplicate of time_init() below, with hpet_enable part added */ static void __init hpet_time_init(void) { - xtime.tv_sec = get_cmos_time(); - xtime.tv_nsec = (INITIAL_JIFFIES % HZ) * (NSEC_PER_SEC / HZ); - set_normalized_timespec(&wall_to_monotonic, - -xtime.tv_sec, -xtime.tv_nsec); - if ((hpet_enable() >= 0) && hpet_use_timer) { printk("Using HPET for base-timer\n"); } - cur_timer = select_timer(); - printk(KERN_INFO "Using %s for high-res timesource\n",cur_timer->name); time_init_hook(); } @@ -467,6 +204,9 @@ static void __init hpet_time_init(void) void __init time_init(void) { + /* Set the clock to HZ Hz: */ + setup_pit_timer(); + #ifdef CONFIG_HPET_TIMER if (is_hpet_capable()) { /* @@ -477,13 +217,5 @@ void __init time_init(void) return; } #endif - xtime.tv_sec = get_cmos_time(); - xtime.tv_nsec = (INITIAL_JIFFIES % HZ) * (NSEC_PER_SEC / HZ); - set_normalized_timespec(&wall_to_monotonic, - -xtime.tv_sec, -xtime.tv_nsec); - - cur_timer = select_timer(); - printk(KERN_INFO "Using %s for high-res timesource\n",cur_timer->name); - time_init_hook(); } Index: linux/arch/i386/kernel/time_hpet.c =================================================================== --- linux.orig/arch/i386/kernel/time_hpet.c +++ linux/arch/i386/kernel/time_hpet.c @@ -302,11 +302,11 @@ int hpet_rtc_timer_init(void) else hpet_rtc_int_freq = 
DEFAULT_RTC_INT_FREQ; - local_irq_save(flags); + raw_local_irq_save(flags); cnt = hpet_readl(HPET_COUNTER); cnt += ((hpet_tick*HZ)/hpet_rtc_int_freq); hpet_writel(cnt, HPET_T1_CMP); - local_irq_restore(flags); + raw_local_irq_restore(flags); cfg = hpet_readl(HPET_T1_CFG); cfg |= HPET_TN_ENABLE | HPET_TN_SETVAL | HPET_TN_32BIT; Index: linux/arch/i386/kernel/timers/Makefile =================================================================== --- linux.orig/arch/i386/kernel/timers/Makefile +++ /dev/null @@ -1,9 +0,0 @@ -# -# Makefile for x86 timers -# - -obj-y := timer.o timer_none.o timer_tsc.o timer_pit.o common.o - -obj-$(CONFIG_X86_CYCLONE_TIMER) += timer_cyclone.o -obj-$(CONFIG_HPET_TIMER) += timer_hpet.o -obj-$(CONFIG_X86_PM_TIMER) += timer_pm.o Index: linux/arch/i386/kernel/timers/common.c =================================================================== --- linux.orig/arch/i386/kernel/timers/common.c +++ /dev/null @@ -1,172 +0,0 @@ -/* - * Common functions used across the timers go here - */ - -#include -#include -#include -#include -#include - -#include -#include -#include - -#include "mach_timer.h" - -/* ------ Calibrate the TSC ------- - * Return 2^32 * (1 / (TSC clocks per usec)) for do_fast_gettimeoffset(). - * Too much 64-bit arithmetic here to do this cleanly in C, and for - * accuracy's sake we want to keep the overhead on the CTC speaker (channel 2) - * output busy loop as low as possible. We avoid reading the CTC registers - * directly because of the awkward 8-bit access mechanism of the 82C54 - * device. 
- */ - -#define CALIBRATE_TIME (5 * 1000020/HZ) - -unsigned long calibrate_tsc(void) -{ - mach_prepare_counter(); - - { - unsigned long startlow, starthigh; - unsigned long endlow, endhigh; - unsigned long count; - - rdtsc(startlow,starthigh); - mach_countup(&count); - rdtsc(endlow,endhigh); - - - /* Error: ECTCNEVERSET */ - if (count <= 1) - goto bad_ctc; - - /* 64-bit subtract - gcc just messes up with long longs */ - __asm__("subl %2,%0\n\t" - "sbbl %3,%1" - :"=a" (endlow), "=d" (endhigh) - :"g" (startlow), "g" (starthigh), - "0" (endlow), "1" (endhigh)); - - /* Error: ECPUTOOFAST */ - if (endhigh) - goto bad_ctc; - - /* Error: ECPUTOOSLOW */ - if (endlow <= CALIBRATE_TIME) - goto bad_ctc; - - __asm__("divl %2" - :"=a" (endlow), "=d" (endhigh) - :"r" (endlow), "0" (0), "1" (CALIBRATE_TIME)); - - return endlow; - } - - /* - * The CTC wasn't reliable: we got a hit on the very first read, - * or the CPU was so fast/slow that the quotient wouldn't fit in - * 32 bits.. - */ -bad_ctc: - return 0; -} - -#ifdef CONFIG_HPET_TIMER -/* ------ Calibrate the TSC using HPET ------- - * Return 2^32 * (1 / (TSC clocks per usec)) for getting the CPU freq. - * Second output is parameter 1 (when non NULL) - * Set 2^32 * (1 / (tsc per HPET clk)) for delay_hpet(). - * calibrate_tsc() calibrates the processor TSC by comparing - * it to the HPET timer of known frequency. 
- * Too much 64-bit arithmetic here to do this cleanly in C - */ -#define CALIBRATE_CNT_HPET (5 * hpet_tick) -#define CALIBRATE_TIME_HPET (5 * KERNEL_TICK_USEC) - -unsigned long __devinit calibrate_tsc_hpet(unsigned long *tsc_hpet_quotient_ptr) -{ - unsigned long tsc_startlow, tsc_starthigh; - unsigned long tsc_endlow, tsc_endhigh; - unsigned long hpet_start, hpet_end; - unsigned long result, remain; - - hpet_start = hpet_readl(HPET_COUNTER); - rdtsc(tsc_startlow, tsc_starthigh); - do { - hpet_end = hpet_readl(HPET_COUNTER); - } while ((hpet_end - hpet_start) < CALIBRATE_CNT_HPET); - rdtsc(tsc_endlow, tsc_endhigh); - - /* 64-bit subtract - gcc just messes up with long longs */ - __asm__("subl %2,%0\n\t" - "sbbl %3,%1" - :"=a" (tsc_endlow), "=d" (tsc_endhigh) - :"g" (tsc_startlow), "g" (tsc_starthigh), - "0" (tsc_endlow), "1" (tsc_endhigh)); - - /* Error: ECPUTOOFAST */ - if (tsc_endhigh) - goto bad_calibration; - - /* Error: ECPUTOOSLOW */ - if (tsc_endlow <= CALIBRATE_TIME_HPET) - goto bad_calibration; - - ASM_DIV64_REG(result, remain, tsc_endlow, 0, CALIBRATE_TIME_HPET); - if (remain > (tsc_endlow >> 1)) - result++; /* rounding the result */ - - if (tsc_hpet_quotient_ptr) { - unsigned long tsc_hpet_quotient; - - ASM_DIV64_REG(tsc_hpet_quotient, remain, tsc_endlow, 0, - CALIBRATE_CNT_HPET); - if (remain > (tsc_endlow >> 1)) - tsc_hpet_quotient++; /* rounding the result */ - *tsc_hpet_quotient_ptr = tsc_hpet_quotient; - } - - return result; -bad_calibration: - /* - * the CPU was so fast/slow that the quotient wouldn't fit in - * 32 bits.. - */ - return 0; -} -#endif - - -unsigned long read_timer_tsc(void) -{ - unsigned long retval; - rdtscl(retval); - return retval; -} - - -/* calculate cpu_khz */ -void init_cpu_khz(void) -{ - if (cpu_has_tsc) { - unsigned long tsc_quotient = calibrate_tsc(); - if (tsc_quotient) { - /* report CPU clock rate in Hz. - * The formula is (10^6 * 2^32) / (2^32 * 1 / (clocks/us)) = - * clock/second. Our precision is about 100 ppm. 
- */ - { unsigned long eax=0, edx=1000; - __asm__("divl %2" - :"=a" (cpu_khz), "=d" (edx) - :"r" (tsc_quotient), - "0" (eax), "1" (edx)); - printk("Detected %u.%03u MHz processor.\n", - cpu_khz / 1000, cpu_khz % 1000); - } - } - } -} - Index: linux/arch/i386/kernel/timers/timer.c =================================================================== --- linux.orig/arch/i386/kernel/timers/timer.c +++ /dev/null @@ -1,75 +0,0 @@ -#include -#include -#include -#include - -#ifdef CONFIG_HPET_TIMER -/* - * HPET memory read is slower than tsc reads, but is more dependable as it - * always runs at constant frequency and reduces complexity due to - * cpufreq. So, we prefer HPET timer to tsc based one. Also, we cannot use - * timer_pit when HPET is active. So, we default to timer_tsc. - */ -#endif -/* list of timers, ordered by preference, NULL terminated */ -static struct init_timer_opts* __initdata timers[] = { -#ifdef CONFIG_X86_CYCLONE_TIMER - &timer_cyclone_init, -#endif -#ifdef CONFIG_HPET_TIMER - &timer_hpet_init, -#endif -#ifdef CONFIG_X86_PM_TIMER - &timer_pmtmr_init, -#endif - &timer_tsc_init, - &timer_pit_init, - NULL, -}; - -static char clock_override[10] __initdata; - -static int __init clock_setup(char* str) -{ - if (str) - strlcpy(clock_override, str, sizeof(clock_override)); - return 1; -} -__setup("clock=", clock_setup); - - -/* The chosen timesource has been found to be bad. - * Fall back to a known good timesource (the PIT) - */ -void clock_fallback(void) -{ - cur_timer = &timer_pit; -} - -/* iterates through the list of timers, returning the first - * one that initializes successfully. 
- */ -struct timer_opts* __init select_timer(void) -{ - int i = 0; - - /* find most preferred working timer */ - while (timers[i]) { - if (timers[i]->init) - if (timers[i]->init(clock_override) == 0) - return timers[i]->opts; - ++i; - } - - panic("select_timer: Cannot find a suitable timer\n"); - return NULL; -} - -int read_current_timer(unsigned long *timer_val) -{ - if (cur_timer->read_timer) { - *timer_val = cur_timer->read_timer(); - return 0; - } - return -1; -} Index: linux/arch/i386/kernel/timers/timer_cyclone.c =================================================================== --- linux.orig/arch/i386/kernel/timers/timer_cyclone.c +++ /dev/null @@ -1,259 +0,0 @@ -/* Cyclone-timer: - * This code implements timer_ops for the cyclone counter found - * on IBM x440, x360, and other Summit based systems. - * - * Copyright (C) 2002 IBM, John Stultz (johnstul@us.ibm.com) - */ - - -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -#include "io_ports.h" - -/* Number of usecs that the last interrupt was delayed */ -static int delay_at_last_interrupt; - -#define CYCLONE_CBAR_ADDR 0xFEB00CD0 -#define CYCLONE_PMCC_OFFSET 0x51A0 -#define CYCLONE_MPMC_OFFSET 0x51D0 -#define CYCLONE_MPCS_OFFSET 0x51A8 -#define CYCLONE_TIMER_FREQ 100000000 -#define CYCLONE_TIMER_MASK (((u64)1<<40)-1) /* 40 bit mask */ -int use_cyclone = 0; - -static u32* volatile cyclone_timer; /* Cyclone MPMC0 register */ -static u32 last_cyclone_low; -static u32 last_cyclone_high; -static unsigned long long monotonic_base; -static seqlock_t monotonic_lock = SEQLOCK_UNLOCKED; - -/* helper macro to atomically read both cyclone counter registers */ -#define read_cyclone_counter(low,high) \ - do{ \ - high = cyclone_timer[1]; low = cyclone_timer[0]; \ - } while (high != cyclone_timer[1]); - - -static void mark_offset_cyclone(void) -{ - unsigned long lost, delay; - unsigned long delta = last_cyclone_low; - int count; - unsigned long long 
this_offset, last_offset; - - write_seqlock(&monotonic_lock); - last_offset = ((unsigned long long)last_cyclone_high<<32)|last_cyclone_low; - - spin_lock(&i8253_lock); - read_cyclone_counter(last_cyclone_low,last_cyclone_high); - - /* read values for delay_at_last_interrupt */ - outb_p(0x00, 0x43); /* latch the count ASAP */ - - count = inb_p(0x40); /* read the latched count */ - count |= inb(0x40) << 8; - - /* - * VIA686a test code... reset the latch if count > max + 1 - * from timer_pit.c - cjb - */ - if (count > LATCH) { - outb_p(0x34, PIT_MODE); - outb_p(LATCH & 0xff, PIT_CH0); - outb(LATCH >> 8, PIT_CH0); - count = LATCH - 1; - } - spin_unlock(&i8253_lock); - - /* lost tick compensation */ - delta = last_cyclone_low - delta; - delta /= (CYCLONE_TIMER_FREQ/1000000); - delta += delay_at_last_interrupt; - lost = delta/(1000000/HZ); - delay = delta%(1000000/HZ); - if (lost >= 2) - jiffies_64 += lost-1; - - /* update the monotonic base value */ - this_offset = ((unsigned long long)last_cyclone_high<<32)|last_cyclone_low; - monotonic_base += (this_offset - last_offset) & CYCLONE_TIMER_MASK; - write_sequnlock(&monotonic_lock); - - /* calculate delay_at_last_interrupt */ - count = ((LATCH-1) - count) * TICK_SIZE; - delay_at_last_interrupt = (count + LATCH/2) / LATCH; - - - /* catch corner case where tick rollover occured - * between cyclone and pit reads (as noted when - * usec delta is > 90% # of usecs/tick) - */ - if (lost && abs(delay - delay_at_last_interrupt) > (900000/HZ)) - jiffies_64++; -} - -static unsigned long get_offset_cyclone(void) -{ - u32 offset; - - if(!cyclone_timer) - return delay_at_last_interrupt; - - /* Read the cyclone timer */ - offset = cyclone_timer[0]; - - /* .. relative to previous jiffy */ - offset = offset - last_cyclone_low; - - /* convert cyclone ticks to microseconds */ - /* XXX slow, can we speed this up? 
*/ - offset = offset/(CYCLONE_TIMER_FREQ/1000000); - - /* our adjusted time offset in microseconds */ - return delay_at_last_interrupt + offset; -} - -static unsigned long long monotonic_clock_cyclone(void) -{ - u32 now_low, now_high; - unsigned long long last_offset, this_offset, base; - unsigned long long ret; - unsigned seq; - - /* atomically read monotonic base & last_offset */ - do { - seq = read_seqbegin(&monotonic_lock); - last_offset = ((unsigned long long)last_cyclone_high<<32)|last_cyclone_low; - base = monotonic_base; - } while (read_seqretry(&monotonic_lock, seq)); - - - /* Read the cyclone counter */ - read_cyclone_counter(now_low,now_high); - this_offset = ((unsigned long long)now_high<<32)|now_low; - - /* convert to nanoseconds */ - ret = base + ((this_offset - last_offset)&CYCLONE_TIMER_MASK); - return ret * (1000000000 / CYCLONE_TIMER_FREQ); -} - -static int __init init_cyclone(char* override) -{ - u32* reg; - u32 base; /* saved cyclone base address */ - u32 pageaddr; /* page that contains cyclone_timer register */ - u32 offset; /* offset from pageaddr to cyclone_timer register */ - int i; - - /* check clock override */ - if (override[0] && strncmp(override,"cyclone",7)) - return -ENODEV; - - /*make sure we're on a summit box*/ - if(!use_cyclone) return -ENODEV; - - printk(KERN_INFO "Summit chipset: Starting Cyclone Counter.\n"); - - /* find base address */ - pageaddr = (CYCLONE_CBAR_ADDR)&PAGE_MASK; - offset = (CYCLONE_CBAR_ADDR)&(~PAGE_MASK); - set_fixmap_nocache(FIX_CYCLONE_TIMER, pageaddr); - reg = (u32*)(fix_to_virt(FIX_CYCLONE_TIMER) + offset); - if(!reg){ - printk(KERN_ERR "Summit chipset: Could not find valid CBAR register.\n"); - return -ENODEV; - } - base = *reg; - if(!base){ - printk(KERN_ERR "Summit chipset: Could not find valid CBAR value.\n"); - return -ENODEV; - } - - /* setup PMCC */ - pageaddr = (base + CYCLONE_PMCC_OFFSET)&PAGE_MASK; - offset = (base + CYCLONE_PMCC_OFFSET)&(~PAGE_MASK); - set_fixmap_nocache(FIX_CYCLONE_TIMER, 
pageaddr); - reg = (u32*)(fix_to_virt(FIX_CYCLONE_TIMER) + offset); - if(!reg){ - printk(KERN_ERR "Summit chipset: Could not find valid PMCC register.\n"); - return -ENODEV; - } - reg[0] = 0x00000001; - - /* setup MPCS */ - pageaddr = (base + CYCLONE_MPCS_OFFSET)&PAGE_MASK; - offset = (base + CYCLONE_MPCS_OFFSET)&(~PAGE_MASK); - set_fixmap_nocache(FIX_CYCLONE_TIMER, pageaddr); - reg = (u32*)(fix_to_virt(FIX_CYCLONE_TIMER) + offset); - if(!reg){ - printk(KERN_ERR "Summit chipset: Could not find valid MPCS register.\n"); - return -ENODEV; - } - reg[0] = 0x00000001; - - /* map in cyclone_timer */ - pageaddr = (base + CYCLONE_MPMC_OFFSET)&PAGE_MASK; - offset = (base + CYCLONE_MPMC_OFFSET)&(~PAGE_MASK); - set_fixmap_nocache(FIX_CYCLONE_TIMER, pageaddr); - cyclone_timer = (u32*)(fix_to_virt(FIX_CYCLONE_TIMER) + offset); - if(!cyclone_timer){ - printk(KERN_ERR "Summit chipset: Could not find valid MPMC register.\n"); - return -ENODEV; - } - - /*quick test to make sure its ticking*/ - for(i=0; i<3; i++){ - u32 old = cyclone_timer[0]; - int stall = 100; - while(stall--) barrier(); - if(cyclone_timer[0] == old){ - printk(KERN_ERR "Summit chipset: Counter not counting! DISABLED\n"); - cyclone_timer = 0; - return -ENODEV; - } - } - - init_cpu_khz(); - - /* Everything looks good! 
*/ - return 0; -} - - -static void delay_cyclone(unsigned long loops) -{ - unsigned long bclock, now; - if(!cyclone_timer) - return; - bclock = cyclone_timer[0]; - do { - rep_nop(); - now = cyclone_timer[0]; - } while ((now-bclock) < loops); -} -/************************************************************/ - -/* cyclone timer_opts struct */ -static struct timer_opts timer_cyclone = { - .name = "cyclone", - .mark_offset = mark_offset_cyclone, - .get_offset = get_offset_cyclone, - .monotonic_clock = monotonic_clock_cyclone, - .delay = delay_cyclone, -}; - -struct init_timer_opts __initdata timer_cyclone_init = { - .init = init_cyclone, - .opts = &timer_cyclone, -}; Index: linux/arch/i386/kernel/timers/timer_hpet.c =================================================================== --- linux.orig/arch/i386/kernel/timers/timer_hpet.c +++ /dev/null @@ -1,212 +0,0 @@ -/* - * This code largely moved from arch/i386/kernel/time.c. - * See comments there for proper credits. - */ - -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include "io_ports.h" -#include "mach_timer.h" -#include - -static unsigned long hpet_usec_quotient __read_mostly; /* convert hpet clks to usec */ -static unsigned long tsc_hpet_quotient __read_mostly; /* convert tsc to hpet clks */ -static unsigned long hpet_last; /* hpet counter value at last tick*/ -static unsigned long last_tsc_low; /* lsb 32 bits of Time Stamp Counter */ -static unsigned long last_tsc_high; /* msb 32 bits of Time Stamp Counter */ -static unsigned long long monotonic_base; -static seqlock_t monotonic_lock = SEQLOCK_UNLOCKED; - -/* convert from cycles(64bits) => nanoseconds (64bits) - * basic equation: - * ns = cycles / (freq / ns_per_sec) - * ns = cycles * (ns_per_sec / freq) - * ns = cycles * (10^9 / (cpu_mhz * 10^6)) - * ns = cycles * (10^3 / cpu_mhz) - * - * Then we use scaling math (suggested by george@mvista.com) to get: - * ns = cycles * (10^3 * SC / cpu_mhz) / SC - * ns = 
cycles * cyc2ns_scale / SC - * - * And since SC is a constant power of two, we can convert the div - * into a shift. - * -johnstul@us.ibm.com "math is hard, lets go shopping!" - */ -static unsigned long cyc2ns_scale; -#define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */ - -static inline void set_cyc2ns_scale(unsigned long cpu_mhz) -{ - cyc2ns_scale = (1000 << CYC2NS_SCALE_FACTOR)/cpu_mhz; -} - -static inline unsigned long long cycles_2_ns(unsigned long long cyc) -{ - return (cyc * cyc2ns_scale) >> CYC2NS_SCALE_FACTOR; -} - -static unsigned long long monotonic_clock_hpet(void) -{ - unsigned long long last_offset, this_offset, base; - unsigned seq; - - /* atomically read monotonic base & last_offset */ - do { - seq = read_seqbegin(&monotonic_lock); - last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low; - base = monotonic_base; - } while (read_seqretry(&monotonic_lock, seq)); - - /* Read the Time Stamp Counter */ - rdtscll(this_offset); - - /* return the value in ns */ - return base + cycles_2_ns(this_offset - last_offset); -} - -static unsigned long get_offset_hpet(void) -{ - register unsigned long eax, edx; - - eax = hpet_readl(HPET_COUNTER); - eax -= hpet_last; /* hpet delta */ - eax = min(hpet_tick, eax); - /* - * Time offset = (hpet delta) * ( usecs per HPET clock ) - * = (hpet delta) * ( usecs per tick / HPET clocks per tick) - * = (hpet delta) * ( hpet_usec_quotient ) / (2^32) - * - * Where, - * hpet_usec_quotient = (2^32 * usecs per tick)/HPET clocks per tick - * - * Using a mull instead of a divl saves some cycles in critical path. 
- */ - ASM_MUL64_REG(eax, edx, hpet_usec_quotient, eax); - - /* our adjusted time offset in microseconds */ - return edx; -} - -static void mark_offset_hpet(void) -{ - unsigned long long this_offset, last_offset; - unsigned long offset; - - write_seqlock(&monotonic_lock); - last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low; - rdtsc(last_tsc_low, last_tsc_high); - - if (hpet_use_timer) - offset = hpet_readl(HPET_T0_CMP) - hpet_tick; - else - offset = hpet_readl(HPET_COUNTER); - if (unlikely(((offset - hpet_last) >= (2*hpet_tick)) && (hpet_last != 0))) { - int lost_ticks = ((offset - hpet_last) / hpet_tick) - 1; - jiffies_64 += lost_ticks; - } - hpet_last = offset; - - /* update the monotonic base value */ - this_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low; - monotonic_base += cycles_2_ns(this_offset - last_offset); - write_sequnlock(&monotonic_lock); -} - -static void delay_hpet(unsigned long loops) -{ - unsigned long hpet_start, hpet_end; - unsigned long eax; - - /* loops is the number of cpu cycles. Convert it to hpet clocks */ - ASM_MUL64_REG(eax, loops, tsc_hpet_quotient, loops); - - hpet_start = hpet_readl(HPET_COUNTER); - do { - rep_nop(); - hpet_end = hpet_readl(HPET_COUNTER); - } while ((hpet_end - hpet_start) < (loops)); -} - -static struct timer_opts timer_hpet; - -static int __init init_hpet(char* override) -{ - unsigned long result, remain; - - /* check clock override */ - if (override[0] && strncmp(override,"hpet",4)) - return -ENODEV; - - if (!is_hpet_enabled()) - return -ENODEV; - - printk("Using HPET for gettimeofday\n"); - if (cpu_has_tsc) { - unsigned long tsc_quotient = calibrate_tsc_hpet(&tsc_hpet_quotient); - if (tsc_quotient) { - /* report CPU clock rate in Hz. - * The formula is (10^6 * 2^32) / (2^32 * 1 / (clocks/us)) = - * clock/second. Our precision is about 100 ppm. 
- */ - { unsigned long eax=0, edx=1000; - ASM_DIV64_REG(cpu_khz, edx, tsc_quotient, - eax, edx); - printk("Detected %u.%03u MHz processor.\n", - cpu_khz / 1000, cpu_khz % 1000); - } - set_cyc2ns_scale(cpu_khz/1000); - } - /* set this only when cpu_has_tsc */ - timer_hpet.read_timer = read_timer_tsc; - } - - /* - * Math to calculate hpet to usec multiplier - * Look for the comments at get_offset_hpet() - */ - ASM_DIV64_REG(result, remain, hpet_tick, 0, KERNEL_TICK_USEC); - if (remain > (hpet_tick >> 1)) - result++; /* rounding the result */ - hpet_usec_quotient = result; - - return 0; -} - -static int hpet_resume(void) -{ - write_seqlock(&monotonic_lock); - /* Assume this is the last mark offset time */ - rdtsc(last_tsc_low, last_tsc_high); - - if (hpet_use_timer) - hpet_last = hpet_readl(HPET_T0_CMP) - hpet_tick; - else - hpet_last = hpet_readl(HPET_COUNTER); - write_sequnlock(&monotonic_lock); - return 0; -} -/************************************************************/ - -/* tsc timer_opts struct */ -static struct timer_opts timer_hpet __read_mostly = { - .name = "hpet", - .mark_offset = mark_offset_hpet, - .get_offset = get_offset_hpet, - .monotonic_clock = monotonic_clock_hpet, - .delay = delay_hpet, - .resume = hpet_resume, -}; - -struct init_timer_opts __initdata timer_hpet_init = { - .init = init_hpet, - .opts = &timer_hpet, -}; Index: linux/arch/i386/kernel/timers/timer_none.c =================================================================== --- linux.orig/arch/i386/kernel/timers/timer_none.c +++ /dev/null @@ -1,39 +0,0 @@ -#include -#include - -static void mark_offset_none(void) -{ - /* nothing needed */ -} - -static unsigned long get_offset_none(void) -{ - return 0; -} - -static unsigned long long monotonic_clock_none(void) -{ - return 0; -} - -static void delay_none(unsigned long loops) -{ - int d0; - __asm__ __volatile__( - "\tjmp 1f\n" - ".align 16\n" - "1:\tjmp 2f\n" - ".align 16\n" - "2:\tdecl %0\n\tjns 2b" - :"=&a" (d0) - :"0" (loops)); -} - -/* 
none timer_opts struct */ -struct timer_opts timer_none = { - .name = "none", - .mark_offset = mark_offset_none, - .get_offset = get_offset_none, - .monotonic_clock = monotonic_clock_none, - .delay = delay_none, -}; Index: linux/arch/i386/kernel/timers/timer_pit.c =================================================================== --- linux.orig/arch/i386/kernel/timers/timer_pit.c +++ /dev/null @@ -1,176 +0,0 @@ -/* - * This code largely moved from arch/i386/kernel/time.c. - * See comments there for proper credits. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "do_timer.h" -#include "io_ports.h" - -static int count_p; /* counter in get_offset_pit() */ - -static int __init init_pit(char* override) -{ - /* check clock override */ - if (override[0] && strncmp(override,"pit",3)) - printk(KERN_ERR "Warning: clock= override failed. Defaulting to PIT\n"); - - count_p = LATCH; - return 0; -} - -static void mark_offset_pit(void) -{ - /* nothing needed */ -} - -static unsigned long long monotonic_clock_pit(void) -{ - return 0; -} - -static void delay_pit(unsigned long loops) -{ - int d0; - __asm__ __volatile__( - "\tjmp 1f\n" - ".align 16\n" - "1:\tjmp 2f\n" - ".align 16\n" - "2:\tdecl %0\n\tjns 2b" - :"=&a" (d0) - :"0" (loops)); -} - - -/* This function must be called with xtime_lock held. - * It was inspired by Steve McCanne's microtime-i386 for BSD. -- jrs - * - * However, the pc-audio speaker driver changes the divisor so that - * it gets interrupted rather more often - it loads 64 into the - * counter rather than 11932! This has an adverse impact on - * do_gettimeoffset() -- it stops working! What is also not - * good is that the interval that our timer function gets called - * is no longer 10.0002 ms, but 9.9767 ms. To get around this - * would require using a different timing source. 
Maybe someone - * could use the RTC - I know that this can interrupt at frequencies - * ranging from 8192Hz to 2Hz. If I had the energy, I'd somehow fix - * it so that at startup, the timer code in sched.c would select - * using either the RTC or the 8253 timer. The decision would be - * based on whether there was any other device around that needed - * to trample on the 8253. I'd set up the RTC to interrupt at 1024 Hz, - * and then do some jiggery to have a version of do_timer that - * advanced the clock by 1/1024 s. Every time that reached over 1/100 - * of a second, then do all the old code. If the time was kept correct - * then do_gettimeoffset could just return 0 - there is no low order - * divider that can be accessed. - * - * Ideally, you would be able to use the RTC for the speaker driver, - * but it appears that the speaker driver really needs interrupt more - * often than every 120 us or so. - * - * Anyway, this needs more thought.... pjsg (1993-08-28) - * - * If you are really that interested, you should be reading - * comp.protocols.time.ntp! - */ - -static unsigned long get_offset_pit(void) -{ - int count; - unsigned long flags; - static unsigned long jiffies_p = 0; - - /* - * cache volatile jiffies temporarily; we have xtime_lock. - */ - unsigned long jiffies_t; - - spin_lock_irqsave(&i8253_lock, flags); - /* timer count may underflow right here */ - outb_p(0x00, PIT_MODE); /* latch the count ASAP */ - - count = inb_p(PIT_CH0); /* read the latched count */ - - /* - * We do this guaranteed double memory access instead of a _p - * postfix in the previous port access. Wheee, hackady hack - */ - jiffies_t = jiffies; - - count |= inb_p(PIT_CH0) << 8; - - /* VIA686a test code... reset the latch if count > max + 1 */ - if (count > LATCH) { - outb_p(0x34, PIT_MODE); - outb_p(LATCH & 0xff, PIT_CH0); - outb(LATCH >> 8, PIT_CH0); - count = LATCH - 1; - } - - /* - * avoiding timer inconsistencies (they are rare, but they happen)... 
- * there are two kinds of problems that must be avoided here: - * 1. the timer counter underflows - * 2. hardware problem with the timer, not giving us continuous time, - * the counter does small "jumps" upwards on some Pentium systems, - * (see c't 95/10 page 335 for Neptun bug.) - */ - - if( jiffies_t == jiffies_p ) { - if( count > count_p ) { - /* the nutcase */ - count = do_timer_overflow(count); - } - } else - jiffies_p = jiffies_t; - - count_p = count; - - spin_unlock_irqrestore(&i8253_lock, flags); - - count = ((LATCH-1) - count) * TICK_SIZE; - count = (count + LATCH/2) / LATCH; - - return count; -} - - -/* tsc timer_opts struct */ -struct timer_opts timer_pit = { - .name = "pit", - .mark_offset = mark_offset_pit, - .get_offset = get_offset_pit, - .monotonic_clock = monotonic_clock_pit, - .delay = delay_pit, -}; - -struct init_timer_opts __initdata timer_pit_init = { - .init = init_pit, - .opts = &timer_pit, -}; - -void setup_pit_timer(void) -{ - unsigned long flags; - - spin_lock_irqsave(&i8253_lock, flags); - outb_p(0x34,PIT_MODE); /* binary, mode 2, LSB/MSB, ch 0 */ - udelay(10); - outb_p(LATCH & 0xff , PIT_CH0); /* LSB */ - udelay(10); - outb(LATCH >> 8 , PIT_CH0); /* MSB */ - spin_unlock_irqrestore(&i8253_lock, flags); -} Index: linux/arch/i386/kernel/timers/timer_pm.c =================================================================== --- linux.orig/arch/i386/kernel/timers/timer_pm.c +++ /dev/null @@ -1,268 +0,0 @@ -/* - * (C) Dominik Brodowski 2003 - * - * Driver to use the Power Management Timer (PMTMR) available in some - * southbridges as primary timing source for the Linux kernel. - * - * Based on parts of linux/drivers/acpi/hardware/hwtimer.c, timer_pit.c, - * timer_hpet.c, and on Arjan van de Ven's implementation for 2.4. - * - * This file is licensed under the GPL v2. 
- */ - - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include "mach_timer.h" - -/* Number of PMTMR ticks expected during calibration run */ -#define PMTMR_TICKS_PER_SEC 3579545 -#define PMTMR_EXPECTED_RATE \ - ((CALIBRATE_LATCH * (PMTMR_TICKS_PER_SEC >> 10)) / (CLOCK_TICK_RATE>>10)) - - -/* The I/O port the PMTMR resides at. - * The location is detected during setup_arch(), - * in arch/i386/acpi/boot.c */ -u32 pmtmr_ioport = 0; - - -/* value of the Power timer at last timer interrupt */ -static u32 offset_tick; -static u32 offset_delay; - -static unsigned long long monotonic_base; -static seqlock_t monotonic_lock = SEQLOCK_UNLOCKED; - -#define ACPI_PM_MASK 0xFFFFFF /* limit it to 24 bits */ - -/*helper function to safely read acpi pm timesource*/ -static inline u32 read_pmtmr(void) -{ - u32 v1=0,v2=0,v3=0; - /* It has been reported that because of various broken - * chipsets (ICH4, PIIX4 and PIIX4E) where the ACPI PM time - * source is not latched, so you must read it multiple - * times to insure a safe value is read. - */ - do { - v1 = inl(pmtmr_ioport); - v2 = inl(pmtmr_ioport); - v3 = inl(pmtmr_ioport); - } while ((v1 > v2 && v1 < v3) || (v2 > v3 && v2 < v1) - || (v3 > v1 && v3 < v2)); - - /* mask the output to 24 bits */ - return v2 & ACPI_PM_MASK; -} - - -/* - * Some boards have the PMTMR running way too fast. We check - * the PMTMR rate against PIT channel 2 to catch these cases. 
- */ -static int verify_pmtmr_rate(void) -{ - u32 value1, value2; - unsigned long count, delta; - - mach_prepare_counter(); - value1 = read_pmtmr(); - mach_countup(&count); - value2 = read_pmtmr(); - delta = (value2 - value1) & ACPI_PM_MASK; - - /* Check that the PMTMR delta is within 5% of what we expect */ - if (delta < (PMTMR_EXPECTED_RATE * 19) / 20 || - delta > (PMTMR_EXPECTED_RATE * 21) / 20) { - printk(KERN_INFO "PM-Timer running at invalid rate: %lu%% of normal - aborting.\n", 100UL * delta / PMTMR_EXPECTED_RATE); - return -1; - } - - return 0; -} - - -static int init_pmtmr(char* override) -{ - u32 value1, value2; - unsigned int i; - - if (override[0] && strncmp(override,"pmtmr",5)) - return -ENODEV; - - if (!pmtmr_ioport) - return -ENODEV; - - /* we use the TSC for delay_pmtmr, so make sure it exists */ - if (!cpu_has_tsc) - return -ENODEV; - - /* "verify" this timing source */ - value1 = read_pmtmr(); - for (i = 0; i < 10000; i++) { - value2 = read_pmtmr(); - if (value2 == value1) - continue; - if (value2 > value1) - goto pm_good; - if ((value2 < value1) && ((value2) < 0xFFF)) - goto pm_good; - printk(KERN_INFO "PM-Timer had inconsistent results: 0x%#x, 0x%#x - aborting.\n", value1, value2); - return -EINVAL; - } - printk(KERN_INFO "PM-Timer had no reasonable result: 0x%#x - aborting.\n", value1); - return -ENODEV; - -pm_good: - if (verify_pmtmr_rate() != 0) - return -ENODEV; - - init_cpu_khz(); - return 0; -} - -static inline u32 cyc2us(u32 cycles) -{ - /* The Power Management Timer ticks at 3.579545 ticks per microsecond. - * 1 / PM_TIMER_FREQUENCY == 0.27936511 =~ 286/1024 [error: 0.024%] - * - * Even with HZ = 100, delta is at maximum 35796 ticks, so it can - * easily be multiplied with 286 (=0x11E) without having to fear - * u32 overflows. 
- */ - cycles *= 286; - return (cycles >> 10); -} - -/* - * this gets called during each timer interrupt - * - Called while holding the writer xtime_lock - */ -static void mark_offset_pmtmr(void) -{ - u32 lost, delta, last_offset; - static int first_run = 1; - last_offset = offset_tick; - - write_seqlock(&monotonic_lock); - - offset_tick = read_pmtmr(); - - /* calculate tick interval */ - delta = (offset_tick - last_offset) & ACPI_PM_MASK; - - /* convert to usecs */ - delta = cyc2us(delta); - - /* update the monotonic base value */ - monotonic_base += delta * NSEC_PER_USEC; - write_sequnlock(&monotonic_lock); - - /* convert to ticks */ - delta += offset_delay; - lost = delta / (USEC_PER_SEC / HZ); - offset_delay = delta % (USEC_PER_SEC / HZ); - - - /* compensate for lost ticks */ - if (lost >= 2) - jiffies_64 += lost - 1; - - /* don't calculate delay for first run, - or if we've got less then a tick */ - if (first_run || (lost < 1)) { - first_run = 0; - offset_delay = 0; - } -} - -static int pmtmr_resume(void) -{ - write_seqlock(&monotonic_lock); - /* Assume this is the last mark offset time */ - offset_tick = read_pmtmr(); - write_sequnlock(&monotonic_lock); - return 0; -} - -static unsigned long long monotonic_clock_pmtmr(void) -{ - u32 last_offset, this_offset; - unsigned long long base, ret; - unsigned seq; - - - /* atomically read monotonic base & last_offset */ - do { - seq = read_seqbegin(&monotonic_lock); - last_offset = offset_tick; - base = monotonic_base; - } while (read_seqretry(&monotonic_lock, seq)); - - /* Read the pmtmr */ - this_offset = read_pmtmr(); - - /* convert to nanoseconds */ - ret = (this_offset - last_offset) & ACPI_PM_MASK; - ret = base + (cyc2us(ret) * NSEC_PER_USEC); - return ret; -} - -static void delay_pmtmr(unsigned long loops) -{ - unsigned long bclock, now; - - rdtscl(bclock); - do - { - rep_nop(); - rdtscl(now); - } while ((now-bclock) < loops); -} - - -/* - * get the offset (in microseconds) from the last call to mark_offset() - 
* - Called holding a reader xtime_lock - */ -static unsigned long get_offset_pmtmr(void) -{ - u32 now, offset, delta = 0; - - offset = offset_tick; - now = read_pmtmr(); - delta = (now - offset)&ACPI_PM_MASK; - - return (unsigned long) offset_delay + cyc2us(delta); -} - - -/* acpi timer_opts struct */ -static struct timer_opts timer_pmtmr = { - .name = "pmtmr", - .mark_offset = mark_offset_pmtmr, - .get_offset = get_offset_pmtmr, - .monotonic_clock = monotonic_clock_pmtmr, - .delay = delay_pmtmr, - .read_timer = read_timer_tsc, - .resume = pmtmr_resume, -}; - -struct init_timer_opts __initdata timer_pmtmr_init = { - .init = init_pmtmr, - .opts = &timer_pmtmr, -}; - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Dominik Brodowski "); -MODULE_DESCRIPTION("Power Management Timer (PMTMR) as primary timing source for x86"); Index: linux/arch/i386/kernel/timers/timer_tsc.c =================================================================== --- linux.orig/arch/i386/kernel/timers/timer_tsc.c +++ /dev/null @@ -1,595 +0,0 @@ -/* - * This code largely moved from arch/i386/kernel/time.c. - * See comments there for proper credits. - * - * 2004-06-25 Jesper Juhl - * moved mark_offset_tsc below cpufreq_delayed_get to avoid gcc 3.4 - * failing to inline. 
- */ - -#include -#include -#include -#include -#include -#include -#include - -#include -#include -/* processor.h for distable_tsc flag */ -#include - -#include "io_ports.h" -#include "mach_timer.h" - -#include -#include - -#ifdef CONFIG_HPET_TIMER -static unsigned long hpet_usec_quotient; -static unsigned long hpet_last; -static struct timer_opts timer_tsc; -#endif - -static inline void cpufreq_delayed_get(void); - -int tsc_disable __devinitdata = 0; - -static int use_tsc; -/* Number of usecs that the last interrupt was delayed */ -static int delay_at_last_interrupt; - -static unsigned long last_tsc_low; /* lsb 32 bits of Time Stamp Counter */ -static unsigned long last_tsc_high; /* msb 32 bits of Time Stamp Counter */ -static unsigned long long monotonic_base; -static seqlock_t monotonic_lock = SEQLOCK_UNLOCKED; - -/* convert from cycles(64bits) => nanoseconds (64bits) - * basic equation: - * ns = cycles / (freq / ns_per_sec) - * ns = cycles * (ns_per_sec / freq) - * ns = cycles * (10^9 / (cpu_mhz * 10^6)) - * ns = cycles * (10^3 / cpu_mhz) - * - * Then we use scaling math (suggested by george@mvista.com) to get: - * ns = cycles * (10^3 * SC / cpu_mhz) / SC - * ns = cycles * cyc2ns_scale / SC - * - * And since SC is a constant power of two, we can convert the div - * into a shift. - * -johnstul@us.ibm.com "math is hard, lets go shopping!" - */ -static unsigned long cyc2ns_scale; -#define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */ - -static inline void set_cyc2ns_scale(unsigned long cpu_mhz) -{ - cyc2ns_scale = (1000 << CYC2NS_SCALE_FACTOR)/cpu_mhz; -} - -static inline unsigned long long cycles_2_ns(unsigned long long cyc) -{ - return (cyc * cyc2ns_scale) >> CYC2NS_SCALE_FACTOR; -} - -static int count2; /* counter for mark_offset_tsc() */ - -/* Cached *multiplier* to convert TSC counts to microseconds. - * (see the equation below). - * Equal to 2^32 * (1 / (clocks per usec) ). - * Initialized in time_init. 
- */ -static unsigned long fast_gettimeoffset_quotient; - -static unsigned long get_offset_tsc(void) -{ - register unsigned long eax, edx; - - /* Read the Time Stamp Counter */ - - rdtsc(eax,edx); - - /* .. relative to previous jiffy (32 bits is enough) */ - eax -= last_tsc_low; /* tsc_low delta */ - - /* - * Time offset = (tsc_low delta) * fast_gettimeoffset_quotient - * = (tsc_low delta) * (usecs_per_clock) - * = (tsc_low delta) * (usecs_per_jiffy / clocks_per_jiffy) - * - * Using a mull instead of a divl saves up to 31 clock cycles - * in the critical path. - */ - - __asm__("mull %2" - :"=a" (eax), "=d" (edx) - :"rm" (fast_gettimeoffset_quotient), - "0" (eax)); - - /* our adjusted time offset in microseconds */ - return delay_at_last_interrupt + edx; -} - -static unsigned long long monotonic_clock_tsc(void) -{ - unsigned long long last_offset, this_offset, base; - unsigned seq; - - /* atomically read monotonic base & last_offset */ - do { - seq = read_seqbegin(&monotonic_lock); - last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low; - base = monotonic_base; - } while (read_seqretry(&monotonic_lock, seq)); - - /* Read the Time Stamp Counter */ - rdtscll(this_offset); - - /* return the value in ns */ - return base + cycles_2_ns(this_offset - last_offset); -} - -/* - * Scheduler clock - returns current time in nanosec units. - */ -unsigned long long sched_clock(void) -{ - unsigned long long this_offset; - - /* - * In the NUMA case we dont use the TSC as they are not - * synchronized across all CPUs. 
- */ -#ifndef CONFIG_NUMA - if (!use_tsc) -#endif - /* no locking but a rare wrong value is not a big deal */ - return jiffies_64 * (1000000000 / HZ); - - /* Read the Time Stamp Counter */ - rdtscll(this_offset); - - /* return the value in ns */ - return cycles_2_ns(this_offset); -} - -static void delay_tsc(unsigned long loops) -{ - unsigned long bclock, now; - - rdtscl(bclock); - do - { - rep_nop(); - rdtscl(now); - } while ((now-bclock) < loops); -} - -#ifdef CONFIG_HPET_TIMER -static void mark_offset_tsc_hpet(void) -{ - unsigned long long this_offset, last_offset; - unsigned long offset, temp, hpet_current; - - write_seqlock(&monotonic_lock); - last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low; - /* - * It is important that these two operations happen almost at - * the same time. We do the RDTSC stuff first, since it's - * faster. To avoid any inconsistencies, we need interrupts - * disabled locally. - */ - /* - * Interrupts are just disabled locally since the timer irq - * has the SA_INTERRUPT flag set. 
-arca - */ - /* read Pentium cycle counter */ - - hpet_current = hpet_readl(HPET_COUNTER); - rdtsc(last_tsc_low, last_tsc_high); - - /* lost tick compensation */ - offset = hpet_readl(HPET_T0_CMP) - hpet_tick; - if (unlikely(((offset - hpet_last) > hpet_tick) && (hpet_last != 0))) { - int lost_ticks = (offset - hpet_last) / hpet_tick; - jiffies_64 += lost_ticks; - } - hpet_last = hpet_current; - - /* update the monotonic base value */ - this_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low; - monotonic_base += cycles_2_ns(this_offset - last_offset); - write_sequnlock(&monotonic_lock); - - /* calculate delay_at_last_interrupt */ - /* - * Time offset = (hpet delta) * ( usecs per HPET clock ) - * = (hpet delta) * ( usecs per tick / HPET clocks per tick) - * = (hpet delta) * ( hpet_usec_quotient ) / (2^32) - * Where, - * hpet_usec_quotient = (2^32 * usecs per tick)/HPET clocks per tick - */ - delay_at_last_interrupt = hpet_current - offset; - ASM_MUL64_REG(temp, delay_at_last_interrupt, - hpet_usec_quotient, delay_at_last_interrupt); -} -#endif - - -#ifdef CONFIG_CPU_FREQ -#include - -static unsigned int cpufreq_delayed_issched = 0; -static unsigned int cpufreq_init = 0; -static struct work_struct cpufreq_delayed_get_work; - -static void handle_cpufreq_delayed_get(void *v) -{ - unsigned int cpu; - for_each_online_cpu(cpu) { - cpufreq_get(cpu); - } - cpufreq_delayed_issched = 0; -} - -/* if we notice lost ticks, schedule a call to cpufreq_get() as it tries - * to verify the CPU frequency the timing core thinks the CPU is running - * at is still correct. - */ -static inline void cpufreq_delayed_get(void) -{ - if (cpufreq_init && !cpufreq_delayed_issched) { - cpufreq_delayed_issched = 1; - printk(KERN_DEBUG "Losing some ticks... checking if CPU frequency changed.\n"); - schedule_work(&cpufreq_delayed_get_work); - } -} - -/* If the CPU frequency is scaled, TSC-based delays will need a different - * loops_per_jiffy value to function properly. 
- */ - -static unsigned int ref_freq = 0; -static unsigned long loops_per_jiffy_ref = 0; - -#ifndef CONFIG_SMP -static unsigned long fast_gettimeoffset_ref = 0; -static unsigned int cpu_khz_ref = 0; -#endif - -static int -time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, - void *data) -{ - struct cpufreq_freqs *freq = data; - - if (val != CPUFREQ_RESUMECHANGE) - write_seqlock_irq(&xtime_lock); - if (!ref_freq) { - ref_freq = freq->old; - loops_per_jiffy_ref = cpu_data[freq->cpu].loops_per_jiffy; -#ifndef CONFIG_SMP - fast_gettimeoffset_ref = fast_gettimeoffset_quotient; - cpu_khz_ref = cpu_khz; -#endif - } - - if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) || - (val == CPUFREQ_POSTCHANGE && freq->old > freq->new) || - (val == CPUFREQ_RESUMECHANGE)) { - if (!(freq->flags & CPUFREQ_CONST_LOOPS)) - cpu_data[freq->cpu].loops_per_jiffy = cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new); -#ifndef CONFIG_SMP - if (cpu_khz) - cpu_khz = cpufreq_scale(cpu_khz_ref, ref_freq, freq->new); - if (use_tsc) { - if (!(freq->flags & CPUFREQ_CONST_LOOPS)) { - fast_gettimeoffset_quotient = cpufreq_scale(fast_gettimeoffset_ref, freq->new, ref_freq); - set_cyc2ns_scale(cpu_khz/1000); - } - } -#endif - } - - if (val != CPUFREQ_RESUMECHANGE) - write_sequnlock_irq(&xtime_lock); - - return 0; -} - -static struct notifier_block time_cpufreq_notifier_block = { - .notifier_call = time_cpufreq_notifier -}; - - -static int __init cpufreq_tsc(void) -{ - int ret; - INIT_WORK(&cpufreq_delayed_get_work, handle_cpufreq_delayed_get, NULL); - ret = cpufreq_register_notifier(&time_cpufreq_notifier_block, - CPUFREQ_TRANSITION_NOTIFIER); - if (!ret) - cpufreq_init = 1; - return ret; -} -core_initcall(cpufreq_tsc); - -#else /* CONFIG_CPU_FREQ */ -static inline void cpufreq_delayed_get(void) { return; } -#endif - -int recalibrate_cpu_khz(void) -{ -#ifndef CONFIG_SMP - unsigned int cpu_khz_old = cpu_khz; - - if (cpu_has_tsc) { - init_cpu_khz(); - cpu_data[0].loops_per_jiffy = 
- cpufreq_scale(cpu_data[0].loops_per_jiffy, - cpu_khz_old, - cpu_khz); - return 0; - } else - return -ENODEV; -#else - return -ENODEV; -#endif -} -EXPORT_SYMBOL(recalibrate_cpu_khz); - -static void mark_offset_tsc(void) -{ - unsigned long lost,delay; - unsigned long delta = last_tsc_low; - int count; - int countmp; - static int count1 = 0; - unsigned long long this_offset, last_offset; - static int lost_count = 0; - - write_seqlock(&monotonic_lock); - last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low; - /* - * It is important that these two operations happen almost at - * the same time. We do the RDTSC stuff first, since it's - * faster. To avoid any inconsistencies, we need interrupts - * disabled locally. - */ - - /* - * Interrupts are just disabled locally since the timer irq - * has the SA_INTERRUPT flag set. -arca - */ - - /* read Pentium cycle counter */ - - rdtsc(last_tsc_low, last_tsc_high); - - spin_lock(&i8253_lock); - outb_p(0x00, PIT_MODE); /* latch the count ASAP */ - - count = inb_p(PIT_CH0); /* read the latched count */ - count |= inb(PIT_CH0) << 8; - - /* - * VIA686a test code... 
reset the latch if count > max + 1 - * from timer_pit.c - cjb - */ - if (count > LATCH) { - outb_p(0x34, PIT_MODE); - outb_p(LATCH & 0xff, PIT_CH0); - outb(LATCH >> 8, PIT_CH0); - count = LATCH - 1; - } - - spin_unlock(&i8253_lock); - - if (pit_latch_buggy) { - /* get center value of last 3 time lutch */ - if ((count2 >= count && count >= count1) - || (count1 >= count && count >= count2)) { - count2 = count1; count1 = count; - } else if ((count1 >= count2 && count2 >= count) - || (count >= count2 && count2 >= count1)) { - countmp = count;count = count2; - count2 = count1;count1 = countmp; - } else { - count2 = count1; count1 = count; count = count1; - } - } - - /* lost tick compensation */ - delta = last_tsc_low - delta; - { - register unsigned long eax, edx; - eax = delta; - __asm__("mull %2" - :"=a" (eax), "=d" (edx) - :"rm" (fast_gettimeoffset_quotient), - "0" (eax)); - delta = edx; - } - delta += delay_at_last_interrupt; - lost = delta/(1000000/HZ); - delay = delta%(1000000/HZ); - if (lost >= 2) { - jiffies_64 += lost-1; - - /* sanity check to ensure we're not always losing ticks */ - if (lost_count++ > 100) { - printk(KERN_WARNING "Losing too many ticks!\n"); - printk(KERN_WARNING "TSC cannot be used as a timesource. \n"); - printk(KERN_WARNING "Possible reasons for this are:\n"); - printk(KERN_WARNING " You're running with Speedstep,\n"); - printk(KERN_WARNING " You don't have DMA enabled for your hard disk (see hdparm),\n"); - printk(KERN_WARNING " Incorrect TSC synchronization on an SMP system (see dmesg).\n"); - printk(KERN_WARNING "Falling back to a sane timesource now.\n"); - - clock_fallback(); - } - /* ... 
but give the TSC a fair chance */ - if (lost_count > 25) - cpufreq_delayed_get(); - } else - lost_count = 0; - /* update the monotonic base value */ - this_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low; - monotonic_base += cycles_2_ns(this_offset - last_offset); - write_sequnlock(&monotonic_lock); - - /* calculate delay_at_last_interrupt */ - count = ((LATCH-1) - count) * TICK_SIZE; - delay_at_last_interrupt = (count + LATCH/2) / LATCH; - - /* catch corner case where tick rollover occured - * between tsc and pit reads (as noted when - * usec delta is > 90% # of usecs/tick) - */ - if (lost && abs(delay - delay_at_last_interrupt) > (900000/HZ)) - jiffies_64++; -} - -static int __init init_tsc(char* override) -{ - - /* check clock override */ - if (override[0] && strncmp(override,"tsc",3)) { -#ifdef CONFIG_HPET_TIMER - if (is_hpet_enabled()) { - printk(KERN_ERR "Warning: clock= override failed. Defaulting to tsc\n"); - } else -#endif - { - return -ENODEV; - } - } - - /* - * If we have APM enabled or the CPU clock speed is variable - * (CPU stops clock on HLT or slows clock to save power) - * then the TSC timestamps may diverge by up to 1 jiffy from - * 'real time' but nothing will break. - * The most frequent case is that the CPU is "woken" from a halt - * state by the timer interrupt itself, so we get 0 error. In the - * rare cases where a driver would "wake" the CPU and request a - * timestamp, the maximum error is < 1 jiffy. But timestamps are - * still perfectly ordered. - * Note that the TSC counter will be reset if APM suspends - * to disk; this won't break the kernel, though, 'cuz we're - * smart. See arch/i386/kernel/apm.c. - */ - /* - * Firstly we have to do a CPU check for chips with - * a potentially buggy TSC. At this point we haven't run - * the ident/bugs checks so we must run this hook as it - * may turn off the TSC flag. - * - * NOTE: this doesn't yet handle SMP 486 machines where only - * some CPU's have a TSC. 
Thats never worked and nobody has - * moaned if you have the only one in the world - you fix it! - */ - - count2 = LATCH; /* initialize counter for mark_offset_tsc() */ - - if (cpu_has_tsc) { - unsigned long tsc_quotient; -#ifdef CONFIG_HPET_TIMER - if (is_hpet_enabled() && hpet_use_timer) { - unsigned long result, remain; - printk("Using TSC for gettimeofday\n"); - tsc_quotient = calibrate_tsc_hpet(NULL); - timer_tsc.mark_offset = &mark_offset_tsc_hpet; - /* - * Math to calculate hpet to usec multiplier - * Look for the comments at get_offset_tsc_hpet() - */ - ASM_DIV64_REG(result, remain, hpet_tick, - 0, KERNEL_TICK_USEC); - if (remain > (hpet_tick >> 1)) - result++; /* rounding the result */ - - hpet_usec_quotient = result; - } else -#endif - { - tsc_quotient = calibrate_tsc(); - } - - if (tsc_quotient) { - fast_gettimeoffset_quotient = tsc_quotient; - use_tsc = 1; - /* - * We could be more selective here I suspect - * and just enable this for the next intel chips ? - */ - /* report CPU clock rate in Hz. - * The formula is (10^6 * 2^32) / (2^32 * 1 / (clocks/us)) = - * clock/second. Our precision is about 100 ppm. - */ - { unsigned long eax=0, edx=1000; - __asm__("divl %2" - :"=a" (cpu_khz), "=d" (edx) - :"r" (tsc_quotient), - "0" (eax), "1" (edx)); - printk("Detected %u.%03u MHz processor.\n", - cpu_khz / 1000, cpu_khz % 1000); - } - set_cyc2ns_scale(cpu_khz/1000); - return 0; - } - } - return -ENODEV; -} - -static int tsc_resume(void) -{ - write_seqlock(&monotonic_lock); - /* Assume this is the last mark offset time */ - rdtsc(last_tsc_low, last_tsc_high); -#ifdef CONFIG_HPET_TIMER - if (is_hpet_enabled() && hpet_use_timer) - hpet_last = hpet_readl(HPET_COUNTER); -#endif - write_sequnlock(&monotonic_lock); - return 0; -} - -#ifndef CONFIG_X86_TSC -/* disable flag for tsc. 
Takes effect by clearing the TSC cpu flag - * in cpu/common.c */ -static int __init tsc_setup(char *str) -{ - tsc_disable = 1; - return 1; -} -#else -static int __init tsc_setup(char *str) -{ - printk(KERN_WARNING "notsc: Kernel compiled with CONFIG_X86_TSC, " - "cannot disable TSC.\n"); - return 1; -} -#endif -__setup("notsc", tsc_setup); - - - -/************************************************************/ - -/* tsc timer_opts struct */ -static struct timer_opts timer_tsc = { - .name = "tsc", - .mark_offset = mark_offset_tsc, - .get_offset = get_offset_tsc, - .monotonic_clock = monotonic_clock_tsc, - .delay = delay_tsc, - .read_timer = read_timer_tsc, - .resume = tsc_resume, -}; - -struct init_timer_opts __initdata timer_tsc_init = { - .init = init_tsc, - .opts = &timer_tsc, -}; Index: linux/arch/i386/kernel/traps.c =================================================================== --- linux.orig/arch/i386/kernel/traps.c +++ linux/arch/i386/kernel/traps.c @@ -93,7 +93,7 @@ asmlinkage void machine_check(void); static int kstack_depth_to_print = 24; struct notifier_block *i386die_chain; -static DEFINE_SPINLOCK(die_notifier_lock); +static DEFINE_RAW_SPINLOCK(die_notifier_lock); int register_die_notifier(struct notifier_block *nb) { @@ -116,22 +116,27 @@ static inline unsigned long print_contex unsigned long *stack, unsigned long ebp) { unsigned long addr; +#ifndef CONFIG_FRAME_POINTER + unsigned long prev_frame; +#endif -#ifdef CONFIG_FRAME_POINTER +#ifdef CONFIG_FRAME_POINTER while (valid_stack_ptr(tinfo, (void *)ebp)) { addr = *(unsigned long *)(ebp + 4); printk(" [<%08lx>] ", addr); print_symbol("%s", addr); - printk("\n"); + printk(" (%ld)\n", *(unsigned long *)ebp - ebp); ebp = *(unsigned long *)ebp; } #else + prev_frame = (unsigned long)stack; while (valid_stack_ptr(tinfo, stack)) { addr = *stack++; if (__kernel_text_address(addr)) { printk(" [<%08lx>]", addr); print_symbol(" %s", addr); - printk("\n"); + printk(" (%ld)\n", (unsigned long)stack - prev_frame); 
+ prev_frame = (unsigned long)stack; } } #endif @@ -163,6 +168,8 @@ void show_trace(struct task_struct *task break; printk(" =======================\n"); } + print_traces(task); + show_held_locks(task); } void show_stack(struct task_struct *task, unsigned long *esp) @@ -201,6 +208,12 @@ void dump_stack(void) EXPORT_SYMBOL(dump_stack); +#if defined(CONFIG_DEBUG_STACKOVERFLOW) && defined(CONFIG_LATENCY_TRACE) +extern unsigned long worst_stack_left; +#else +# define worst_stack_left -1L +#endif + void show_registers(struct pt_regs *regs) { int i; @@ -225,10 +238,17 @@ void show_registers(struct pt_regs *regs regs->eax, regs->ebx, regs->ecx, regs->edx); printk("esi: %08lx edi: %08lx ebp: %08lx esp: %08lx\n", regs->esi, regs->edi, regs->ebp, esp); - printk("ds: %04x es: %04x ss: %04x\n", - regs->xds & 0xffff, regs->xes & 0xffff, ss); - printk("Process %s (pid: %d, threadinfo=%p task=%p)", + printk("ds: %04x es: %04x ss: %04x preempt: %08x\n", + regs->xds & 0xffff, regs->xes & 0xffff, ss, preempt_count()); + printk("Process %s (pid: %d, threadinfo=%p task=%p", current->comm, current->pid, current_thread_info(), current); + + if (in_kernel) + printk(" stack_left=%ld worst_left=%ld)", + (esp & (THREAD_SIZE-1))-sizeof(struct thread_info), + worst_stack_left); + else + printk(")"); /* * When in-kernel, we also print out the stack and code at the * time of the fault.. 
@@ -297,11 +317,11 @@ bug: void die(const char * str, struct pt_regs * regs, long err) { static struct { - spinlock_t lock; + raw_spinlock_t lock; u32 lock_owner; int lock_owner_depth; } die = { - .lock = SPIN_LOCK_UNLOCKED, + .lock = RAW_SPIN_LOCK_UNLOCKED, .lock_owner = -1, .lock_owner_depth = 0 }; @@ -379,6 +399,11 @@ static void __kprobes do_trap(int trapnr if (!user_mode(regs)) goto kernel_trap; +#ifdef CONFIG_PREEMPT_RT + raw_local_irq_enable(); + preempt_check_resched(); +#endif + trap_signal: { if (info) force_sig_info(signr, info, tsk); @@ -508,7 +533,7 @@ fastcall void __kprobes do_general_prote return; gp_in_vm86: - local_irq_enable(); + raw_local_irq_enable(); handle_vm86_fault((struct kernel_vm86_regs *) regs, error_code); return; @@ -562,10 +587,12 @@ static void unknown_nmi_error(unsigned c printk("Do you have a strange power saving mode enabled?\n"); } -static DEFINE_SPINLOCK(nmi_print_lock); +static DEFINE_RAW_SPINLOCK(nmi_print_lock); void die_nmi (struct pt_regs *regs, const char *msg) { + deadlock_trace_off(); + if (notify_die(DIE_NMIWATCHDOG, msg, regs, 0, 0, SIGINT) == NOTIFY_STOP) return; @@ -593,10 +620,11 @@ void die_nmi (struct pt_regs *regs, cons crash_kexec(regs); } + nmi_exit(); do_exit(SIGSEGV); } -static void default_do_nmi(struct pt_regs * regs) +static void notrace default_do_nmi(struct pt_regs * regs) { unsigned char reason = 0; @@ -615,6 +643,7 @@ static void default_do_nmi(struct pt_reg */ if (nmi_watchdog) { nmi_watchdog_tick(regs); +// trace_special(6, 1, 0); return; } #endif @@ -634,18 +663,19 @@ static void default_do_nmi(struct pt_reg reassert_nmi(); } -static int dummy_nmi_callback(struct pt_regs * regs, int cpu) +static notrace int dummy_nmi_callback(struct pt_regs * regs, int cpu) { return 0; } static nmi_callback_t nmi_callback = dummy_nmi_callback; -fastcall void do_nmi(struct pt_regs * regs, long error_code) +fastcall notrace void do_nmi(struct pt_regs * regs, long error_code) { int cpu; nmi_enter(); + 
nmi_trace((unsigned long)do_nmi, regs->eip, regs->eflags); cpu = smp_processor_id(); @@ -723,7 +753,7 @@ fastcall void __kprobes do_debug(struct return; /* It's safe to allow irq's after DR6 has been saved */ if (regs->eflags & X86_EFLAGS_IF) - local_irq_enable(); + raw_local_irq_enable(); /* Mask out spurious debug traps due to lazy DR7 setting */ if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) { Index: linux/arch/i386/kernel/tsc.c =================================================================== --- /dev/null +++ linux/arch/i386/kernel/tsc.c @@ -0,0 +1,493 @@ +/* + * This code largely moved from arch/i386/kernel/timer/timer_tsc.c + * which was originally moved from arch/i386/kernel/time.c. + * See comments there for proper credits. + */ + +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "mach_timer.h" + +/* + * On some systems the TSC frequency does not + * change with the cpu frequency. So we need + * an extra value to store the TSC freq + */ +unsigned int tsc_khz; + +int tsc_disable __initdata = 0; + +#ifdef CONFIG_X86_TSC +static int __init tsc_setup(char *str) +{ + printk(KERN_WARNING "notsc: Kernel compiled with CONFIG_X86_TSC, " + "cannot disable TSC.\n"); + return 1; +} +#else +/* + * disable flag for tsc. 
Takes effect by clearing the TSC cpu flag + * in cpu/common.c + */ +static int __init tsc_setup(char *str) +{ + tsc_disable = 1; + + return 1; +} +#endif + +__setup("notsc", tsc_setup); + +/* + * code to mark and check if the TSC is unstable + * due to cpufreq or due to unsynced TSCs + */ +static int tsc_unstable; + +static inline int check_tsc_unstable(void) +{ + return tsc_unstable; +} + +void mark_tsc_unstable(void) +{ + tsc_unstable = 1; +} + +/* Code to compensate for C3 stalls */ +static u64 tsc_c3_offset; + +void tsc_c3_compensate(unsigned long nsecs) +{ + /* this could def be optimized */ + u64 cycles = ((u64)nsecs * tsc_khz); + + do_div(cycles, 1000000); + tsc_c3_offset += cycles; +} + +EXPORT_SYMBOL_GPL(tsc_c3_compensate); + +static inline u64 tsc_read_c3_time(void) +{ + return tsc_c3_offset; +} + +/* Accelerators for sched_clock() + * convert from cycles(64bits) => nanoseconds (64bits) + * basic equation: + * ns = cycles / (freq / ns_per_sec) + * ns = cycles * (ns_per_sec / freq) + * ns = cycles * (10^9 / (cpu_khz * 10^3)) + * ns = cycles * (10^6 / cpu_khz) + * + * Then we use scaling math (suggested by george@mvista.com) to get: + * ns = cycles * (10^6 * SC / cpu_khz) / SC + * ns = cycles * cyc2ns_scale / SC + * + * And since SC is a constant power of two, we can convert the div + * into a shift. + * + * We can use khz divisor instead of mhz to keep a better precision, since + * cyc2ns_scale is limited to 10^6 * 2^10, which fits in 32 bits. + * (mathieu.desnoyers@polymtl.ca) + * + * -johnstul@us.ibm.com "math is hard, lets go shopping!" 
+ */ +static unsigned long cyc2ns_scale; + +#define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */ + +static inline void set_cyc2ns_scale(unsigned long cpu_khz) +{ + cyc2ns_scale = (1000000 << CYC2NS_SCALE_FACTOR)/cpu_khz; +} + +static inline unsigned long long cycles_2_ns(unsigned long long cyc) +{ + return (cyc * cyc2ns_scale) >> CYC2NS_SCALE_FACTOR; +} + +/* + * Scheduler clock - returns current time in nanosec units. + */ +unsigned long long sched_clock(void) +{ + unsigned long long this_offset; + + /* + * in the NUMA case we don't use the TSC as they are not + * synchronized across all CPUs. + */ +#ifndef CONFIG_NUMA + if (!cpu_khz || check_tsc_unstable()) +#endif + /* no locking but a rare wrong value is not a big deal */ + return (jiffies_64 - INITIAL_JIFFIES) * (1000000000 / HZ); + + /* read the Time Stamp Counter: */ + rdtscll(this_offset); + this_offset += tsc_read_c3_time(); + + /* return the value in ns */ + return cycles_2_ns(this_offset); +} + +static unsigned long calculate_cpu_khz(void) +{ + unsigned long long start, end; + unsigned long count; + u64 delta64; + int i; + + /* run 3 times to ensure the cache is warm */ + for (i = 0; i < 3; i++) { + mach_prepare_counter(); + rdtscll(start); + mach_countup(&count); + rdtscll(end); + } + /* + * Error: ECTCNEVERSET + * The CTC wasn't reliable: we got a hit on the very first read, + * or the CPU was so fast/slow that the quotient wouldn't fit in + * 32 bits.. 
+ */ + if (count <= 1) + return 0; + + delta64 = end - start; + + /* cpu freq too fast: */ + if (delta64 > (1ULL<<32)) + return 0; + + /* cpu freq too slow: */ + if (delta64 <= CALIBRATE_TIME_MSEC) + return 0; + + delta64 += CALIBRATE_TIME_MSEC/2; /* round for do_div */ + do_div(delta64,CALIBRATE_TIME_MSEC); + + return (unsigned long)delta64; +} + +int recalibrate_cpu_khz(void) +{ +#ifndef CONFIG_SMP + unsigned long cpu_khz_old = cpu_khz; + + if (cpu_has_tsc) { + cpu_khz = calculate_cpu_khz(); + tsc_khz = cpu_khz; + cpu_data[0].loops_per_jiffy = + cpufreq_scale(cpu_data[0].loops_per_jiffy, + cpu_khz_old, cpu_khz); + return 0; + } else + return -ENODEV; +#else + return -ENODEV; +#endif +} + +EXPORT_SYMBOL(recalibrate_cpu_khz); + +void tsc_init(void) +{ + if (!cpu_has_tsc || tsc_disable) + return; + + cpu_khz = calculate_cpu_khz(); + tsc_khz = cpu_khz; + + if (!cpu_khz) + return; + + printk("Detected %lu.%03lu MHz processor.\n", + (unsigned long)cpu_khz / 1000, + (unsigned long)cpu_khz % 1000); + + set_cyc2ns_scale(cpu_khz); + use_tsc_delay(); +} + +#ifdef CONFIG_CPU_FREQ + +static unsigned int cpufreq_delayed_issched = 0; +static unsigned int cpufreq_init = 0; +static struct work_struct cpufreq_delayed_get_work; + +static void handle_cpufreq_delayed_get(void *v) +{ + unsigned int cpu; + + for_each_online_cpu(cpu) + cpufreq_get(cpu); + + cpufreq_delayed_issched = 0; +} + +/* + * if we notice cpufreq oddness, schedule a call to cpufreq_get() as it tries + * to verify the CPU frequency the timing core thinks the CPU is running + * at is still correct. + */ +static inline void cpufreq_delayed_get(void) +{ + if (cpufreq_init && !cpufreq_delayed_issched) { + cpufreq_delayed_issched = 1; + printk(KERN_DEBUG "Checking if CPU frequency changed.\n"); + schedule_work(&cpufreq_delayed_get_work); + } +} + +/* + * if the CPU frequency is scaled, TSC-based delays will need a different + * loops_per_jiffy value to function properly. 
+ */ +static unsigned int ref_freq = 0; +static unsigned long loops_per_jiffy_ref = 0; + +#ifndef CONFIG_SMP +static unsigned long cpu_khz_ref = 0; +#endif + +static int +time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, void *data) +{ + struct cpufreq_freqs *freq = data; + + if (val != CPUFREQ_RESUMECHANGE) + write_seqlock_irq(&xtime_lock); + + if (!ref_freq) { + ref_freq = freq->old; + loops_per_jiffy_ref = cpu_data[freq->cpu].loops_per_jiffy; +#ifndef CONFIG_SMP + cpu_khz_ref = cpu_khz; +#endif + } + + if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) || + (val == CPUFREQ_POSTCHANGE && freq->old > freq->new) || + (val == CPUFREQ_RESUMECHANGE)) { + if (!(freq->flags & CPUFREQ_CONST_LOOPS)) + cpu_data[freq->cpu].loops_per_jiffy = cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new); + + if (cpu_khz) { +#ifndef CONFIG_SMP + cpu_khz = cpufreq_scale(cpu_khz_ref, ref_freq, + freq->new); +#endif + if (!(freq->flags & CPUFREQ_CONST_LOOPS)) { + tsc_khz = cpu_khz; + set_cyc2ns_scale(cpu_khz); + /* + * TSC based sched_clock turns + * to junk w/ cpufreq + */ + mark_tsc_unstable(); + } + } + } + + if (val != CPUFREQ_RESUMECHANGE) + write_sequnlock_irq(&xtime_lock); + + return 0; +} + +static struct notifier_block time_cpufreq_notifier_block = { + .notifier_call = time_cpufreq_notifier +}; + +static int __init cpufreq_tsc(void) +{ + int ret; + + INIT_WORK(&cpufreq_delayed_get_work, handle_cpufreq_delayed_get, NULL); + ret = cpufreq_register_notifier(&time_cpufreq_notifier_block, + CPUFREQ_TRANSITION_NOTIFIER); + if (!ret) + cpufreq_init = 1; + + return ret; +} + +core_initcall(cpufreq_tsc); + +#endif + +/* Clock source code */ + +static unsigned long current_tsc_khz = 0; +static int tsc_update_callback(void); + +#ifdef CONFIG_PARANOID_GENERIC_TIME +/* This will hurt performance! 
*/ +static DEFINE_RAW_SPINLOCK(checktsc_lock); /* serializes the backward checks below */ +static cycle_t last_tsc; /* most recent reading recorded by the checks */ +/* checked read: warn once if the TSC is below the last recorded reading */ +static cycle_t read_tsc(void) +{ + static int once = 1; + + unsigned long flags; + cycle_t ret; + + spin_lock_irqsave(&checktsc_lock, flags); + + rdtscll(ret); + + if (once && ret < last_tsc) { + once = 0; + spin_unlock_irqrestore(&checktsc_lock, flags); + printk("BUG in read_tsc(): TSC went backward!\n"); + if (num_online_cpus() > 1) + printk("... Unsynced TSCs?\n"); + printk("... [ from %016Lx to %016Lx ]\n", last_tsc, ret); + + } else { + last_tsc = ret; + spin_unlock_irqrestore(&checktsc_lock, flags); + } + + return ret; +} +/* as read_tsc(), but with tsc_read_c3_time() added to the raw value */ +static cycle_t read_tsc_c3(void) +{ + static int once = 1; + + unsigned long flags; + cycle_t ret; + + spin_lock_irqsave(&checktsc_lock, flags); + + rdtscll(ret); + ret += tsc_read_c3_time(); + + if (once && ret < last_tsc) { + once = 0; + spin_unlock_irqrestore(&checktsc_lock, flags); + printk("BUG in read_tsc_c3(): TSC went backward!\n"); + if (num_online_cpus() > 1) + printk("... Unsynced TSCs?\n"); + printk("... 
[ from %016Lx to %016Lx ]\n", last_tsc, ret); + } else { + last_tsc = ret; + spin_unlock_irqrestore(&checktsc_lock, flags); + } + + return ret; +} + +#else /* CONFIG_PARANOID_GENERIC_TIME */ +/* unchecked read: the raw TSC value */ +static cycle_t read_tsc(void) +{ + cycle_t ret; + + rdtscll(ret); + + return ret; +} +/* raw TSC value plus tsc_read_c3_time() */ +static cycle_t read_tsc_c3(void) +{ + cycle_t ret; + + rdtscll(ret); + + return ret + tsc_read_c3_time(); +} + +#endif /* CONFIG_PARANOID_GENERIC_TIME */ + +static struct clocksource clocksource_tsc = { + .name = "tsc", + .rating = 300, + .read = read_tsc, + .mask = (cycle_t)-1, + .mult = 0, /* to be set from tsc_khz */ + .shift = 22, + .update_callback = tsc_update_callback, + .is_continuous = 1, +}; +/* re-evaluate read function, rating and mult; returns 1 if anything changed */ +static int tsc_update_callback(void) +{ + int change = 0; + + /* check to see if we should switch to the safe clocksource: */ + if (tsc_read_c3_time() && strncmp(clocksource_tsc.name, "c3tsc", 5)) { + printk("Falling back to C3 safe TSC\n"); + clocksource_tsc.read = read_tsc_c3; + clocksource_tsc.name = "c3tsc"; + change = 1; + } + /* demote an unstable TSC to rating 50 and ask for reselection: */ + if (clocksource_tsc.rating != 50 && check_tsc_unstable()) { + clocksource_tsc.rating = 50; + reselect_clocksource(); + change = 1; + } + + /* only update if tsc_khz has changed: */ + if (current_tsc_khz != tsc_khz) { + current_tsc_khz = tsc_khz; + clocksource_tsc.mult = clocksource_khz2mult(current_tsc_khz, + clocksource_tsc.shift); + change = 1; + } + + return change; +} + +/* + * Make an educated guess if the TSC is trustworthy and synchronized + * over all CPUs. Returns non-zero if it is assumed to be unsynchronized. + */ +static __init int unsynchronized_tsc(void) +{ + /* + * Intel systems are normally all synchronized. 
+ * Exceptions must mark TSC as unstable: + */ + if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) + return 0; + + /* assume systems with more than one possible CPU are not synchronized: */ + return num_possible_cpus() > 1; +} + +/* NUMAQ can't use TSC: */ +#ifndef CONFIG_X86_NUMAQ +static int __init init_tsc_clocksource(void) +{ + /* TSC initialization is done in arch/i386/kernel/tsc.c; register only if it left a non-zero tsc_khz */ + if (cpu_has_tsc && tsc_khz) { + if (unsynchronized_tsc()) /* lower rating from 300 to 150 if unsynced */ + clocksource_tsc.rating = 150; + current_tsc_khz = tsc_khz; + clocksource_tsc.mult = clocksource_khz2mult(current_tsc_khz, + clocksource_tsc.shift); + register_clocksource(&clocksource_tsc); + } + + return 0; +} + +module_init(init_tsc_clocksource); + +#endif Index: linux/arch/i386/kernel/vm86.c =================================================================== --- linux.orig/arch/i386/kernel/vm86.c +++ linux/arch/i386/kernel/vm86.c @@ -105,9 +105,10 @@ struct pt_regs * fastcall save_v86_state * from process context. Enable interrupts here, before trying * to access user space. 
*/ - local_irq_enable(); + raw_local_irq_enable(); if (!current->thread.vm86_info) { + raw_local_irq_disable(); printk("no vm86_info: BAD\n"); do_exit(SIGSEGV); } Index: linux/arch/i386/lib/bitops.c =================================================================== --- linux.orig/arch/i386/lib/bitops.c +++ linux/arch/i386/lib/bitops.c @@ -68,3 +68,37 @@ int find_next_zero_bit(const unsigned lo return (offset + set + res); } EXPORT_SYMBOL(find_next_zero_bit); + + +/* + * rw spinlock fallbacks (placed in .sched.text, lock word at (%eax)) + */ +#if defined(CONFIG_SMP) +asm( /* write side: addl the bias, spin until the word equals the bias, retry the subl */ +".section .sched.text\n" +".align 4\n" +".globl __write_lock_failed\n" +"__write_lock_failed:\n\t" + LOCK "addl $" RW_LOCK_BIAS_STR ",(%eax)\n" +"1: rep; nop\n\t" + "cmpl $" RW_LOCK_BIAS_STR ",(%eax)\n\t" + "jne 1b\n\t" + LOCK "subl $" RW_LOCK_BIAS_STR ",(%eax)\n\t" + "jnz __write_lock_failed\n\t" + "ret" +); + +asm( /* read side: incl, spin while (word - 1) is negative, retry the decl */ +".section .sched.text\n" +".align 4\n" +".globl __read_lock_failed\n" +"__read_lock_failed:\n\t" + LOCK "incl (%eax)\n" +"1: rep; nop\n\t" + "cmpl $1,(%eax)\n\t" + "js 1b\n\t" + LOCK "decl (%eax)\n\t" + "js __read_lock_failed\n\t" + "ret" +); +#endif Index: linux/arch/i386/lib/delay.c =================================================================== --- linux.orig/arch/i386/lib/delay.c +++ linux/arch/i386/lib/delay.c @@ -10,43 +10,93 @@ * we have to worry about. 
*/ +#include +#include #include #include #include -#include + #include #include #include #ifdef CONFIG_SMP -#include +# include #endif -extern struct timer_opts* timer; +/* simple loop based delay: decrement the count until it goes negative */ +static void delay_loop(unsigned long loops) +{ + int d0; + + __asm__ __volatile__( + "\tjmp 1f\n" + ".align 16\n" + "1:\tjmp 2f\n" + ".align 16\n" + "2:\tdecl %0\n\tjns 2b" + :"=&a" (d0) + :"0" (loops)); +} + +/* TSC based delay: spin until the TSC has advanced by at least loops */ +static void delay_tsc(unsigned long loops) +{ + unsigned long bclock, now; + + rdtscl(bclock); + do { + rep_nop(); + rdtscl(now); + } while ((now-bclock) < loops); +} + +/* + * Delay routine behind __delay(). Since we calibrate only once at boot, this + * function should be set once at boot and not changed + */ +static void (*delay_fn)(unsigned long) = delay_loop; +/* select the TSC based delay routine */ +void use_tsc_delay(void) +{ + delay_fn = delay_tsc; +} +/* read the TSC into *timer_val and return 0; returns -1 unless the TSC delay is in use */ +int read_current_timer(unsigned long *timer_val) +{ + if (delay_fn == delay_tsc) { + rdtscl(*timer_val); + return 0; + } + return -1; +} void __delay(unsigned long loops) { - cur_timer->delay(loops); + delay_fn(loops); } inline void __const_udelay(unsigned long xloops) { int d0; + xloops *= 4; __asm__("mull %0" :"=d" (xloops), "=&a" (d0) - :"1" (xloops),"0" (cpu_data[raw_smp_processor_id()].loops_per_jiffy * (HZ/4))); - __delay(++xloops); + :"1" (xloops), "0" + (cpu_data[raw_smp_processor_id()].loops_per_jiffy * (HZ/4))); + + __delay(++xloops); } void __udelay(unsigned long usecs) { - __const_udelay(usecs * 0x000010c7); /* 2**32 / 1000000 (rounded up) */ + __const_udelay(usecs * 0x000010c7); /* 2**32 / 1000000 (rounded up) */ } void __ndelay(unsigned long nsecs) { - __const_udelay(nsecs * 0x00005); /* 2**32 / 1000000000 (rounded up) */ + __const_udelay(nsecs * 0x00005); /* 2**32 / 1000000000 (rounded up) */ } EXPORT_SYMBOL(__delay); Index: linux/arch/i386/mach-default/setup.c =================================================================== --- linux.orig/arch/i386/mach-default/setup.c +++ linux/arch/i386/mach-default/setup.c @@ -34,7 +34,7 @@ 
void __init pre_intr_init_hook(void) /* * IRQ2 is cascade interrupt to second interrupt controller */ -static struct irqaction irq2 = { no_action, 0, CPU_MASK_NONE, "cascade", NULL, NULL}; +static struct irqaction irq2 = { no_action, SA_NODELAY, CPU_MASK_NONE, "cascade", NULL, NULL}; /** * intr_init_hook - post gate setup interrupt initialisation @@ -78,8 +78,6 @@ void __init trap_init_hook(void) { } -static struct irqaction irq0 = { timer_interrupt, SA_INTERRUPT, CPU_MASK_NONE, "timer", NULL, NULL}; - /** * time_init_hook - do any specific initialisations for the system timer. * @@ -89,7 +87,6 @@ static struct irqaction irq0 = { timer_ **/ void __init time_init_hook(void) { - setup_irq(0, &irq0); } #ifdef CONFIG_MCA Index: linux/arch/i386/mach-es7000/es7000plat.c =================================================================== --- linux.orig/arch/i386/mach-es7000/es7000plat.c +++ linux/arch/i386/mach-es7000/es7000plat.c @@ -65,7 +65,7 @@ es7000_rename_gsi(int ioapic, int gsi) if (!base) { int i; for (i = 0; i < nr_ioapics; i++) - base += nr_ioapic_registers[i]; + base += nr_ioapic_registers(i); } if (!ioapic && (gsi < 16)) Index: linux/arch/i386/mach-visws/setup.c =================================================================== --- linux.orig/arch/i386/mach-visws/setup.c +++ linux/arch/i386/mach-visws/setup.c @@ -113,7 +113,7 @@ void __init pre_setup_arch_hook() static struct irqaction irq0 = { .handler = timer_interrupt, - .flags = SA_INTERRUPT, + .flags = SA_INTERRUPT | SA_NODELAY, .name = "timer", }; Index: linux/arch/i386/mach-visws/visws_apic.c =================================================================== --- linux.orig/arch/i386/mach-visws/visws_apic.c +++ linux/arch/i386/mach-visws/visws_apic.c @@ -260,11 +260,13 @@ out_unlock: static struct irqaction master_action = { .handler = piix4_master_intr, .name = "PIIX4-8259", + .flags = SA_NODELAY, }; static struct irqaction cascade_action = { .handler = no_action, .name = "cascade", + .flags = 
SA_NODELAY, }; Index: linux/arch/i386/mach-voyager/setup.c =================================================================== --- linux.orig/arch/i386/mach-voyager/setup.c +++ linux/arch/i386/mach-voyager/setup.c @@ -16,7 +16,7 @@ void __init pre_intr_init_hook(void) /* * IRQ2 is cascade interrupt to second interrupt controller */ -static struct irqaction irq2 = { no_action, 0, CPU_MASK_NONE, "cascade", NULL, NULL}; +static struct irqaction irq2 = { no_action, SA_NODELAY, CPU_MASK_NONE, "cascade", NULL, NULL}; void __init intr_init_hook(void) { @@ -39,7 +39,7 @@ void __init trap_init_hook(void) { } -static struct irqaction irq0 = { timer_interrupt, SA_INTERRUPT, CPU_MASK_NONE, "timer", NULL, NULL}; +static struct irqaction irq0 = { timer_interrupt, SA_INTERRUPT | SA_NODELAY, CPU_MASK_NONE, "timer", NULL, NULL}; void __init time_init_hook(void) { Index: linux/arch/i386/mm/fault.c =================================================================== --- linux.orig/arch/i386/mm/fault.c +++ linux/arch/i386/mm/fault.c @@ -39,6 +39,8 @@ void bust_spinlocks(int yes) int loglevel_save = console_loglevel; if (yes) { + stop_trace(); + zap_rt_locks(); oops_in_progress = 1; return; } @@ -224,8 +226,8 @@ fastcall void do_invalid_op(struct pt_re * bit 1 == 0 means read, 1 means write * bit 2 == 0 means kernel, 1 means user-mode */ -fastcall void __kprobes do_page_fault(struct pt_regs *regs, - unsigned long error_code) +fastcall notrace void __kprobes do_page_fault(struct pt_regs *regs, + unsigned long error_code) { struct task_struct *tsk; struct mm_struct *mm; @@ -236,13 +238,14 @@ fastcall void __kprobes do_page_fault(st /* get the address */ address = read_cr2(); + trace_special(regs->eip, error_code, address); if (notify_die(DIE_PAGE_FAULT, "page fault", regs, error_code, 14, SIGSEGV) == NOTIFY_STOP) return; /* It's safe to allow irq's after cr2 has been saved */ if (regs->eflags & (X86_EFLAGS_IF|VM_MASK)) - local_irq_enable(); + raw_local_irq_enable(); tsk = current; @@ 
-449,9 +452,9 @@ no_context: } #endif if (address < PAGE_SIZE) - printk(KERN_ALERT "Unable to handle kernel NULL pointer dereference"); + printk(KERN_ALERT "BUG: Unable to handle kernel NULL pointer dereference"); else - printk(KERN_ALERT "Unable to handle kernel paging request"); + printk(KERN_ALERT "BUG: Unable to handle kernel paging request"); printk(" at virtual address %08lx\n",address); printk(KERN_ALERT " printing eip:\n"); printk("%08lx\n", regs->eip); Index: linux/arch/i386/mm/highmem.c =================================================================== --- linux.orig/arch/i386/mm/highmem.c +++ linux/arch/i386/mm/highmem.c @@ -18,6 +18,27 @@ void kunmap(struct page *page) kunmap_high(page); } +void kunmap_virt(void *ptr) +{ + struct page *page; + /* kunmap() by address; nothing to do below the pkmap area: */ + if ((unsigned long)ptr < PKMAP_ADDR(0)) + return; + page = pte_page(pkmap_page_table[PKMAP_NR((unsigned long)ptr)]); + kunmap(page); +} +/* page for a kernel address: virt_to_page() below the pkmap area, else the pkmap pte */ +struct page *kmap_to_page(void *ptr) +{ + struct page *page; + /* NOTE(review): ptr is not checked against the end of the pkmap area - confirm callers */ + if ((unsigned long)ptr < PKMAP_ADDR(0)) + return virt_to_page(ptr); + page = pte_page(pkmap_page_table[PKMAP_NR((unsigned long)ptr)]); + return page; +} + + /* * kmap_atomic/kunmap_atomic is significantly faster than kmap/kunmap because * no global lock is needed and because the kmap code must perform a global TLB @@ -26,7 +47,7 @@ void kunmap(struct page *page) * However when holding an atomic kmap is is not legal to sleep, so atomic * kmaps are appropriate for short, tight code paths only. 
*/ -void *kmap_atomic(struct page *page, enum km_type type) +void *__kmap_atomic(struct page *page, enum km_type type) { enum fixed_addresses idx; unsigned long vaddr; @@ -48,7 +69,7 @@ void *kmap_atomic(struct page *page, enu return (void*) vaddr; } -void kunmap_atomic(void *kvaddr, enum km_type type) +void __kunmap_atomic(void *kvaddr, enum km_type type) { #ifdef CONFIG_DEBUG_HIGHMEM unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK; @@ -78,7 +99,7 @@ void kunmap_atomic(void *kvaddr, enum km /* This is the same as kmap_atomic() but can map memory that doesn't * have a struct page associated with it. */ -void *kmap_atomic_pfn(unsigned long pfn, enum km_type type) +void *__kmap_atomic_pfn(unsigned long pfn, enum km_type type) { enum fixed_addresses idx; unsigned long vaddr; @@ -93,7 +114,7 @@ void *kmap_atomic_pfn(unsigned long pfn, return (void*) vaddr; } -struct page *kmap_atomic_to_page(void *ptr) +struct page *__kmap_atomic_to_page(void *ptr) { unsigned long idx, vaddr = (unsigned long)ptr; pte_t *pte; @@ -108,6 +129,7 @@ struct page *kmap_atomic_to_page(void *p EXPORT_SYMBOL(kmap); EXPORT_SYMBOL(kunmap); -EXPORT_SYMBOL(kmap_atomic); -EXPORT_SYMBOL(kunmap_atomic); -EXPORT_SYMBOL(kmap_atomic_to_page); +EXPORT_SYMBOL(kunmap_virt); +EXPORT_SYMBOL(__kmap_atomic); +EXPORT_SYMBOL(__kunmap_atomic); +EXPORT_SYMBOL(__kmap_atomic_to_page); Index: linux/arch/i386/mm/init.c =================================================================== --- linux.orig/arch/i386/mm/init.c +++ linux/arch/i386/mm/init.c @@ -42,7 +42,7 @@ unsigned int __VMALLOC_RESERVE = 128 << 20; -DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); +DEFINE_PER_CPU_LOCKED(struct mmu_gather, mmu_gathers); unsigned long highstart_pfn, highend_pfn; static int noinline do_test_wp_bit(void); Index: linux/arch/i386/mm/pageattr.c =================================================================== --- linux.orig/arch/i386/mm/pageattr.c +++ linux/arch/i386/mm/pageattr.c @@ -207,6 +207,9 @@ void 
kernel_map_pages(struct page *page, { if (PageHighMem(page)) return; + if (!enable) + check_no_locks_freed(page_address(page), page_address(page+numpages)); + /* the return value is ignored - the calls cannot fail, * large pages are disabled at boot time. */ Index: linux/arch/i386/mm/pgtable.c =================================================================== --- linux.orig/arch/i386/mm/pgtable.c +++ linux/arch/i386/mm/pgtable.c @@ -180,7 +180,7 @@ void pmd_ctor(void *pmd, kmem_cache_t *c * recommendations and having no core impact whatsoever. * -- wli */ -DEFINE_SPINLOCK(pgd_lock); +DEFINE_RAW_SPINLOCK(pgd_lock); struct page *pgd_list; static inline void pgd_list_add(pgd_t *pgd) Index: linux/arch/i386/oprofile/Kconfig =================================================================== --- linux.orig/arch/i386/oprofile/Kconfig +++ linux/arch/i386/oprofile/Kconfig @@ -19,5 +19,9 @@ config OPROFILE If unsure, say N. +config PROFILE_NMI + bool + default y + endmenu Index: linux/arch/i386/pci/direct.c =================================================================== --- linux.orig/arch/i386/pci/direct.c +++ linux/arch/i386/pci/direct.c @@ -211,16 +211,23 @@ static int __init pci_check_type1(void) unsigned int tmp; int works = 0; - local_irq_save(flags); + spin_lock_irqsave(&pci_config_lock, flags); outb(0x01, 0xCFB); tmp = inl(0xCF8); outl(0x80000000, 0xCF8); - if (inl(0xCF8) == 0x80000000 && pci_sanity_check(&pci_direct_conf1)) { - works = 1; + + if (inl(0xCF8) == 0x80000000) { + spin_unlock_irqrestore(&pci_config_lock, flags); + + if (pci_sanity_check(&pci_direct_conf1)) + works = 1; + + spin_lock_irqsave(&pci_config_lock, flags); } outl(tmp, 0xCF8); - local_irq_restore(flags); + + spin_unlock_irqrestore(&pci_config_lock, flags); return works; } @@ -230,17 +237,19 @@ static int __init pci_check_type2(void) unsigned long flags; int works = 0; - local_irq_save(flags); + spin_lock_irqsave(&pci_config_lock, flags); outb(0x00, 0xCFB); outb(0x00, 0xCF8); outb(0x00, 
0xCFA); - if (inb(0xCF8) == 0x00 && inb(0xCFA) == 0x00 && - pci_sanity_check(&pci_direct_conf2)) { - works = 1; - } - local_irq_restore(flags); + if (inb(0xCF8) == 0x00 && inb(0xCFA) == 0x00) { + spin_unlock_irqrestore(&pci_config_lock, flags); + + if (pci_sanity_check(&pci_direct_conf2)) + works = 1; + } else + spin_unlock_irqrestore(&pci_config_lock, flags); return works; } Index: linux/arch/i386/pci/pcbios.c =================================================================== --- linux.orig/arch/i386/pci/pcbios.c +++ linux/arch/i386/pci/pcbios.c @@ -70,7 +70,7 @@ static unsigned long bios32_service(unsi unsigned long entry; /* %edx */ unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); __asm__("lcall *(%%edi); cld" : "=a" (return_code), "=b" (address), @@ -79,7 +79,7 @@ static unsigned long bios32_service(unsi : "0" (service), "1" (0), "D" (&bios32_indirect)); - local_irq_restore(flags); + raw_local_irq_restore(flags); switch (return_code) { case 0: @@ -110,7 +110,7 @@ static int __devinit check_pcibios(void) if ((pcibios_entry = bios32_service(PCI_SERVICE))) { pci_indirect.address = pcibios_entry + PAGE_OFFSET; - local_irq_save(flags); + raw_local_irq_save(flags); __asm__( "lcall *(%%edi); cld\n\t" "jc 1f\n\t" @@ -123,7 +123,7 @@ static int __devinit check_pcibios(void) : "1" (PCIBIOS_PCI_BIOS_PRESENT), "D" (&pci_indirect) : "memory"); - local_irq_restore(flags); + raw_local_irq_restore(flags); status = (eax >> 8) & 0xff; hw_mech = eax & 0xff; Index: linux/arch/ia64/kernel/time.c =================================================================== --- linux.orig/arch/ia64/kernel/time.c +++ linux/arch/ia64/kernel/time.c @@ -32,10 +32,6 @@ extern unsigned long wall_jiffies; -u64 jiffies_64 __cacheline_aligned_in_smp = INITIAL_JIFFIES; - -EXPORT_SYMBOL(jiffies_64); - #define TIME_KEEPER_ID 0 /* smp_processor_id() of time-keeper */ #ifdef CONFIG_IA64_DEBUG_IRQ Index: linux/arch/m32r/kernel/time.c 
=================================================================== --- linux.orig/arch/m32r/kernel/time.c +++ linux/arch/m32r/kernel/time.c @@ -39,10 +39,6 @@ extern void send_IPI_allbutself(int, int extern void smp_local_timer_interrupt(struct pt_regs *); #endif -u64 jiffies_64 = INITIAL_JIFFIES; - -EXPORT_SYMBOL(jiffies_64); - extern unsigned long wall_jiffies; #define TICK_SIZE (tick_nsec / 1000) Index: linux/arch/m68k/kernel/time.c =================================================================== --- linux.orig/arch/m68k/kernel/time.c +++ linux/arch/m68k/kernel/time.c @@ -27,10 +27,6 @@ #include #include -u64 jiffies_64 = INITIAL_JIFFIES; - -EXPORT_SYMBOL(jiffies_64); - static inline int set_rtc_mmss(unsigned long nowtime) { if (mach_set_clock_mmss) Index: linux/arch/m68knommu/kernel/time.c =================================================================== --- linux.orig/arch/m68knommu/kernel/time.c +++ linux/arch/m68knommu/kernel/time.c @@ -27,10 +27,6 @@ #define TICK_SIZE (tick_nsec / 1000) -u64 jiffies_64 = INITIAL_JIFFIES; - -EXPORT_SYMBOL(jiffies_64); - extern unsigned long wall_jiffies; Index: linux/arch/mips/Kconfig =================================================================== --- linux.orig/arch/mips/Kconfig +++ linux/arch/mips/Kconfig @@ -364,6 +364,7 @@ config MIPS_SEAD config MOMENCO_OCELOT bool "Support for Momentum Ocelot board" select DMA_NONCOHERENT + select NO_SPINLOCK select HW_HAS_PCI select IRQ_CPU select IRQ_CPU_RM7K @@ -750,6 +751,7 @@ config SIBYTE_SB1xxx_SOC depends on EXPERIMENTAL select BOOT_ELF32 select DMA_COHERENT + select NO_SPINLOCK select SWAP_IO_SPACE select SYS_SUPPORTS_32BIT_KERNEL select SYS_SUPPORTS_64BIT_KERNEL @@ -998,12 +1000,21 @@ config TOSHIBA_FPCIB0 bool "FPCIB0 Backplane Support" depends on TOSHIBA_RBTX4927 +source "kernel/Kconfig.preempt" + config RWSEM_GENERIC_SPINLOCK bool + depends on !PREEMPT_RT default y config RWSEM_XCHGADD_ALGORITHM bool + depends on !PREEMPT_RT + +config ASM_SEMAPHORES + bool +# 
depends on !PREEMPT_RT + default y config GENERIC_CALIBRATE_DELAY bool @@ -1034,6 +1045,9 @@ config DMA_NONCOHERENT config DMA_NEED_PCI_MAP_STATE bool +config NO_SPINLOCK + bool + config EARLY_PRINTK bool depends on MACH_DECSTATION @@ -1543,15 +1557,6 @@ config NR_CPUS This is purely to save memory - each supported CPU adds approximately eight kilobytes to the kernel image. -config PREEMPT - bool "Preemptible Kernel" - help - This option reduces the latency of the kernel when reacting to - real-time or interactive events by allowing a low priority process to - be preempted even if it is in kernel mode executing a system call. - This allows applications to run more reliably even when the system is - under load. - config RTC_DS1742 bool "DS1742 BRAM/RTC support" depends on TOSHIBA_JMR3927 || TOSHIBA_RBTX4927 @@ -1566,10 +1571,6 @@ config MIPS_INSANE_LARGE This will result in additional memory usage, so it is not recommended for normal users. -config RWSEM_GENERIC_SPINLOCK - bool - default y - endmenu menu "Bus options (PCI, PCMCIA, EISA, ISA, TC)" Index: linux/arch/mips/arc/misc.c =================================================================== --- linux.orig/arch/mips/arc/misc.c +++ linux/arch/mips/arc/misc.c @@ -27,7 +27,7 @@ VOID ArcHalt(VOID) { bc_disable(); - local_irq_disable(); + raw_local_irq_disable(); #ifdef CONFIG_SCSI_SGIWD93 reset_wd33c93(sgiwd93_host); #endif @@ -39,7 +39,7 @@ VOID ArcPowerDown(VOID) { bc_disable(); - local_irq_disable(); + raw_local_irq_disable(); #ifdef CONFIG_SCSI_SGIWD93 reset_wd33c93(sgiwd93_host); #endif @@ -52,7 +52,7 @@ VOID ArcRestart(VOID) { bc_disable(); - local_irq_disable(); + raw_local_irq_disable(); #ifdef CONFIG_SCSI_SGIWD93 reset_wd33c93(sgiwd93_host); #endif @@ -64,7 +64,7 @@ VOID ArcReboot(VOID) { bc_disable(); - local_irq_disable(); + raw_local_irq_disable(); #ifdef CONFIG_SCSI_SGIWD93 reset_wd33c93(sgiwd93_host); #endif @@ -76,7 +76,7 @@ VOID ArcEnterInteractiveMode(VOID) { bc_disable(); - local_irq_disable(); + 
raw_local_irq_disable(); #ifdef CONFIG_SCSI_SGIWD93 reset_wd33c93(sgiwd93_host); #endif Index: linux/arch/mips/au1000/common/irq.c =================================================================== --- linux.orig/arch/mips/au1000/common/irq.c +++ linux/arch/mips/au1000/common/irq.c @@ -253,47 +253,43 @@ void restore_local_and_enable(int contro static struct hw_interrupt_type rise_edge_irq_type = { - "Au1000 Rise Edge", - startup_irq, - shutdown_irq, - local_enable_irq, - local_disable_irq, - mask_and_ack_rise_edge_irq, - end_irq, - NULL + .typename = "Au1000 Rise Edge", + .startup = startup_irq, + .shutdown = shutdown_irq, + .enable = local_enable_irq, + .disable = local_disable_irq, + .ack = mask_and_ack_rise_edge_irq, + .end = end_irq, }; static struct hw_interrupt_type fall_edge_irq_type = { - "Au1000 Fall Edge", - startup_irq, - shutdown_irq, - local_enable_irq, - local_disable_irq, - mask_and_ack_fall_edge_irq, - end_irq, - NULL + .typename = "Au1000 Fall Edge", + .startup = startup_irq, + .shutdown = shutdown_irq, + .enable = local_enable_irq, + .disable = local_disable_irq, + .ack = mask_and_ack_fall_edge_irq, + .end = end_irq, }; static struct hw_interrupt_type either_edge_irq_type = { - "Au1000 Rise or Fall Edge", - startup_irq, - shutdown_irq, - local_enable_irq, - local_disable_irq, - mask_and_ack_either_edge_irq, - end_irq, - NULL + .typename = "Au1000 Rise or Fall Edge", + .startup = startup_irq, + .shutdown = shutdown_irq, + .enable = local_enable_irq, + .disable = local_disable_irq, + .ack = mask_and_ack_either_edge_irq, + .end = end_irq, }; static struct hw_interrupt_type level_irq_type = { - "Au1000 Level", - startup_irq, - shutdown_irq, - local_enable_irq, - local_disable_irq, - mask_and_ack_level_irq, - end_irq, - NULL + .typename = "Au1000 Level", + .startup = startup_irq, + .shutdown = shutdown_irq, + .enable = local_enable_irq, + .disable = local_disable_irq, + .ack = mask_and_ack_level_irq, + .end = end_irq, }; #ifdef CONFIG_PM Index: 
linux/arch/mips/ddb5xxx/ddb5074/nile4_pic.c =================================================================== --- linux.orig/arch/mips/ddb5xxx/ddb5074/nile4_pic.c +++ linux/arch/mips/ddb5xxx/ddb5074/nile4_pic.c @@ -209,14 +209,13 @@ static void nile4_irq_end(unsigned int i #define nile4_irq_shutdown nile4_disable_irq static hw_irq_controller nile4_irq_controller = { - "nile4", - nile4_irq_startup, - nile4_irq_shutdown, - nile4_enable_irq, - nile4_disable_irq, - nile4_ack_irq, - nile4_irq_end, - NULL + .typename = "nile4", + .startup = nile4_irq_startup, + .shutdown = nile4_irq_shutdown, + .enable = nile4_enable_irq, + .disable = nile4_disable_irq, + .ack = nile4_ack_irq, + .end = nile4_irq_end, }; void nile4_irq_setup(u32 base) { Index: linux/arch/mips/ddb5xxx/ddb5476/vrc5476_irq.c =================================================================== --- linux.orig/arch/mips/ddb5xxx/ddb5476/vrc5476_irq.c +++ linux/arch/mips/ddb5xxx/ddb5476/vrc5476_irq.c @@ -53,14 +53,13 @@ static void vrc5476_irq_end(uint irq) } static hw_irq_controller vrc5476_irq_controller = { - "vrc5476", - vrc5476_irq_startup, - vrc5476_irq_shutdown, - vrc5476_irq_enable, - vrc5476_irq_disable, - vrc5476_irq_ack, - vrc5476_irq_end, - NULL /* no affinity stuff for UP */ + .typename = "vrc5476", + .startup = vrc5476_irq_startup, + .shutdown = vrc5476_irq_shutdown, + .enable = vrc5476_irq_enable, + .disable = vrc5476_irq_disable, + .ack = vrc5476_irq_ack, + .end = vrc5476_irq_end }; void __init Index: linux/arch/mips/ddb5xxx/ddb5477/irq_5477.c =================================================================== --- linux.orig/arch/mips/ddb5xxx/ddb5477/irq_5477.c +++ linux/arch/mips/ddb5xxx/ddb5477/irq_5477.c @@ -90,14 +90,13 @@ vrc5477_irq_end(unsigned int irq) } hw_irq_controller vrc5477_irq_controller = { - "vrc5477_irq", - vrc5477_irq_startup, - vrc5477_irq_shutdown, - vrc5477_irq_enable, - vrc5477_irq_disable, - vrc5477_irq_ack, - vrc5477_irq_end, - NULL /* no affinity stuff for UP */ + 
.typename = "vrc5477_irq", + .startup = vrc5477_irq_startup, + .shutdown = vrc5477_irq_shutdown, + .enable = vrc5477_irq_enable, + .disable = vrc5477_irq_disable, + .ack = vrc5477_irq_ack, + .end = vrc5477_irq_end }; void __init vrc5477_irq_init(u32 irq_base) Index: linux/arch/mips/gt64120/ev64120/irq.c =================================================================== --- linux.orig/arch/mips/gt64120/ev64120/irq.c +++ linux/arch/mips/gt64120/ev64120/irq.c @@ -60,25 +60,25 @@ static void disable_ev64120_irq(unsigned { unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); if (irq_nr >= 8) { // All PCI interrupts are on line 5 or 2 clear_c0_status(9 << 10); } else { clear_c0_status(1 << (irq_nr + 8)); } - local_irq_restore(flags); + raw_local_irq_restore(flags); } static void enable_ev64120_irq(unsigned int irq_nr) { unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); if (irq_nr >= 8) // All PCI interrupts are on line 5 or 2 set_c0_status(9 << 10); else set_c0_status(1 << (irq_nr + 8)); - local_irq_restore(flags); + raw_local_irq_restore(flags); } static unsigned int startup_ev64120_irq(unsigned int irq) @@ -119,7 +119,7 @@ void gt64120_irq_setup(void) /* Sets the exception_handler array. */ set_except_vector(0, galileo_handle_int); - local_irq_disable(); + raw_local_irq_disable(); /* * Enable timer. Other interrupts will be enabled as they are Index: linux/arch/mips/gt64120/momenco_ocelot/irq.c =================================================================== --- linux.orig/arch/mips/gt64120/momenco_ocelot/irq.c +++ linux/arch/mips/gt64120/momenco_ocelot/irq.c @@ -57,7 +57,7 @@ void __init arch_init_irq(void) * int-handler is not on bootstrap */ clear_c0_status(ST0_IM); - local_irq_disable(); + raw_local_irq_disable(); /* Sets the first-level interrupt dispatcher. 
*/ set_except_vector(0, ocelot_handle_int); Index: linux/arch/mips/ite-boards/generic/irq.c =================================================================== --- linux.orig/arch/mips/ite-boards/generic/irq.c +++ linux/arch/mips/ite-boards/generic/irq.c @@ -138,14 +138,13 @@ static void end_ite_irq(unsigned int irq } static struct hw_interrupt_type it8172_irq_type = { - "ITE8172", - startup_ite_irq, - shutdown_ite_irq, - enable_it8172_irq, - disable_it8172_irq, - mask_and_ack_ite_irq, - end_ite_irq, - NULL + .typename = "ITE8172", + .startup = startup_ite_irq, + .shutdown = shutdown_ite_irq, + .enable = enable_it8172_irq, + .disable = disable_it8172_irq, + .ack = mask_and_ack_ite_irq, + .end = end_ite_irq, }; @@ -159,22 +158,22 @@ static void ack_none(unsigned int irq) { #define end_none enable_none static struct hw_interrupt_type cp0_irq_type = { - "CP0 Count", - startup_none, - shutdown_none, - enable_none, - disable_none, - ack_none, - end_none + .typename = "CP0 Count", + .startup = startup_none, + .shutdown = shutdown_none, + .enable = enable_none, + .disable = disable_none, + .ack = ack_none, + .end = end_none }; void enable_cpu_timer(void) { unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); set_c0_status(0x100 << EXT_IRQ5_TO_IP); - local_irq_restore(flags); + raw_local_irq_restore(flags); } void __init arch_init_irq(void) Index: linux/arch/mips/ite-boards/generic/time.c =================================================================== --- linux.orig/arch/mips/ite-boards/generic/time.c +++ linux/arch/mips/ite-boards/generic/time.c @@ -124,7 +124,7 @@ static unsigned long __init cal_r4koff(v { unsigned int flags; - local_irq_save(flags); + raw_local_irq_save(flags); /* Start counter exactly on falling edge of update flag */ while (CMOS_READ(RTC_REG_A) & RTC_UIP); @@ -140,7 +140,7 @@ static unsigned long __init cal_r4koff(v mips_hpt_frequency = read_c0_count(); /* restore interrupts */ - local_irq_restore(flags); + 
raw_local_irq_restore(flags); return (mips_hpt_frequency / HZ); } @@ -153,11 +153,11 @@ it8172_rtc_get_time(void) /* avoid update-in-progress. */ for (;;) { - local_irq_save(flags); + raw_local_irq_save(flags); if (! (CMOS_READ(RTC_REG_A) & RTC_UIP)) break; /* don't hold intr closed all the time */ - local_irq_restore(flags); + raw_local_irq_restore(flags); } /* Read regs. */ @@ -170,7 +170,7 @@ it8172_rtc_get_time(void) hw_to_bin(*rtc_century_reg) * 100; /* restore interrupts */ - local_irq_restore(flags); + raw_local_irq_restore(flags); return mktime(year, mon, day, hour, min, sec); } @@ -186,11 +186,11 @@ it8172_rtc_set_time(unsigned long t) /* avoid update-in-progress. */ for (;;) { - local_irq_save(flags); + raw_local_irq_save(flags); if (! (CMOS_READ(RTC_REG_A) & RTC_UIP)) break; /* don't hold intr closed all the time */ - local_irq_restore(flags); + raw_local_irq_restore(flags); } *rtc_century_reg = bin_to_hw(tm.tm_year/100); @@ -202,7 +202,7 @@ it8172_rtc_set_time(unsigned long t) CMOS_WRITE(bin_to_hw(tm.tm_year%100), RTC_YEAR); /* restore interrupts */ - local_irq_restore(flags); + raw_local_irq_restore(flags); return 0; } @@ -211,7 +211,7 @@ void __init it8172_time_init(void) { unsigned int est_freq, flags; - local_irq_save(flags); + raw_local_irq_save(flags); saved_control = CMOS_READ(RTC_CONTROL); @@ -225,7 +225,7 @@ void __init it8172_time_init(void) printk("CPU frequency %d.%02d MHz\n", est_freq/1000000, (est_freq%1000000)*100/1000000); - local_irq_restore(flags); + raw_local_irq_restore(flags); rtc_get_time = it8172_rtc_get_time; rtc_set_time = it8172_rtc_set_time; Index: linux/arch/mips/jazz/irq.c =================================================================== --- linux.orig/arch/mips/jazz/irq.c +++ linux/arch/mips/jazz/irq.c @@ -58,14 +58,13 @@ static void end_r4030_irq(unsigned int i } static struct hw_interrupt_type r4030_irq_type = { - "R4030", - startup_r4030_irq, - shutdown_r4030_irq, - enable_r4030_irq, - disable_r4030_irq, - 
mask_and_ack_r4030_irq, - end_r4030_irq, - NULL + .typename = "R4030", + .startup = startup_r4030_irq, + .shutdown = shutdown_r4030_irq, + .enable = enable_r4030_irq, + .disable = disable_r4030_irq, + .ack = mask_and_ack_r4030_irq, + .end = end_r4030_irq, }; void __init init_r4030_ints(void) Index: linux/arch/mips/jmr3927/rbhma3100/irq.c =================================================================== --- linux.orig/arch/mips/jmr3927/rbhma3100/irq.c +++ linux/arch/mips/jmr3927/rbhma3100/irq.c @@ -412,13 +412,13 @@ void __init arch_init_irq(void) } static hw_irq_controller jmr3927_irq_controller = { - "jmr3927_irq", - jmr3927_irq_startup, - jmr3927_irq_shutdown, - jmr3927_irq_enable, - jmr3927_irq_disable, - jmr3927_irq_ack, - jmr3927_irq_end, + .typename = "jmr3927_irq", + .startup = jmr3927_irq_startup, + .shutdown = jmr3927_irq_shutdown, + .enable = jmr3927_irq_enable, + .disable = jmr3927_irq_disable, + .ack = jmr3927_irq_ack, + .end = jmr3927_irq_end, }; void jmr3927_irq_init(u32 irq_base) Index: linux/arch/mips/jmr3927/rbhma3100/setup.c =================================================================== --- linux.orig/arch/mips/jmr3927/rbhma3100/setup.c +++ linux/arch/mips/jmr3927/rbhma3100/setup.c @@ -108,7 +108,7 @@ static inline void do_reset(void) static void jmr3927_machine_restart(char *command) { - local_irq_disable(); + raw_local_irq_disable(); puts("Rebooting..."); do_reset(); } Index: linux/arch/mips/kernel/Makefile =================================================================== --- linux.orig/arch/mips/kernel/Makefile +++ linux/arch/mips/kernel/Makefile @@ -5,7 +5,7 @@ extra-y := head.o init_task.o vmlinux.lds obj-y += cpu-probe.o branch.o entry.o genex.o irq.o process.o \ - ptrace.o reset.o semaphore.o setup.o signal.o syscall.o \ + ptrace.o reset.o setup.o signal.o syscall.o \ time.o traps.o unaligned.o binfmt_irix-objs := irixelf.o irixinv.o irixioctl.o irixsig.o \ @@ -17,6 +17,8 @@ obj-$(CONFIG_32BIT) += module-elf32.o obj-$(CONFIG_64BIT) 
+= module-elf64.o endif +obj-$(CONFIG_ASM_SEMAPHORES) += semaphore.o + obj-$(CONFIG_CPU_R3000) += r2300_fpu.o r2300_switch.o obj-$(CONFIG_CPU_TX39XX) += r2300_fpu.o r2300_switch.o obj-$(CONFIG_CPU_TX49XX) += r4k_fpu.o r4k_switch.o Index: linux/arch/mips/kernel/asm-offsets.c =================================================================== --- linux.orig/arch/mips/kernel/asm-offsets.c +++ linux/arch/mips/kernel/asm-offsets.c @@ -11,6 +11,9 @@ #include #include #include +#include +#include +#include #include #include #include Index: linux/arch/mips/kernel/cpu-bugs64.c =================================================================== --- linux.orig/arch/mips/kernel/cpu-bugs64.c +++ linux/arch/mips/kernel/cpu-bugs64.c @@ -48,7 +48,7 @@ static inline void mult_sh_align_mod(lon * used for. */ - local_irq_save(flags); + raw_local_irq_save(flags); /* * The following code leads to a wrong result of the first * dsll32 when executed on R4000 rev. 2.2 or 3.0 (PRId @@ -101,7 +101,7 @@ static inline void mult_sh_align_mod(lon "" : "=r" (lv2) : "0" (lv2), "r" (p)); - local_irq_restore(flags); + raw_local_irq_restore(flags); *v1 = lv1; *v2 = lv2; @@ -182,7 +182,7 @@ static inline void check_daddi(void) printk("Checking for the daddi bug... "); - local_irq_save(flags); + raw_local_irq_save(flags); handler = set_except_vector(12, handle_daddi_ov); /* * The following code fails to trigger an overflow exception @@ -208,7 +208,7 @@ static inline void check_daddi(void) : "=r" (v), "=&r" (tmp) : "I" (0xffffffffffffdb9a), "I" (0x1234)); set_except_vector(12, handler); - local_irq_restore(flags); + raw_local_irq_restore(flags); if (daddi_ov) { printk("no.\n"); @@ -217,7 +217,7 @@ static inline void check_daddi(void) printk("yes, workaround... 
"); - local_irq_save(flags); + raw_local_irq_save(flags); handler = set_except_vector(12, handle_daddi_ov); asm volatile( "addiu %1, $0, %2\n\t" @@ -226,7 +226,7 @@ static inline void check_daddi(void) : "=r" (v), "=&r" (tmp) : "I" (0xffffffffffffdb9a), "I" (0x1234)); set_except_vector(12, handler); - local_irq_restore(flags); + raw_local_irq_restore(flags); if (daddi_ov) { printk("yes.\n"); Index: linux/arch/mips/kernel/entry.S =================================================================== --- linux.orig/arch/mips/kernel/entry.S +++ linux/arch/mips/kernel/entry.S @@ -23,7 +23,7 @@ .endm #else .macro preempt_stop reg=t0 - local_irq_disable \reg + mips_raw_local_irq_disable \reg .endm #define resume_kernel restore_all #endif @@ -38,7 +38,7 @@ FEXPORT(ret_from_irq) beqz t0, resume_kernel FEXPORT(resume_userspace) - local_irq_disable t0 # make sure we dont miss an + mips_raw_local_irq_disable t0 # make sure we dont miss an # interrupt setting need_resched # between sampling and return LONG_L a2, TI_FLAGS($28) # current->work @@ -48,6 +48,8 @@ FEXPORT(resume_userspace) #ifdef CONFIG_PREEMPT ENTRY(resume_kernel) + lw t0, kernel_preemption + beqz t0, restore_all lw t0, TI_PRE_COUNT($28) bnez t0, restore_all need_resched: @@ -57,12 +59,9 @@ need_resched: LONG_L t0, PT_STATUS(sp) # Interrupts off? 
andi t0, 1 beqz t0, restore_all - li t0, PREEMPT_ACTIVE - sw t0, TI_PRE_COUNT($28) - local_irq_enable t0 - jal schedule + mips_raw_local_irq_disable t0 + jal preempt_schedule_irq sw zero, TI_PRE_COUNT($28) - local_irq_disable t0 b need_resched #endif @@ -70,7 +69,7 @@ FEXPORT(ret_from_fork) jal schedule_tail # a0 = task_t *prev FEXPORT(syscall_exit) - local_irq_disable # make sure need_resched and + mips_raw_local_irq_disable # make sure need_resched and # signals dont change between # sampling and return LONG_L a2, TI_FLAGS($28) # current->work @@ -89,19 +88,19 @@ FEXPORT(restore_partial) # restore part .set at FEXPORT(work_pending) - andi t0, a2, _TIF_NEED_RESCHED + andi t0, a2, (_TIF_NEED_RESCHED|_TIF_NEED_RESCHED_DELAYED) beqz t0, work_notifysig work_resched: - jal schedule - - local_irq_disable t0 # make sure need_resched and + mips_raw_local_irq_disable + jal __schedule + # make sure need_resched and # signals dont change between # sampling and return LONG_L a2, TI_FLAGS($28) andi t0, a2, _TIF_WORK_MASK # is there any work to be done # other than syscall tracing? 
beqz t0, restore_all - andi t0, a2, _TIF_NEED_RESCHED + andi t0, a2, (_TIF_NEED_RESCHED|_TIF_NEED_RESCHED_DELAYED) bnez t0, work_resched work_notifysig: # deal with pending signals and @@ -118,7 +117,7 @@ FEXPORT(syscall_exit_work) li t1, _TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT and t0, t1 beqz t0, work_pending # trace bit is set - local_irq_enable # could let do_syscall_trace() + mips_raw_local_irq_enable # could let do_syscall_trace() # call schedule() instead move a0, sp li a1, 1 Index: linux/arch/mips/kernel/gdb-stub.c =================================================================== --- linux.orig/arch/mips/kernel/gdb-stub.c +++ linux/arch/mips/kernel/gdb-stub.c @@ -400,7 +400,7 @@ void set_debug_traps(void) unsigned long flags; unsigned char c; - local_irq_save(flags); + raw_local_irq_save(flags); for (ht = hard_trap_info; ht->tt && ht->signo; ht++) saved_vectors[ht->tt] = set_except_vector(ht->tt, trap_low); @@ -416,7 +416,7 @@ void set_debug_traps(void) putDebugChar('+'); /* ack it */ initialized = 1; - local_irq_restore(flags); + raw_local_irq_restore(flags); } void restore_debug_traps(void) @@ -424,10 +424,10 @@ void restore_debug_traps(void) struct hard_trap_info *ht; unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); for (ht = hard_trap_info; ht->tt && ht->signo; ht++) set_except_vector(ht->tt, saved_vectors[ht->tt]); - local_irq_restore(flags); + raw_local_irq_restore(flags); } /* @@ -643,9 +643,11 @@ void set_async_breakpoint(unsigned long if ((*epc & 0x80000000) == 0) return; +#ifdef CONFIG_SMP /* avoid deadlock if someone is make IPC */ if (spin_is_locked(&smp_call_lock)) return; +#endif async_bp.addr = *epc; *epc = (unsigned long)async_breakpoint; @@ -656,12 +658,12 @@ void kgdb_wait(void *arg) unsigned flags; int cpu = smp_processor_id(); - local_irq_save(flags); + raw_local_irq_save(flags); spin_lock(&kgdb_cpulock[cpu]); spin_unlock(&kgdb_cpulock[cpu]); - local_irq_restore(flags); + raw_local_irq_restore(flags); } Index: 
linux/arch/mips/kernel/i8259.c =================================================================== --- linux.orig/arch/mips/kernel/i8259.c +++ linux/arch/mips/kernel/i8259.c @@ -31,7 +31,7 @@ void disable_8259A_irq(unsigned int irq) * moves to arch independent land */ -spinlock_t DEFINE_SPINLOCK(i8259A_lock); +DEFINE_RAW_SPINLOCK(i8259A_lock); static void end_8259A_irq (unsigned int irq) { @@ -52,14 +52,13 @@ static unsigned int startup_8259A_irq(un } static struct hw_interrupt_type i8259A_irq_type = { - "XT-PIC", - startup_8259A_irq, - shutdown_8259A_irq, - enable_8259A_irq, - disable_8259A_irq, - mask_and_ack_8259A, - end_8259A_irq, - NULL + .typename = "XT-PIC", + .startup = startup_8259A_irq, + .shutdown = shutdown_8259A_irq, + .enable = enable_8259A_irq, + .disable = disable_8259A_irq, + .ack = mask_and_ack_8259A, + .end = end_8259A_irq, }; /* Index: linux/arch/mips/kernel/init_task.c =================================================================== --- linux.orig/arch/mips/kernel/init_task.c +++ linux/arch/mips/kernel/init_task.c @@ -9,8 +9,8 @@ #include #include -static struct fs_struct init_fs = INIT_FS; -static struct files_struct init_files = INIT_FILES; +static struct fs_struct init_fs = INIT_FS(init_fs); +static struct files_struct init_files = INIT_FILES(init_files); static struct signal_struct init_signals = INIT_SIGNALS(init_signals); static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); struct mm_struct init_mm = INIT_MM(init_mm); Index: linux/arch/mips/kernel/irq-msc01.c =================================================================== --- linux.orig/arch/mips/kernel/irq-msc01.c +++ linux/arch/mips/kernel/irq-msc01.c @@ -129,25 +129,23 @@ msc_bind_eic_interrupt (unsigned int irq #define shutdown_msc_irq disable_msc_irq struct hw_interrupt_type msc_levelirq_type = { - "SOC-it-Level", - startup_msc_irq, - shutdown_msc_irq, - enable_msc_irq, - disable_msc_irq, - level_mask_and_ack_msc_irq, - end_msc_irq, - NULL + .typename = 
"SOC-it-Level", + .startup = startup_msc_irq, + .shutdown = shutdown_msc_irq, + .enable = enable_msc_irq, + .disable = disable_msc_irq, + .ack = level_mask_and_ack_msc_irq, + .end = end_msc_irq, }; struct hw_interrupt_type msc_edgeirq_type = { - "SOC-it-Edge", - startup_msc_irq, - shutdown_msc_irq, - enable_msc_irq, - disable_msc_irq, - edge_mask_and_ack_msc_irq, - end_msc_irq, - NULL + .typename = "SOC-it-Edge", + .startup =startup_msc_irq, + .shutdown = shutdown_msc_irq, + .enable = enable_msc_irq, + .disable = disable_msc_irq, + .ack = edge_mask_and_ack_msc_irq, + .end = end_msc_irq, }; Index: linux/arch/mips/kernel/irq-mv6434x.c =================================================================== --- linux.orig/arch/mips/kernel/irq-mv6434x.c +++ linux/arch/mips/kernel/irq-mv6434x.c @@ -135,14 +135,13 @@ void ll_mv64340_irq(struct pt_regs *regs #define shutdown_mv64340_irq disable_mv64340_irq struct hw_interrupt_type mv64340_irq_type = { - "MV-64340", - startup_mv64340_irq, - shutdown_mv64340_irq, - enable_mv64340_irq, - disable_mv64340_irq, - mask_and_ack_mv64340_irq, - end_mv64340_irq, - NULL + .typename = "MV-64340", + .startup = startup_mv64340_irq, + .shutdown = shutdown_mv64340_irq, + .enable = enable_mv64340_irq, + .disable = disable_mv64340_irq, + .ack = mask_and_ack_mv64340_irq, + .end = end_mv64340_irq, }; void __init mv64340_irq_init(unsigned int base) Index: linux/arch/mips/kernel/irq-rm7000.c =================================================================== --- linux.orig/arch/mips/kernel/irq-rm7000.c +++ linux/arch/mips/kernel/irq-rm7000.c @@ -33,18 +33,18 @@ static inline void rm7k_cpu_irq_enable(u { unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); unmask_rm7k_irq(irq); - local_irq_restore(flags); + raw_local_irq_restore(flags); } static void rm7k_cpu_irq_disable(unsigned int irq) { unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); mask_rm7k_irq(irq); - local_irq_restore(flags); + 
raw_local_irq_restore(flags); } static unsigned int rm7k_cpu_irq_startup(unsigned int irq) @@ -72,13 +72,13 @@ static void rm7k_cpu_irq_end(unsigned in } static hw_irq_controller rm7k_irq_controller = { - "RM7000", - rm7k_cpu_irq_startup, - rm7k_cpu_irq_shutdown, - rm7k_cpu_irq_enable, - rm7k_cpu_irq_disable, - rm7k_cpu_irq_ack, - rm7k_cpu_irq_end, + .typename = "RM7000", + .startup = rm7k_cpu_irq_startup, + .shutdown = rm7k_cpu_irq_shutdown, + .enable = rm7k_cpu_irq_enable, + .disable = rm7k_cpu_irq_disable, + .ack = rm7k_cpu_irq_ack, + .end = rm7k_cpu_irq_end, }; void __init rm7k_cpu_irq_init(int base) Index: linux/arch/mips/kernel/irq-rm9000.c =================================================================== --- linux.orig/arch/mips/kernel/irq-rm9000.c +++ linux/arch/mips/kernel/irq-rm9000.c @@ -34,18 +34,18 @@ static inline void rm9k_cpu_irq_enable(u { unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); unmask_rm9k_irq(irq); - local_irq_restore(flags); + raw_local_irq_restore(flags); } static void rm9k_cpu_irq_disable(unsigned int irq) { unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); mask_rm9k_irq(irq); - local_irq_restore(flags); + raw_local_irq_restore(flags); } static unsigned int rm9k_cpu_irq_startup(unsigned int irq) @@ -79,9 +79,9 @@ static void local_rm9k_perfcounter_irq_s unsigned int irq = (unsigned int) args; unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); mask_rm9k_irq(irq); - local_irq_restore(flags); + raw_local_irq_restore(flags); } static void rm9k_perfcounter_irq_shutdown(unsigned int irq) @@ -106,23 +106,23 @@ static void rm9k_cpu_irq_end(unsigned in } static hw_irq_controller rm9k_irq_controller = { - "RM9000", - rm9k_cpu_irq_startup, - rm9k_cpu_irq_shutdown, - rm9k_cpu_irq_enable, - rm9k_cpu_irq_disable, - rm9k_cpu_irq_ack, - rm9k_cpu_irq_end, + .typename = "RM9000", + .startup = rm9k_cpu_irq_startup, + .shutdown = rm9k_cpu_irq_shutdown, + .enable = 
rm9k_cpu_irq_enable, + .disable = rm9k_cpu_irq_disable, + .ack = rm9k_cpu_irq_ack, + .end = rm9k_cpu_irq_end, }; static hw_irq_controller rm9k_perfcounter_irq = { - "RM9000", - rm9k_perfcounter_irq_startup, - rm9k_perfcounter_irq_shutdown, - rm9k_cpu_irq_enable, - rm9k_cpu_irq_disable, - rm9k_cpu_irq_ack, - rm9k_cpu_irq_end, + .typename = "RM9000", + .startup = rm9k_perfcounter_irq_startup, + .shutdown = rm9k_perfcounter_irq_shutdown, + .enable = rm9k_cpu_irq_enable, + .disable = rm9k_cpu_irq_disable, + .ack = rm9k_cpu_irq_ack, + .end = rm9k_cpu_irq_end, }; unsigned int rm9000_perfcount_irq; Index: linux/arch/mips/kernel/irq.c =================================================================== --- linux.orig/arch/mips/kernel/irq.c +++ linux/arch/mips/kernel/irq.c @@ -125,7 +125,10 @@ void __init init_IRQ(void) irq_desc[i].action = NULL; irq_desc[i].depth = 1; irq_desc[i].handler = &no_irq_type; - spin_lock_init(&irq_desc[i].lock); + __raw_spin_lock_init(&irq_desc[i].lock); +#ifdef CONFIG_PREEMPT_HARDIRQS + irq_desc[i].thread = NULL; +#endif } arch_init_irq(); Index: linux/arch/mips/kernel/irq_cpu.c =================================================================== --- linux.orig/arch/mips/kernel/irq_cpu.c +++ linux/arch/mips/kernel/irq_cpu.c @@ -50,18 +50,18 @@ static inline void mips_cpu_irq_enable(u { unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); unmask_mips_irq(irq); - local_irq_restore(flags); + raw_local_irq_restore(flags); } static void mips_cpu_irq_disable(unsigned int irq) { unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); mask_mips_irq(irq); - local_irq_restore(flags); + raw_local_irq_restore(flags); } static unsigned int mips_cpu_irq_startup(unsigned int irq) @@ -92,14 +92,13 @@ static void mips_cpu_irq_end(unsigned in } static hw_irq_controller mips_cpu_irq_controller = { - "MIPS", - mips_cpu_irq_startup, - mips_cpu_irq_shutdown, - mips_cpu_irq_enable, - mips_cpu_irq_disable, - mips_cpu_irq_ack, - 
mips_cpu_irq_end, - NULL /* no affinity stuff for UP */ + .typename = "MIPS", + .startup = mips_cpu_irq_startup, + .shutdown = mips_cpu_irq_shutdown, + .enable = mips_cpu_irq_enable, + .disable = mips_cpu_irq_disable, + .ack = mips_cpu_irq_ack, + .end = mips_cpu_irq_end, }; Index: linux/arch/mips/kernel/module.c =================================================================== --- linux.orig/arch/mips/kernel/module.c +++ linux/arch/mips/kernel/module.c @@ -2,7 +2,7 @@ #include static LIST_HEAD(dbe_list); -static DEFINE_SPINLOCK(dbe_lock); +static DEFINE_RAW_SPINLOCK(dbe_lock); /* Given an address, look for it in the module exception tables. */ const struct exception_table_entry *search_module_dbetables(unsigned long addr) Index: linux/arch/mips/kernel/process.c =================================================================== --- linux.orig/arch/mips/kernel/process.c +++ linux/arch/mips/kernel/process.c @@ -55,10 +55,12 @@ ATTRIB_NORET void cpu_idle(void) { /* endless idle loop with no priority at all */ while (1) { - while (!need_resched()) + while (!need_resched() && !need_resched_delayed()) if (cpu_wait) (*cpu_wait)(); - schedule(); + raw_local_irq_disable(); + __schedule(); + raw_local_irq_enable(); } } Index: linux/arch/mips/kernel/scall32-o32.S =================================================================== --- linux.orig/arch/mips/kernel/scall32-o32.S +++ linux/arch/mips/kernel/scall32-o32.S @@ -72,7 +72,7 @@ stack_done: 1: sw v0, PT_R2(sp) # result o32_syscall_exit: - local_irq_disable # make sure need_resched and + mips_raw_local_irq_disable # make sure need_resched and # signals dont change between # sampling and return lw a2, TI_FLAGS($28) # current->work Index: linux/arch/mips/kernel/scall64-64.S =================================================================== --- linux.orig/arch/mips/kernel/scall64-64.S +++ linux/arch/mips/kernel/scall64-64.S @@ -71,7 +71,7 @@ NESTED(handle_sys64, PT_SIZE, sp) 1: sd v0, PT_R2(sp) # result n64_syscall_exit: - 
local_irq_disable # make sure need_resched and + raw_local_irq_disable # make sure need_resched and # signals dont change between # sampling and return LONG_L a2, TI_FLAGS($28) # current->work Index: linux/arch/mips/kernel/scall64-n32.S =================================================================== --- linux.orig/arch/mips/kernel/scall64-n32.S +++ linux/arch/mips/kernel/scall64-n32.S @@ -68,7 +68,7 @@ NESTED(handle_sysn32, PT_SIZE, sp) sd v0, PT_R0(sp) # set flag for syscall restarting 1: sd v0, PT_R2(sp) # result - local_irq_disable # make sure need_resched and + raw_local_irq_disable # make sure need_resched and # signals dont change between # sampling and return LONG_L a2, TI_FLAGS($28) # current->work Index: linux/arch/mips/kernel/scall64-o32.S =================================================================== --- linux.orig/arch/mips/kernel/scall64-o32.S +++ linux/arch/mips/kernel/scall64-o32.S @@ -97,7 +97,7 @@ NESTED(handle_sys, PT_SIZE, sp) 1: sd v0, PT_R2(sp) # result o32_syscall_exit: - local_irq_disable # make need_resched and + raw_local_irq_disable # make need_resched and # signals dont change between # sampling and return LONG_L a2, TI_FLAGS($28) Index: linux/arch/mips/kernel/semaphore.c =================================================================== --- linux.orig/arch/mips/kernel/semaphore.c +++ linux/arch/mips/kernel/semaphore.c @@ -36,7 +36,7 @@ * sem->count and sem->waking atomic. Scalability isn't an issue because * this lock is used on UP only so it's just an empty variable. 
*/ -static inline int __sem_update_count(struct semaphore *sem, int incr) +static inline int __sem_update_count(struct compat_semaphore *sem, int incr) { int old_count, tmp; @@ -63,7 +63,7 @@ static inline int __sem_update_count(str : "=&r" (old_count), "=&r" (tmp), "=m" (sem->count) : "r" (incr), "m" (sem->count)); } else { - static DEFINE_SPINLOCK(semaphore_lock); + static DEFINE_RAW_SPINLOCK(semaphore_lock); unsigned long flags; spin_lock_irqsave(&semaphore_lock, flags); @@ -76,7 +76,7 @@ static inline int __sem_update_count(str return old_count; } -void __up(struct semaphore *sem) +void __compat_up(struct compat_semaphore *sem) { /* * Note that we incremented count in up() before we came here, @@ -90,7 +90,7 @@ void __up(struct semaphore *sem) wake_up(&sem->wait); } -EXPORT_SYMBOL(__up); +EXPORT_SYMBOL(__compat_up); /* * Note that when we come in to __down or __down_interruptible, @@ -100,7 +100,7 @@ EXPORT_SYMBOL(__up); * Thus it is only when we decrement count from some value > 0 * that we have actually got the semaphore. 
*/ -void __sched __down(struct semaphore *sem) +void __sched __compat_down(struct compat_semaphore *sem) { struct task_struct *tsk = current; DECLARE_WAITQUEUE(wait, tsk); @@ -129,9 +129,9 @@ void __sched __down(struct semaphore *se wake_up(&sem->wait); } -EXPORT_SYMBOL(__down); +EXPORT_SYMBOL(__compat_down); -int __sched __down_interruptible(struct semaphore * sem) +int __sched __compat_down_interruptible(struct compat_semaphore * sem) { int retval = 0; struct task_struct *tsk = current; @@ -161,4 +161,4 @@ int __sched __down_interruptible(struct return retval; } -EXPORT_SYMBOL(__down_interruptible); +EXPORT_SYMBOL(__compat_down_interruptible); Index: linux/arch/mips/kernel/signal.c =================================================================== --- linux.orig/arch/mips/kernel/signal.c +++ linux/arch/mips/kernel/signal.c @@ -448,6 +448,10 @@ static int do_signal(sigset_t *oldset, s } #endif +#ifdef CONFIG_PREEMPT_RT + raw_local_irq_enable(); + preempt_check_resched(); +#endif /* * We want the common case to go fast, which is why we may in certain * cases get here from kernel mode. Just return without doing anything Index: linux/arch/mips/kernel/signal32.c =================================================================== --- linux.orig/arch/mips/kernel/signal32.c +++ linux/arch/mips/kernel/signal32.c @@ -765,6 +765,10 @@ int do_signal32(sigset_t *oldset, struct siginfo_t info; int signr; +#ifdef CONFIG_PREEMPT_RT + raw_local_irq_enable(); + preempt_check_resched(); +#endif /* * We want the common case to go fast, which is why we may in certain * cases get here from kernel mode. 
Just return without doing anything Index: linux/arch/mips/kernel/smp.c =================================================================== --- linux.orig/arch/mips/kernel/smp.c +++ linux/arch/mips/kernel/smp.c @@ -105,7 +105,22 @@ asmlinkage void start_secondary(void) cpu_idle(); } -DEFINE_SPINLOCK(smp_call_lock); +DEFINE_RAW_SPINLOCK(smp_call_lock); + +/* + * this function sends a 'reschedule' IPI to all other CPUs. + * This is used when RT tasks are starving and other CPUs + * might be able to run them. + */ +void smp_send_reschedule_allbutself(void) +{ + int cpu = smp_processor_id(); + int i; + + for (i = 0; i < NR_CPUS; i++) + if (cpu_online(i) && i != cpu) + core_send_ipi(i, SMP_RESCHEDULE_YOURSELF); +} struct call_data_struct *call_data; @@ -197,7 +212,7 @@ static void stop_this_cpu(void *dummy) * Remove this CPU: */ cpu_clear(smp_processor_id(), cpu_online_map); - local_irq_enable(); /* May need to service _machine_restart IPI */ + raw_local_irq_enable(); /* May need to service _machine_restart IPI */ for (;;); /* Wait if available. 
*/ } @@ -284,6 +299,8 @@ int setup_profiling_timer(unsigned int m return 0; } +static DEFINE_RAW_SPINLOCK(tlbstate_lock); + static void flush_tlb_all_ipi(void *info) { local_flush_tlb_all(); @@ -315,6 +332,7 @@ static void flush_tlb_mm_ipi(void *mm) void flush_tlb_mm(struct mm_struct *mm) { preempt_disable(); + spin_lock(&tlbstate_lock); if ((atomic_read(&mm->mm_users) != 1) || (current->mm != mm)) { smp_call_function(flush_tlb_mm_ipi, (void *)mm, 1, 1); @@ -324,6 +342,7 @@ void flush_tlb_mm(struct mm_struct *mm) if (smp_processor_id() != i) cpu_context(i, mm) = 0; } + spin_unlock(&tlbstate_lock); local_flush_tlb_mm(mm); preempt_enable(); @@ -347,6 +366,8 @@ void flush_tlb_range(struct vm_area_stru struct mm_struct *mm = vma->vm_mm; preempt_disable(); + spin_lock(&tlbstate_lock); + if ((atomic_read(&mm->mm_users) != 1) || (current->mm != mm)) { struct flush_tlb_data fd; @@ -360,6 +381,7 @@ void flush_tlb_range(struct vm_area_stru if (smp_processor_id() != i) cpu_context(i, mm) = 0; } + spin_unlock(&tlbstate_lock); local_flush_tlb_range(vma, start, end); preempt_enable(); } @@ -390,6 +412,8 @@ static void flush_tlb_page_ipi(void *inf void flush_tlb_page(struct vm_area_struct *vma, unsigned long page) { preempt_disable(); + spin_lock(&tlbstate_lock); + if ((atomic_read(&vma->vm_mm->mm_users) != 1) || (current->mm != vma->vm_mm)) { struct flush_tlb_data fd; @@ -402,6 +426,7 @@ void flush_tlb_page(struct vm_area_struc if (smp_processor_id() != i) cpu_context(i, vma->vm_mm) = 0; } + spin_unlock(&tlbstate_lock); local_flush_tlb_page(vma, page); preempt_enable(); } Index: linux/arch/mips/kernel/time.c =================================================================== --- linux.orig/arch/mips/kernel/time.c +++ linux/arch/mips/kernel/time.c @@ -43,16 +43,12 @@ #define TICK_SIZE (tick_nsec / 1000) -u64 jiffies_64 = INITIAL_JIFFIES; - -EXPORT_SYMBOL(jiffies_64); - /* * forward reference */ extern volatile unsigned long wall_jiffies; -DEFINE_SPINLOCK(rtc_lock); 
+DEFINE_RAW_SPINLOCK(rtc_lock); /* * By default we provide the null RTC ops @@ -552,7 +548,7 @@ unsigned int mips_hpt_frequency; static struct irqaction timer_irqaction = { .handler = timer_interrupt, - .flags = SA_INTERRUPT, + .flags = SA_NODELAY | SA_INTERRUPT, .name = "timer", }; Index: linux/arch/mips/kernel/traps.c =================================================================== --- linux.orig/arch/mips/kernel/traps.c +++ linux/arch/mips/kernel/traps.c @@ -250,7 +250,7 @@ void show_registers(struct pt_regs *regs printk("\n"); } -static DEFINE_SPINLOCK(die_lock); +static DEFINE_RAW_SPINLOCK(die_lock); NORET_TYPE void __die(const char * str, struct pt_regs * regs, const char * file, const char * func, unsigned long line) Index: linux/arch/mips/lasat/interrupt.c =================================================================== --- linux.orig/arch/mips/lasat/interrupt.c +++ linux/arch/mips/lasat/interrupt.c @@ -39,18 +39,18 @@ void disable_lasat_irq(unsigned int irq_ { unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); *lasat_int_mask &= ~(1 << irq_nr) << lasat_int_mask_shift; - local_irq_restore(flags); + raw_local_irq_restore(flags); } void enable_lasat_irq(unsigned int irq_nr) { unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); *lasat_int_mask |= (1 << irq_nr) << lasat_int_mask_shift; - local_irq_restore(flags); + raw_local_irq_restore(flags); } static unsigned int startup_lasat_irq(unsigned int irq) @@ -71,14 +71,13 @@ static void end_lasat_irq(unsigned int i } static struct hw_interrupt_type lasat_irq_type = { - "Lasat", - startup_lasat_irq, - shutdown_lasat_irq, - enable_lasat_irq, - disable_lasat_irq, - mask_and_ack_lasat_irq, - end_lasat_irq, - NULL + .typename = "Lasat", + .startup = startup_lasat_irq, + .shutdown = shutdown_lasat_irq, + .enable = enable_lasat_irq, + .disable = disable_lasat_irq, + .ack = mask_and_ack_lasat_irq, + .end = end_lasat_irq, }; static inline int ls1bit32(unsigned int x) 
Index: linux/arch/mips/lasat/reset.c =================================================================== --- linux.orig/arch/mips/lasat/reset.c +++ linux/arch/mips/lasat/reset.c @@ -33,7 +33,7 @@ int lasat_boot_to_service = 0; static void lasat_machine_restart(char *command) { - local_irq_disable(); + raw_local_irq_disable(); if (lasat_boot_to_service) { printk("machine_restart: Rebooting to service mode\n"); @@ -47,7 +47,7 @@ static void lasat_machine_restart(char * #define MESSAGE "System halted" static void lasat_machine_halt(void) { - local_irq_disable(); + raw_local_irq_disable(); /* Disable interrupts and loop forever */ printk(KERN_NOTICE MESSAGE "\n"); Index: linux/arch/mips/lib-32/dump_tlb.c =================================================================== --- linux.orig/arch/mips/lib-32/dump_tlb.c +++ linux/arch/mips/lib-32/dump_tlb.c @@ -111,7 +111,7 @@ void dump_tlb_addr(unsigned long addr) unsigned int flags, oldpid; int index; - local_irq_save(flags); + raw_local_irq_save(flags); oldpid = read_c0_entryhi() & 0xff; BARRIER(); write_c0_entryhi((addr & PAGE_MASK) | oldpid); @@ -120,7 +120,7 @@ void dump_tlb_addr(unsigned long addr) BARRIER(); index = read_c0_index(); write_c0_entryhi(oldpid); - local_irq_restore(flags); + raw_local_irq_restore(flags); if (index < 0) { printk("No entry for address 0x%08lx in TLB\n", addr); Index: linux/arch/mips/lib-32/r3k_dump_tlb.c =================================================================== --- linux.orig/arch/mips/lib-32/r3k_dump_tlb.c +++ linux/arch/mips/lib-32/r3k_dump_tlb.c @@ -79,13 +79,13 @@ void dump_tlb_addr(unsigned long addr) unsigned long flags, oldpid; int index; - local_irq_save(flags); + raw_local_irq_save(flags); oldpid = read_c0_entryhi() & 0xff; write_c0_entryhi((addr & PAGE_MASK) | oldpid); tlb_probe(); index = read_c0_index(); write_c0_entryhi(oldpid); - local_irq_restore(flags); + raw_local_irq_restore(flags); if (index < 0) { printk("No entry for address 0x%08lx in TLB\n", addr); Index: 
linux/arch/mips/lib-64/dump_tlb.c =================================================================== --- linux.orig/arch/mips/lib-64/dump_tlb.c +++ linux/arch/mips/lib-64/dump_tlb.c @@ -112,7 +112,7 @@ void dump_tlb_addr(unsigned long addr) unsigned int flags, oldpid; int index; - local_irq_save(flags); + raw_local_irq_save(flags); oldpid = read_c0_entryhi() & 0xff; BARRIER(); write_c0_entryhi((addr & PAGE_MASK) | oldpid); @@ -121,7 +121,7 @@ void dump_tlb_addr(unsigned long addr) BARRIER(); index = read_c0_index(); write_c0_entryhi(oldpid); - local_irq_restore(flags); + raw_local_irq_restore(flags); if (index < 0) { printk("No entry for address 0x%08lx in TLB\n", addr); Index: linux/arch/mips/math-emu/cp1emu.c =================================================================== --- linux.orig/arch/mips/math-emu/cp1emu.c +++ linux/arch/mips/math-emu/cp1emu.c @@ -1310,7 +1310,9 @@ int fpu_emulator_cop1Handler(int xcptno, if (sig) break; + preempt_enable(); cond_resched(); + preempt_disable(); } while (xcp->cp0_epc > prevepc); /* SIGILL indicates a non-fpu instruction */ Index: linux/arch/mips/mips-boards/atlas/atlas_int.c =================================================================== --- linux.orig/arch/mips/mips-boards/atlas/atlas_int.c +++ linux/arch/mips/mips-boards/atlas/atlas_int.c @@ -76,14 +76,13 @@ static void end_atlas_irq(unsigned int i } static struct hw_interrupt_type atlas_irq_type = { - "Atlas", - startup_atlas_irq, - shutdown_atlas_irq, - enable_atlas_irq, - disable_atlas_irq, - mask_and_ack_atlas_irq, - end_atlas_irq, - NULL + .typename = "Atlas", + .startup = startup_atlas_irq, + .shutdown = shutdown_atlas_irq, + .enable = enable_atlas_irq, + .disable = disable_atlas_irq, + .ack = mask_and_ack_atlas_irq, + .end = end_atlas_irq, }; static inline int ls1bit32(unsigned int x) Index: linux/arch/mips/mips-boards/generic/mipsIRQ.S =================================================================== --- 
linux.orig/arch/mips/mips-boards/generic/mipsIRQ.S +++ linux/arch/mips/mips-boards/generic/mipsIRQ.S @@ -143,11 +143,23 @@ * time we take the exception the IRQ pin goes low, so just leave if * this is the case. */ +#define PREEMPT_RT_MALTA_DEBUG +#ifdef PREEMPT_RT_MALTA_DEBUG + lui t0, 0x1000 + and a0, s0, t0 + bne a0, zero, 1f + nop # delay slot + beq s0, zero, 1f + nop # delay slot +#endif move a1,s0 PRINT("Got interrupt: c0_cause = %08x\n") mfc0 a1, CP0_EPC PRINT("c0_epc = %08x\n") +#ifdef PREEMPT_RT_MALTA_DEBUG +1: +#endif j ret_from_irq nop END(mipsIRQ) Index: linux/arch/mips/mips-boards/generic/time.c =================================================================== --- linux.orig/arch/mips/mips-boards/generic/time.c +++ linux/arch/mips/mips-boards/generic/time.c @@ -99,7 +99,7 @@ static unsigned int __init estimate_cpu_ #if defined(CONFIG_MIPS_ATLAS) || defined(CONFIG_MIPS_MALTA) unsigned int flags; - local_irq_save(flags); + raw_local_irq_save(flags); /* Start counter exactly on falling edge of update flag */ while (CMOS_READ(RTC_REG_A) & RTC_UIP); @@ -115,7 +115,7 @@ static unsigned int __init estimate_cpu_ count = read_c0_count(); /* restore interrupts */ - local_irq_restore(flags); + raw_local_irq_restore(flags); #endif mips_hpt_frequency = count; @@ -138,7 +138,7 @@ void __init mips_time_init(void) { unsigned int est_freq, flags; - local_irq_save(flags); + raw_local_irq_save(flags); #if defined(CONFIG_MIPS_ATLAS) || defined(CONFIG_MIPS_MALTA) /* Set Data mode - binary. 
*/ @@ -152,7 +152,7 @@ void __init mips_time_init(void) cpu_khz = est_freq / 1000; - local_irq_restore(flags); + raw_local_irq_restore(flags); } void __init mips_timer_setup(struct irqaction *irq) Index: linux/arch/mips/mm/c-r4k.c =================================================================== --- linux.orig/arch/mips/mm/c-r4k.c +++ linux/arch/mips/mm/c-r4k.c @@ -110,9 +110,9 @@ static inline void blast_r4600_v1_icache { unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); blast_icache32(); - local_irq_restore(flags); + raw_local_irq_restore(flags); } static inline void tx49_blast_icache32(void) @@ -140,9 +140,9 @@ static inline void blast_icache32_r4600_ { unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); blast_icache32_page_indexed(page); - local_irq_restore(flags); + raw_local_irq_restore(flags); } static inline void tx49_blast_icache32_page_indexed(unsigned long page) @@ -1063,7 +1063,7 @@ static int __init probe_scache(void) * This is such a bitch, you'd think they would make it easy to do * this. Away you daemons of stupidity! */ - local_irq_save(flags); + raw_local_irq_save(flags); /* Fill each size-multiple cache line with a valid tag. 
*/ pow2 = (64 * 1024); @@ -1091,7 +1091,7 @@ static int __init probe_scache(void) break; pow2 <<= 1; } - local_irq_restore(flags); + raw_local_irq_restore(flags); addr -= begin; scache_size = addr; Index: linux/arch/mips/mm/c-tx39.c =================================================================== --- linux.orig/arch/mips/mm/c-tx39.c +++ linux/arch/mips/mm/c-tx39.c @@ -49,7 +49,7 @@ static void tx39h_flush_icache_all(void) unsigned long flags, config; /* disable icache (set ICE#) */ - local_irq_save(flags); + raw_local_irq_save(flags); config = read_c0_conf(); write_c0_conf(config & ~TX39_CONF_ICE); TX39_STOP_STREAMING(); @@ -61,7 +61,7 @@ static void tx39h_flush_icache_all(void) } write_c0_conf(config); - local_irq_restore(flags); + raw_local_irq_restore(flags); } static void tx39h_dma_cache_wback_inv(unsigned long addr, unsigned long size) @@ -104,39 +104,39 @@ static inline void tx39_blast_icache_pag { unsigned long flags, config; /* disable icache (set ICE#) */ - local_irq_save(flags); + raw_local_irq_save(flags); config = read_c0_conf(); write_c0_conf(config & ~TX39_CONF_ICE); TX39_STOP_STREAMING(); blast_icache16_page(addr); write_c0_conf(config); - local_irq_restore(flags); + raw_local_irq_restore(flags); } static inline void tx39_blast_icache_page_indexed(unsigned long addr) { unsigned long flags, config; /* disable icache (set ICE#) */ - local_irq_save(flags); + raw_local_irq_save(flags); config = read_c0_conf(); write_c0_conf(config & ~TX39_CONF_ICE); TX39_STOP_STREAMING(); blast_icache16_page_indexed(addr); write_c0_conf(config); - local_irq_restore(flags); + raw_local_irq_restore(flags); } static inline void tx39_blast_icache(void) { unsigned long flags, config; /* disable icache (set ICE#) */ - local_irq_save(flags); + raw_local_irq_save(flags); config = read_c0_conf(); write_c0_conf(config & ~TX39_CONF_ICE); TX39_STOP_STREAMING(); blast_icache16(); write_c0_conf(config); - local_irq_restore(flags); + raw_local_irq_restore(flags); } static inline 
void tx39_flush_cache_all(void) @@ -263,7 +263,7 @@ static void tx39_flush_icache_range(unsi addr = start & ~(dc_lsize - 1); aend = (end - 1) & ~(dc_lsize - 1); /* disable icache (set ICE#) */ - local_irq_save(flags); + raw_local_irq_save(flags); config = read_c0_conf(); write_c0_conf(config & ~TX39_CONF_ICE); TX39_STOP_STREAMING(); @@ -275,7 +275,7 @@ static void tx39_flush_icache_range(unsi addr += dc_lsize; } write_c0_conf(config); - local_irq_restore(flags); + raw_local_irq_restore(flags); } } @@ -364,13 +364,13 @@ static void tx39_flush_cache_sigtramp(un protected_writeback_dcache_line(addr & ~(dc_lsize - 1)); /* disable icache (set ICE#) */ - local_irq_save(flags); + raw_local_irq_save(flags); config = read_c0_conf(); write_c0_conf(config & ~TX39_CONF_ICE); TX39_STOP_STREAMING(); protected_flush_icache_line(addr & ~(ic_lsize - 1)); write_c0_conf(config); - local_irq_restore(flags); + raw_local_irq_restore(flags); } static __init void tx39_probe_cache(void) Index: linux/arch/mips/mm/init.c =================================================================== --- linux.orig/arch/mips/mm/init.c +++ linux/arch/mips/mm/init.c @@ -35,7 +35,7 @@ #include #include -DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); +DEFINE_PER_CPU_LOCKED(struct mmu_gather, mmu_gathers); unsigned long highstart_pfn, highend_pfn; Index: linux/arch/mips/mm/sc-ip22.c =================================================================== --- linux.orig/arch/mips/mm/sc-ip22.c +++ linux/arch/mips/mm/sc-ip22.c @@ -72,7 +72,7 @@ static void indy_sc_wback_invalidate(uns first_line = SC_INDEX(addr); last_line = SC_INDEX(addr + size - 1); - local_irq_save(flags); + raw_local_irq_save(flags); if (first_line <= last_line) { indy_sc_wipe(first_line, last_line); goto out; @@ -81,7 +81,7 @@ static void indy_sc_wback_invalidate(uns indy_sc_wipe(first_line, SC_SIZE - SC_LINE); indy_sc_wipe(0, last_line); out: - local_irq_restore(flags); + raw_local_irq_restore(flags); } static void indy_sc_enable(void) Index: 
linux/arch/mips/mm/sc-r5k.c =================================================================== --- linux.orig/arch/mips/mm/sc-r5k.c +++ linux/arch/mips/mm/sc-r5k.c @@ -61,20 +61,20 @@ static void r5k_sc_enable(void) { unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); set_c0_config(R5K_CONF_SE); blast_r5000_scache(); - local_irq_restore(flags); + raw_local_irq_restore(flags); } static void r5k_sc_disable(void) { unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); blast_r5000_scache(); clear_c0_config(R5K_CONF_SE); - local_irq_restore(flags); + raw_local_irq_restore(flags); } static inline int __init r5k_sc_probe(void) Index: linux/arch/mips/mm/tlb-andes.c =================================================================== --- linux.orig/arch/mips/mm/tlb-andes.c +++ linux/arch/mips/mm/tlb-andes.c @@ -27,7 +27,7 @@ void local_flush_tlb_all(void) unsigned long old_ctx; unsigned long entry; - local_irq_save(flags); + raw_local_irq_save(flags); /* Save old context and create impossible VPN2 value */ old_ctx = read_c0_entryhi() & ASID_MASK; write_c0_entryhi(CKSEG0); @@ -43,7 +43,7 @@ void local_flush_tlb_all(void) entry++; } write_c0_entryhi(old_ctx); - local_irq_restore(flags); + raw_local_irq_restore(flags); } void local_flush_tlb_mm(struct mm_struct *mm) @@ -64,7 +64,7 @@ void local_flush_tlb_range(struct vm_are unsigned long flags; int size; - local_irq_save(flags); + raw_local_irq_save(flags); size = (end - start + (PAGE_SIZE - 1)) >> PAGE_SHIFT; size = (size + 1) >> 1; if (size <= NTLB_ENTRIES_HALF) { @@ -93,7 +93,7 @@ void local_flush_tlb_range(struct vm_are } else { drop_mmu_context(mm, cpu); } - local_irq_restore(flags); + raw_local_irq_restore(flags); } } @@ -105,7 +105,7 @@ void local_flush_tlb_kernel_range(unsign size = (end - start + (PAGE_SIZE - 1)) >> PAGE_SHIFT; size = (size + 1) >> 1; - local_irq_save(flags); + raw_local_irq_save(flags); if (size <= NTLB_ENTRIES_HALF) { int pid = read_c0_entryhi(); @@ 
-131,7 +131,7 @@ void local_flush_tlb_kernel_range(unsign } else { local_flush_tlb_all(); } - local_irq_restore(flags); + raw_local_irq_restore(flags); } void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long page) @@ -143,7 +143,7 @@ void local_flush_tlb_page(struct vm_area newpid = (cpu_context(smp_processor_id(), vma->vm_mm) & ASID_MASK); page &= (PAGE_MASK << 1); - local_irq_save(flags); + raw_local_irq_save(flags); oldpid = (read_c0_entryhi() & ASID_MASK); write_c0_entryhi(page | newpid); tlb_probe(); @@ -157,7 +157,7 @@ void local_flush_tlb_page(struct vm_area finish: write_c0_entryhi(oldpid); - local_irq_restore(flags); + raw_local_irq_restore(flags); } } @@ -170,7 +170,7 @@ void local_flush_tlb_one(unsigned long p unsigned long flags; int oldpid, idx; - local_irq_save(flags); + raw_local_irq_save(flags); page &= (PAGE_MASK << 1); oldpid = read_c0_entryhi() & 0xff; write_c0_entryhi(page); @@ -185,7 +185,7 @@ void local_flush_tlb_one(unsigned long p } write_c0_entryhi(oldpid); - local_irq_restore(flags); + raw_local_irq_restore(flags); } /* XXX Simplify this. On the R10000 writing a TLB entry for an virtual @@ -215,7 +215,7 @@ void __update_tlb(struct vm_area_struct vma->vm_mm) & ASID_MASK), pid); } - local_irq_save(flags); + raw_local_irq_save(flags); address &= (PAGE_MASK << 1); write_c0_entryhi(address | (pid)); pgdp = pgd_offset(vma->vm_mm, address); @@ -232,7 +232,7 @@ void __update_tlb(struct vm_area_struct tlb_write_indexed(); } write_c0_entryhi(pid); - local_irq_restore(flags); + raw_local_irq_restore(flags); } void __init tlb_init(void) Index: linux/arch/mips/mm/tlb-r3k.c =================================================================== --- linux.orig/arch/mips/mm/tlb-r3k.c +++ linux/arch/mips/mm/tlb-r3k.c @@ -49,7 +49,7 @@ void local_flush_tlb_all(void) printk("[tlball]"); #endif - local_irq_save(flags); + raw_local_irq_save(flags); old_ctx = read_c0_entryhi() & ASID_MASK; write_c0_entrylo0(0); entry = r3k_have_wired_reg ? 
read_c0_wired() : 8; @@ -60,7 +60,7 @@ void local_flush_tlb_all(void) tlb_write_indexed(); } write_c0_entryhi(old_ctx); - local_irq_restore(flags); + raw_local_irq_restore(flags); } void local_flush_tlb_mm(struct mm_struct *mm) @@ -89,7 +89,7 @@ void local_flush_tlb_range(struct vm_are printk("[tlbrange<%lu,0x%08lx,0x%08lx>]", cpu_context(cpu, mm) & ASID_MASK, start, end); #endif - local_irq_save(flags); + raw_local_irq_save(flags); size = (end - start + (PAGE_SIZE - 1)) >> PAGE_SHIFT; if (size <= current_cpu_data.tlbsize) { int oldpid = read_c0_entryhi() & ASID_MASK; @@ -115,7 +115,7 @@ void local_flush_tlb_range(struct vm_are } else { drop_mmu_context(mm, cpu); } - local_irq_restore(flags); + raw_local_irq_restore(flags); } } @@ -127,7 +127,7 @@ void local_flush_tlb_kernel_range(unsign #ifdef DEBUG_TLB printk("[tlbrange<%lu,0x%08lx,0x%08lx>]", start, end); #endif - local_irq_save(flags); + raw_local_irq_save(flags); size = (end - start + (PAGE_SIZE - 1)) >> PAGE_SHIFT; if (size <= current_cpu_data.tlbsize) { int pid = read_c0_entryhi(); @@ -153,7 +153,7 @@ void local_flush_tlb_kernel_range(unsign } else { local_flush_tlb_all(); } - local_irq_restore(flags); + raw_local_irq_restore(flags); } void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long page) @@ -169,7 +169,7 @@ void local_flush_tlb_page(struct vm_area #endif newpid = cpu_context(cpu, vma->vm_mm) & ASID_MASK; page &= PAGE_MASK; - local_irq_save(flags); + raw_local_irq_save(flags); oldpid = read_c0_entryhi() & ASID_MASK; write_c0_entryhi(page | newpid); BARRIER; @@ -183,7 +183,7 @@ void local_flush_tlb_page(struct vm_area finish: write_c0_entryhi(oldpid); - local_irq_restore(flags); + raw_local_irq_restore(flags); } } @@ -207,7 +207,7 @@ void __update_tlb(struct vm_area_struct } #endif - local_irq_save(flags); + raw_local_irq_save(flags); address &= PAGE_MASK; write_c0_entryhi(address | pid); BARRIER; @@ -221,7 +221,7 @@ void __update_tlb(struct vm_area_struct tlb_write_indexed(); } 
write_c0_entryhi(pid); - local_irq_restore(flags); + raw_local_irq_restore(flags); } void __init add_wired_entry(unsigned long entrylo0, unsigned long entrylo1, @@ -240,7 +240,7 @@ void __init add_wired_entry(unsigned lon entrylo0, entryhi, pagemask); #endif - local_irq_save(flags); + raw_local_irq_save(flags); /* Save old context and create impossible VPN2 value */ old_ctx = read_c0_entryhi() & ASID_MASK; old_pagemask = read_c0_pagemask(); @@ -260,7 +260,7 @@ void __init add_wired_entry(unsigned lon write_c0_entryhi(old_ctx); write_c0_pagemask(old_pagemask); local_flush_tlb_all(); - local_irq_restore(flags); + raw_local_irq_restore(flags); } else if (wired < 8) { #ifdef DEBUG_TLB @@ -268,7 +268,7 @@ void __init add_wired_entry(unsigned lon entrylo0, entryhi); #endif - local_irq_save(flags); + raw_local_irq_save(flags); old_ctx = read_c0_entryhi() & ASID_MASK; write_c0_entrylo0(entrylo0); write_c0_entryhi(entryhi); @@ -277,7 +277,7 @@ void __init add_wired_entry(unsigned lon tlb_write_indexed(); write_c0_entryhi(old_ctx); local_flush_tlb_all(); - local_irq_restore(flags); + raw_local_irq_restore(flags); } } Index: linux/arch/mips/mm/tlb-r4k.c =================================================================== --- linux.orig/arch/mips/mm/tlb-r4k.c +++ linux/arch/mips/mm/tlb-r4k.c @@ -32,7 +32,7 @@ void local_flush_tlb_all(void) unsigned long old_ctx; int entry; - local_irq_save(flags); + raw_local_irq_save(flags); /* Save old context and create impossible VPN2 value */ old_ctx = read_c0_entryhi(); write_c0_entrylo0(0); @@ -54,7 +54,7 @@ void local_flush_tlb_all(void) } tlbw_use_hazard(); write_c0_entryhi(old_ctx); - local_irq_restore(flags); + raw_local_irq_restore(flags); } void local_flush_tlb_mm(struct mm_struct *mm) @@ -75,7 +75,7 @@ void local_flush_tlb_range(struct vm_are unsigned long flags; int size; - local_irq_save(flags); + raw_local_irq_save(flags); size = (end - start + (PAGE_SIZE - 1)) >> PAGE_SHIFT; size = (size + 1) >> 1; if (size <= 
current_cpu_data.tlbsize/2) { @@ -109,7 +109,7 @@ void local_flush_tlb_range(struct vm_are } else { drop_mmu_context(mm, cpu); } - local_irq_restore(flags); + raw_local_irq_restore(flags); } } @@ -118,7 +118,7 @@ void local_flush_tlb_kernel_range(unsign unsigned long flags; int size; - local_irq_save(flags); + raw_local_irq_save(flags); size = (end - start + (PAGE_SIZE - 1)) >> PAGE_SHIFT; size = (size + 1) >> 1; if (size <= current_cpu_data.tlbsize / 2) { @@ -151,7 +151,7 @@ void local_flush_tlb_kernel_range(unsign } else { local_flush_tlb_all(); } - local_irq_restore(flags); + raw_local_irq_restore(flags); } void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long page) @@ -164,7 +164,7 @@ void local_flush_tlb_page(struct vm_area newpid = cpu_asid(cpu, vma->vm_mm); page &= (PAGE_MASK << 1); - local_irq_save(flags); + raw_local_irq_save(flags); oldpid = read_c0_entryhi(); write_c0_entryhi(page | newpid); mtc0_tlbw_hazard(); @@ -183,7 +183,7 @@ void local_flush_tlb_page(struct vm_area finish: write_c0_entryhi(oldpid); - local_irq_restore(flags); + raw_local_irq_restore(flags); } } @@ -196,7 +196,7 @@ void local_flush_tlb_one(unsigned long p unsigned long flags; int oldpid, idx; - local_irq_save(flags); + raw_local_irq_save(flags); page &= (PAGE_MASK << 1); oldpid = read_c0_entryhi(); write_c0_entryhi(page); @@ -215,7 +215,7 @@ void local_flush_tlb_one(unsigned long p } write_c0_entryhi(oldpid); - local_irq_restore(flags); + raw_local_irq_restore(flags); } /* @@ -239,7 +239,7 @@ void __update_tlb(struct vm_area_struct pid = read_c0_entryhi() & ASID_MASK; - local_irq_save(flags); + raw_local_irq_save(flags); address &= (PAGE_MASK << 1); write_c0_entryhi(address | pid); pgdp = pgd_offset(vma->vm_mm, address); @@ -266,7 +266,7 @@ void __update_tlb(struct vm_area_struct tlb_write_indexed(); tlbw_use_hazard(); write_c0_entryhi(pid); - local_irq_restore(flags); + raw_local_irq_restore(flags); } #if 0 @@ -280,7 +280,7 @@ static void r4k_update_mmu_cache_hwbug(s 
pte_t *ptep; int idx; - local_irq_save(flags); + raw_local_irq_save(flags); address &= (PAGE_MASK << 1); asid = read_c0_entryhi() & ASID_MASK; write_c0_entryhi(address | asid); @@ -299,7 +299,7 @@ static void r4k_update_mmu_cache_hwbug(s else tlb_write_indexed(); tlbw_use_hazard(); - local_irq_restore(flags); + raw_local_irq_restore(flags); } #endif @@ -311,7 +311,7 @@ void __init add_wired_entry(unsigned lon unsigned long old_pagemask; unsigned long old_ctx; - local_irq_save(flags); + raw_local_irq_save(flags); /* Save old context and create impossible VPN2 value */ old_ctx = read_c0_entryhi(); old_pagemask = read_c0_pagemask(); @@ -331,7 +331,7 @@ void __init add_wired_entry(unsigned lon BARRIER; write_c0_pagemask(old_pagemask); local_flush_tlb_all(); - local_irq_restore(flags); + raw_local_irq_restore(flags); } /* @@ -351,7 +351,7 @@ __init int add_temporary_entry(unsigned unsigned long old_pagemask; unsigned long old_ctx; - local_irq_save(flags); + raw_local_irq_save(flags); /* Save old context and create impossible VPN2 value */ old_ctx = read_c0_entryhi(); old_pagemask = read_c0_pagemask(); @@ -374,7 +374,7 @@ __init int add_temporary_entry(unsigned write_c0_entryhi(old_ctx); write_c0_pagemask(old_pagemask); out: - local_irq_restore(flags); + raw_local_irq_restore(flags); return ret; } Index: linux/arch/mips/mm/tlb-r8k.c =================================================================== --- linux.orig/arch/mips/mm/tlb-r8k.c +++ linux/arch/mips/mm/tlb-r8k.c @@ -35,7 +35,7 @@ void local_flush_tlb_all(void) unsigned long old_ctx; int entry; - local_irq_save(flags); + raw_local_irq_save(flags); /* Save old context and create impossible VPN2 value */ old_ctx = read_c0_entryhi(); write_c0_entrylo(0); @@ -49,7 +49,7 @@ void local_flush_tlb_all(void) } tlbw_use_hazard(); write_c0_entryhi(old_ctx); - local_irq_restore(flags); + raw_local_irq_restore(flags); } void local_flush_tlb_mm(struct mm_struct *mm) @@ -74,7 +74,7 @@ void local_flush_tlb_range(struct vm_are size 
= (end - start + (PAGE_SIZE - 1)) >> PAGE_SHIFT; size = (size + 1) >> 1; - local_irq_save(flags); + raw_local_irq_save(flags); if (size > TFP_TLB_SIZE / 2) { drop_mmu_context(mm, cpu); @@ -106,7 +106,7 @@ void local_flush_tlb_range(struct vm_are write_c0_entryhi(oldpid); out_restore: - local_irq_restore(flags); + raw_local_irq_restore(flags); } /* Usable for KV1 addresses only! */ @@ -123,7 +123,7 @@ void local_flush_tlb_kernel_range(unsign return; } - local_irq_save(flags); + raw_local_irq_save(flags); write_c0_entrylo(0); @@ -145,7 +145,7 @@ void local_flush_tlb_kernel_range(unsign tlb_write(); } - local_irq_restore(flags); + raw_local_irq_restore(flags); } void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long page) @@ -160,7 +160,7 @@ void local_flush_tlb_page(struct vm_area newpid = cpu_asid(cpu, vma->vm_mm); page &= PAGE_MASK; - local_irq_save(flags); + raw_local_irq_save(flags); oldpid = read_c0_entryhi(); write_c0_vaddr(page); write_c0_entryhi(newpid); @@ -175,7 +175,7 @@ void local_flush_tlb_page(struct vm_area finish: write_c0_entryhi(oldpid); - local_irq_restore(flags); + raw_local_irq_restore(flags); } /* @@ -199,7 +199,7 @@ void __update_tlb(struct vm_area_struct pid = read_c0_entryhi() & ASID_MASK; - local_irq_save(flags); + raw_local_irq_save(flags); address &= PAGE_MASK; write_c0_vaddr(address); write_c0_entryhi(pid); @@ -212,7 +212,7 @@ void __update_tlb(struct vm_area_struct tlb_write(); write_c0_entryhi(pid); - local_irq_restore(flags); + raw_local_irq_restore(flags); } static void __init probe_tlb(unsigned long config) Index: linux/arch/mips/mm/tlb-sb1.c =================================================================== --- linux.orig/arch/mips/mm/tlb-sb1.c +++ linux/arch/mips/mm/tlb-sb1.c @@ -66,7 +66,7 @@ void sb1_dump_tlb(void) unsigned long old_ctx; unsigned long flags; int entry; - local_irq_save(flags); + raw_local_irq_save(flags); old_ctx = read_c0_entryhi(); printk("Current TLB registers state:\n" " EntryHi EntryLo0 
EntryLo1 PageMask Index\n" @@ -83,7 +83,7 @@ void sb1_dump_tlb(void) } printk("\n"); write_c0_entryhi(old_ctx); - local_irq_restore(flags); + raw_local_irq_restore(flags); } void local_flush_tlb_all(void) @@ -92,7 +92,7 @@ void local_flush_tlb_all(void) unsigned long old_ctx; int entry; - local_irq_save(flags); + raw_local_irq_save(flags); /* Save old context and create impossible VPN2 value */ old_ctx = read_c0_entryhi() & ASID_MASK; write_c0_entrylo0(0); @@ -106,7 +106,7 @@ void local_flush_tlb_all(void) entry++; } write_c0_entryhi(old_ctx); - local_irq_restore(flags); + raw_local_irq_restore(flags); } @@ -147,7 +147,7 @@ void local_flush_tlb_range(struct vm_are unsigned long flags; int cpu; - local_irq_save(flags); + raw_local_irq_save(flags); cpu = smp_processor_id(); if (cpu_context(cpu, mm) != 0) { int size; @@ -179,7 +179,7 @@ void local_flush_tlb_range(struct vm_are drop_mmu_context(mm, cpu); } } - local_irq_restore(flags); + raw_local_irq_restore(flags); } void local_flush_tlb_kernel_range(unsigned long start, unsigned long end) @@ -190,7 +190,7 @@ void local_flush_tlb_kernel_range(unsign size = (end - start + (PAGE_SIZE - 1)) >> PAGE_SHIFT; size = (size + 1) >> 1; - local_irq_save(flags); + raw_local_irq_save(flags); if (size <= (current_cpu_data.tlbsize/2)) { int pid = read_c0_entryhi(); @@ -216,7 +216,7 @@ void local_flush_tlb_kernel_range(unsign } else { local_flush_tlb_all(); } - local_irq_restore(flags); + raw_local_irq_restore(flags); } void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long page) @@ -224,7 +224,7 @@ void local_flush_tlb_page(struct vm_area unsigned long flags; int cpu = smp_processor_id(); - local_irq_save(flags); + raw_local_irq_save(flags); if (cpu_context(cpu, vma->vm_mm) != 0) { int oldpid, newpid, idx; newpid = cpu_asid(cpu, vma->vm_mm); @@ -243,7 +243,7 @@ void local_flush_tlb_page(struct vm_area finish: write_c0_entryhi(oldpid); } - local_irq_restore(flags); + raw_local_irq_restore(flags); } /* @@ -258,7 +258,7 
@@ void local_flush_tlb_one(unsigned long p page &= (PAGE_MASK << 1); oldpid = read_c0_entryhi() & ASID_MASK; - local_irq_save(flags); + raw_local_irq_save(flags); write_c0_entryhi(page); tlb_probe(); idx = read_c0_index(); @@ -271,7 +271,7 @@ void local_flush_tlb_one(unsigned long p } write_c0_entryhi(oldpid); - local_irq_restore(flags); + raw_local_irq_restore(flags); } /* All entries common to a mm share an asid. To effectively flush @@ -307,7 +307,7 @@ void __update_tlb(struct vm_area_struct if (current->active_mm != vma->vm_mm) return; - local_irq_save(flags); + raw_local_irq_save(flags); pid = read_c0_entryhi() & ASID_MASK; address &= (PAGE_MASK << 1); @@ -324,7 +324,7 @@ void __update_tlb(struct vm_area_struct } else { tlb_write_indexed(); } - local_irq_restore(flags); + raw_local_irq_restore(flags); } void __init add_wired_entry(unsigned long entrylo0, unsigned long entrylo1, @@ -335,7 +335,7 @@ void __init add_wired_entry(unsigned lon unsigned long old_pagemask; unsigned long old_ctx; - local_irq_save(flags); + raw_local_irq_save(flags); old_ctx = read_c0_entryhi() & 0xff; old_pagemask = read_c0_pagemask(); wired = read_c0_wired(); @@ -352,7 +352,7 @@ void __init add_wired_entry(unsigned lon write_c0_pagemask(old_pagemask); local_flush_tlb_all(); - local_irq_restore(flags); + raw_local_irq_restore(flags); } /* Index: linux/arch/mips/momentum/ocelot_c/cpci-irq.c =================================================================== --- linux.orig/arch/mips/momentum/ocelot_c/cpci-irq.c +++ linux/arch/mips/momentum/ocelot_c/cpci-irq.c @@ -129,14 +129,13 @@ void ll_cpci_irq(struct pt_regs *regs) #define shutdown_cpci_irq disable_cpci_irq struct hw_interrupt_type cpci_irq_type = { - "CPCI/FPGA", - startup_cpci_irq, - shutdown_cpci_irq, - enable_cpci_irq, - disable_cpci_irq, - mask_and_ack_cpci_irq, - end_cpci_irq, - NULL + .typename = "CPCI/FPGA", + .startup = startup_cpci_irq, + .shutdown = shutdown_cpci_irq, + .enable = enable_cpci_irq, + .disable = 
disable_cpci_irq, + .ack = mask_and_ack_cpci_irq, + .end = end_cpci_irq, }; void cpci_irq_init(void) Index: linux/arch/mips/momentum/ocelot_c/uart-irq.c =================================================================== --- linux.orig/arch/mips/momentum/ocelot_c/uart-irq.c +++ linux/arch/mips/momentum/ocelot_c/uart-irq.c @@ -122,14 +122,13 @@ void ll_uart_irq(struct pt_regs *regs) #define shutdown_uart_irq disable_uart_irq struct hw_interrupt_type uart_irq_type = { - "UART/FPGA", - startup_uart_irq, - shutdown_uart_irq, - enable_uart_irq, - disable_uart_irq, - mask_and_ack_uart_irq, - end_uart_irq, - NULL + .typename = "UART/FPGA", + .startup = startup_uart_irq, + .shutdown = shutdown_uart_irq, + .enable = enable_uart_irq, + .disable = disable_uart_irq, + .ack = mask_and_ack_uart_irq, + .end = end_uart_irq, }; void uart_irq_init(void) Index: linux/arch/mips/momentum/ocelot_g/irq.c =================================================================== --- linux.orig/arch/mips/momentum/ocelot_g/irq.c +++ linux/arch/mips/momentum/ocelot_g/irq.c @@ -58,7 +58,7 @@ void __init arch_init_irq(void) * int-handler is not on bootstrap */ clear_c0_status(ST0_IM); - local_irq_disable(); + raw_local_irq_disable(); /* Sets the first-level interrupt dispatcher. 
*/ set_except_vector(0, ocelot_handle_int); Index: linux/arch/mips/pci/ops-au1000.c =================================================================== --- linux.orig/arch/mips/pci/ops-au1000.c +++ linux/arch/mips/pci/ops-au1000.c @@ -102,7 +102,7 @@ static int config_access(unsigned char a return -1; } - local_irq_save(flags); + raw_local_irq_save(flags); au_writel(((0x2000 << 16) | (au_readl(Au1500_PCI_STATCMD) & 0xffff)), Au1500_PCI_STATCMD); au_sync_udelay(1); @@ -135,7 +135,7 @@ static int config_access(unsigned char a if (board_pci_idsel) { if (board_pci_idsel(device, 1) == 0) { *data = 0xffffffff; - local_irq_restore(flags); + raw_local_irq_restore(flags); return -1; } } @@ -194,7 +194,7 @@ static int config_access(unsigned char a (void)board_pci_idsel(device, 0); } - local_irq_restore(flags); + raw_local_irq_restore(flags); return error; #endif } Index: linux/arch/mips/pmc-sierra/yosemite/smp.c =================================================================== --- linux.orig/arch/mips/pmc-sierra/yosemite/smp.c +++ linux/arch/mips/pmc-sierra/yosemite/smp.c @@ -19,7 +19,7 @@ static unsigned char launchstack[LAUNCHS static void __init prom_smp_bootstrap(void) { - local_irq_disable(); + raw_local_irq_disable(); while (spin_is_locked(&launch_lock)); Index: linux/arch/mips/sgi-ip22/ip22-eisa.c =================================================================== --- linux.orig/arch/mips/sgi-ip22/ip22-eisa.c +++ linux/arch/mips/sgi-ip22/ip22-eisa.c @@ -107,13 +107,13 @@ static void enable_eisa1_irq(unsigned in unsigned long flags; u8 mask; - local_irq_save(flags); + raw_local_irq_save(flags); mask = EISA_READ_8(EISA_INT1_MASK); mask &= ~((u8) (1 << irq)); EISA_WRITE_8(EISA_INT1_MASK, mask); - local_irq_restore(flags); + raw_local_irq_restore(flags); } static unsigned int startup_eisa1_irq(unsigned int irq) @@ -169,13 +169,13 @@ static void enable_eisa2_irq(unsigned in unsigned long flags; u8 mask; - local_irq_save(flags); + raw_local_irq_save(flags); mask = 
EISA_READ_8(EISA_INT2_MASK); mask &= ~((u8) (1 << (irq - 8))); EISA_WRITE_8(EISA_INT2_MASK, mask); - local_irq_restore(flags); + raw_local_irq_restore(flags); } static unsigned int startup_eisa2_irq(unsigned int irq) Index: linux/arch/mips/sgi-ip22/ip22-int.c =================================================================== --- linux.orig/arch/mips/sgi-ip22/ip22-int.c +++ linux/arch/mips/sgi-ip22/ip22-int.c @@ -44,12 +44,12 @@ static void enable_local0_irq(unsigned i { unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); /* don't allow mappable interrupt to be enabled from setup_irq, * we have our own way to do so */ if (irq != SGI_MAP_0_IRQ) sgint->imask0 |= (1 << (irq - SGINT_LOCAL0)); - local_irq_restore(flags); + raw_local_irq_restore(flags); } static unsigned int startup_local0_irq(unsigned int irq) @@ -62,9 +62,9 @@ static void disable_local0_irq(unsigned { unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); sgint->imask0 &= ~(1 << (irq - SGINT_LOCAL0)); - local_irq_restore(flags); + raw_local_irq_restore(flags); } #define shutdown_local0_irq disable_local0_irq @@ -90,12 +90,12 @@ static void enable_local1_irq(unsigned i { unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); /* don't allow mappable interrupt to be enabled from setup_irq, * we have our own way to do so */ if (irq != SGI_MAP_1_IRQ) sgint->imask1 |= (1 << (irq - SGINT_LOCAL1)); - local_irq_restore(flags); + raw_local_irq_restore(flags); } static unsigned int startup_local1_irq(unsigned int irq) @@ -108,9 +108,9 @@ void disable_local1_irq(unsigned int irq { unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); sgint->imask1 &= ~(1 << (irq - SGINT_LOCAL1)); - local_irq_restore(flags); + raw_local_irq_restore(flags); } #define shutdown_local1_irq disable_local1_irq @@ -136,10 +136,10 @@ static void enable_local2_irq(unsigned i { unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); 
sgint->imask0 |= (1 << (SGI_MAP_0_IRQ - SGINT_LOCAL0)); sgint->cmeimask0 |= (1 << (irq - SGINT_LOCAL2)); - local_irq_restore(flags); + raw_local_irq_restore(flags); } static unsigned int startup_local2_irq(unsigned int irq) @@ -152,11 +152,11 @@ void disable_local2_irq(unsigned int irq { unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); sgint->cmeimask0 &= ~(1 << (irq - SGINT_LOCAL2)); if (!sgint->cmeimask0) sgint->imask0 &= ~(1 << (SGI_MAP_0_IRQ - SGINT_LOCAL0)); - local_irq_restore(flags); + raw_local_irq_restore(flags); } #define shutdown_local2_irq disable_local2_irq @@ -182,10 +182,10 @@ static void enable_local3_irq(unsigned i { unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); sgint->imask1 |= (1 << (SGI_MAP_1_IRQ - SGINT_LOCAL1)); sgint->cmeimask1 |= (1 << (irq - SGINT_LOCAL3)); - local_irq_restore(flags); + raw_local_irq_restore(flags); } static unsigned int startup_local3_irq(unsigned int irq) @@ -198,11 +198,11 @@ void disable_local3_irq(unsigned int irq { unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); sgint->cmeimask1 &= ~(1 << (irq - SGINT_LOCAL3)); if (!sgint->cmeimask1) sgint->imask1 &= ~(1 << (SGI_MAP_1_IRQ - SGINT_LOCAL1)); - local_irq_restore(flags); + raw_local_irq_restore(flags); } #define shutdown_local3_irq disable_local3_irq Index: linux/arch/mips/sgi-ip22/ip22-reset.c =================================================================== --- linux.orig/arch/mips/sgi-ip22/ip22-reset.c +++ linux/arch/mips/sgi-ip22/ip22-reset.c @@ -66,7 +66,7 @@ static void sgi_machine_power_off(void) { unsigned int tmp; - local_irq_disable(); + raw_local_irq_disable(); /* Disable watchdog */ tmp = hpc3c0->rtcregs[RTC_CMD] & 0xff; Index: linux/arch/mips/sgi-ip27/ip27-smp.c =================================================================== --- linux.orig/arch/mips/sgi-ip27/ip27-smp.c +++ linux/arch/mips/sgi-ip27/ip27-smp.c @@ -188,7 +188,7 @@ void __init prom_boot_secondary(int cpu, void 
prom_init_secondary(void) { per_cpu_init(); - local_irq_enable(); + raw_local_irq_enable(); } void __init prom_cpus_done(void) Index: linux/arch/mips/sgi-ip32/ip32-irq.c =================================================================== --- linux.orig/arch/mips/sgi-ip32/ip32-irq.c +++ linux/arch/mips/sgi-ip32/ip32-irq.c @@ -163,14 +163,13 @@ static void end_cpu_irq(unsigned int irq #define mask_and_ack_cpu_irq disable_cpu_irq static struct hw_interrupt_type ip32_cpu_interrupt = { - "IP32 CPU", - startup_cpu_irq, - shutdown_cpu_irq, - enable_cpu_irq, - disable_cpu_irq, - mask_and_ack_cpu_irq, - end_cpu_irq, - NULL + .typename = "IP32 CPU", + .startup = startup_cpu_irq, + .shutdown = shutdown_cpu_irq, + .enable = enable_cpu_irq, + .disable = disable_cpu_irq, + .ack = mask_and_ack_cpu_irq, + .end = end_cpu_irq, }; /* @@ -234,14 +233,13 @@ static void end_crime_irq(unsigned int i #define shutdown_crime_irq disable_crime_irq static struct hw_interrupt_type ip32_crime_interrupt = { - "IP32 CRIME", - startup_crime_irq, - shutdown_crime_irq, - enable_crime_irq, - disable_crime_irq, - mask_and_ack_crime_irq, - end_crime_irq, - NULL + .typename = "IP32 CRIME", + .startup = startup_crime_irq, + .shutdown = shutdown_crime_irq, + .enable = enable_crime_irq, + .disable = disable_crime_irq, + .ack = mask_and_ack_crime_irq, + .end = end_crime_irq, }; /* @@ -294,14 +292,13 @@ static void end_macepci_irq(unsigned int #define mask_and_ack_macepci_irq disable_macepci_irq static struct hw_interrupt_type ip32_macepci_interrupt = { - "IP32 MACE PCI", - startup_macepci_irq, - shutdown_macepci_irq, - enable_macepci_irq, - disable_macepci_irq, - mask_and_ack_macepci_irq, - end_macepci_irq, - NULL + .typename = "IP32 MACE PCI", + .startup = startup_macepci_irq, + .shutdown = shutdown_macepci_irq, + .enable = enable_macepci_irq, + .disable = disable_macepci_irq, + .ack = mask_and_ack_macepci_irq, + .end = end_macepci_irq, }; /* This is used for MACE ISA interrupts. 
That means bits 4-6 in the @@ -425,14 +422,13 @@ static void end_maceisa_irq(unsigned irq #define shutdown_maceisa_irq disable_maceisa_irq static struct hw_interrupt_type ip32_maceisa_interrupt = { - "IP32 MACE ISA", - startup_maceisa_irq, - shutdown_maceisa_irq, - enable_maceisa_irq, - disable_maceisa_irq, - mask_and_ack_maceisa_irq, - end_maceisa_irq, - NULL + .typename = "IP32 MACE ISA", + .startup = startup_maceisa_irq, + .shutdown = shutdown_maceisa_irq, + .enable = enable_maceisa_irq, + .disable = disable_maceisa_irq, + .ack = mask_and_ack_maceisa_irq, + .end = end_maceisa_irq, }; /* This is used for regular non-ISA, non-PCI MACE interrupts. That means @@ -476,14 +472,13 @@ static void end_mace_irq(unsigned int ir #define mask_and_ack_mace_irq disable_mace_irq static struct hw_interrupt_type ip32_mace_interrupt = { - "IP32 MACE", - startup_mace_irq, - shutdown_mace_irq, - enable_mace_irq, - disable_mace_irq, - mask_and_ack_mace_irq, - end_mace_irq, - NULL + .typename = "IP32 MACE", + .startup = startup_mace_irq, + .shutdown = shutdown_mace_irq, + .enable = enable_mace_irq, + .disable = disable_mace_irq, + .ack = mask_and_ack_mace_irq, + .end = end_mace_irq, }; static void ip32_unknown_interrupt(struct pt_regs *regs) Index: linux/arch/mips/sibyte/sb1250/irq.c =================================================================== --- linux.orig/arch/mips/sibyte/sb1250/irq.c +++ linux/arch/mips/sibyte/sb1250/irq.c @@ -71,24 +71,22 @@ extern char sb1250_duart_present[]; #endif static struct hw_interrupt_type sb1250_irq_type = { - "SB1250-IMR", - startup_sb1250_irq, - shutdown_sb1250_irq, - enable_sb1250_irq, - disable_sb1250_irq, - ack_sb1250_irq, - end_sb1250_irq, + .typename = "SB1250-IMR", + .startup = startup_sb1250_irq, + .shutdown = shutdown_sb1250_irq, + .enable = enable_sb1250_irq, + .disable = disable_sb1250_irq, + .ack = ack_sb1250_irq, + .end = end_sb1250_irq, #ifdef CONFIG_SMP - sb1250_set_affinity -#else - NULL + .set_affinity = sb1250_set_affinity 
#endif }; /* Store the CPU id (not the logical number) */ int sb1250_irq_owner[SB1250_NR_IRQS]; -DEFINE_SPINLOCK(sb1250_imr_lock); +DEFINE_RAW_SPINLOCK(sb1250_imr_lock); void sb1250_mask_irq(int cpu, int irq) { @@ -276,7 +274,7 @@ static irqreturn_t sb1250_dummy_handler static struct irqaction sb1250_dummy_action = { .handler = sb1250_dummy_handler, - .flags = 0, + .flags = SA_NODELAY, .mask = CPU_MASK_NONE, .name = "sb1250-private", .next = NULL, Index: linux/arch/mips/sibyte/sb1250/smp.c =================================================================== --- linux.orig/arch/mips/sibyte/sb1250/smp.c +++ linux/arch/mips/sibyte/sb1250/smp.c @@ -59,7 +59,7 @@ void sb1250_smp_finish(void) { extern void sb1250_time_init(void); sb1250_time_init(); - local_irq_enable(); + raw_local_irq_enable(); } /* Index: linux/arch/mips/sibyte/sb1250/time.c =================================================================== --- linux.orig/arch/mips/sibyte/sb1250/time.c +++ linux/arch/mips/sibyte/sb1250/time.c @@ -115,10 +115,12 @@ void sb1250_timer_interrupt(struct pt_re ll_timer_interrupt(irq, regs); } - /* - * every CPU should do profiling and process accouting - */ - ll_local_timer_interrupt(irq, regs); + if (cpu != 0) { + /* + * every CPU should do profiling and process accouting + */ + ll_local_timer_interrupt(irq, regs); + } } /* Index: linux/arch/mips/sni/irq.c =================================================================== --- linux.orig/arch/mips/sni/irq.c +++ linux/arch/mips/sni/irq.c @@ -58,14 +58,13 @@ static void end_pciasic_irq(unsigned int } static struct hw_interrupt_type pciasic_irq_type = { - "ASIC-PCI", - startup_pciasic_irq, - shutdown_pciasic_irq, - enable_pciasic_irq, - disable_pciasic_irq, - mask_and_ack_pciasic_irq, - end_pciasic_irq, - NULL + .typename = "ASIC-PCI", + .startup = startup_pciasic_irq, + .shutdown = shutdown_pciasic_irq, + .enable = enable_pciasic_irq, + .disable = disable_pciasic_irq, + .ack = mask_and_ack_pciasic_irq, + .end = 
end_pciasic_irq, }; /* Index: linux/arch/mips/sni/reset.c =================================================================== --- linux.orig/arch/mips/sni/reset.c +++ linux/arch/mips/sni/reset.c @@ -30,7 +30,7 @@ void sni_machine_restart(char *command) /* This does a normal via the keyboard controller like a PC. We can do that easier ... */ - local_irq_disable(); + raw_local_irq_disable(); for (;;) { for (i=0; i<100; i++) { kb_wait(); Index: linux/arch/mips/tx4927/toshiba_rbtx4927/toshiba_rbtx4927_irq.c =================================================================== --- linux.orig/arch/mips/tx4927/toshiba_rbtx4927/toshiba_rbtx4927_irq.c +++ linux/arch/mips/tx4927/toshiba_rbtx4927/toshiba_rbtx4927_irq.c @@ -669,7 +669,7 @@ void __init arch_init_irq(void) { extern void tx4927_irq_init(void); - local_irq_disable(); + raw_local_irq_disable(); tx4927_irq_init(); toshiba_rbtx4927_irq_ioc_init(); Index: linux/arch/mips/tx4927/toshiba_rbtx4927/toshiba_rbtx4927_setup.c =================================================================== --- linux.orig/arch/mips/tx4927/toshiba_rbtx4927/toshiba_rbtx4927_setup.c +++ linux/arch/mips/tx4927/toshiba_rbtx4927/toshiba_rbtx4927_setup.c @@ -727,7 +727,7 @@ void toshiba_rbtx4927_restart(char *comm reg_wr08(RBTX4927_SW_RESET_DO, RBTX4927_SW_RESET_DO_SET); /* do something passive while waiting for reset */ - local_irq_disable(); + raw_local_irq_disable(); while (1) asm_wait(); @@ -738,7 +738,7 @@ void toshiba_rbtx4927_restart(char *comm void toshiba_rbtx4927_halt(void) { printk(KERN_NOTICE "System Halted\n"); - local_irq_disable(); + raw_local_irq_disable(); while (1) { asm_wait(); } Index: linux/arch/mips/vr41xx/common/pmu.c =================================================================== --- linux.orig/arch/mips/vr41xx/common/pmu.c +++ linux/arch/mips/vr41xx/common/pmu.c @@ -62,7 +62,7 @@ static inline void software_reset(void) static void vr41xx_restart(char *command) { - local_irq_disable(); + raw_local_irq_disable(); 
software_reset(); printk(KERN_NOTICE "\nYou can reset your system\n"); while (1) ; @@ -70,14 +70,14 @@ static void vr41xx_restart(char *command static void vr41xx_halt(void) { - local_irq_disable(); + raw_local_irq_disable(); printk(KERN_NOTICE "\nYou can turn off the power supply\n"); while (1) ; } static void vr41xx_power_off(void) { - local_irq_disable(); + raw_local_irq_disable(); printk(KERN_NOTICE "\nYou can turn off the power supply\n"); while (1) ; } Index: linux/arch/parisc/kernel/time.c =================================================================== --- linux.orig/arch/parisc/kernel/time.c +++ linux/arch/parisc/kernel/time.c @@ -33,10 +33,6 @@ #include -u64 jiffies_64 = INITIAL_JIFFIES; - -EXPORT_SYMBOL(jiffies_64); - /* xtime and wall_jiffies keep wall-clock time */ extern unsigned long wall_jiffies; Index: linux/arch/ppc/8260_io/enet.c =================================================================== --- linux.orig/arch/ppc/8260_io/enet.c +++ linux/arch/ppc/8260_io/enet.c @@ -116,7 +116,7 @@ struct scc_enet_private { scc_t *sccp; struct net_device_stats stats; uint tx_full; - spinlock_t lock; + raw_spinlock_t lock; }; static int scc_enet_open(struct net_device *dev); Index: linux/arch/ppc/8260_io/fcc_enet.c =================================================================== --- linux.orig/arch/ppc/8260_io/fcc_enet.c +++ linux/arch/ppc/8260_io/fcc_enet.c @@ -377,7 +377,7 @@ struct fcc_enet_private { volatile fcc_enet_t *ep; struct net_device_stats stats; uint tx_free; - spinlock_t lock; + raw_spinlock_t lock; #ifdef CONFIG_USE_MDIO uint phy_id; Index: linux/arch/ppc/8xx_io/commproc.c =================================================================== --- linux.orig/arch/ppc/8xx_io/commproc.c +++ linux/arch/ppc/8xx_io/commproc.c @@ -356,7 +356,7 @@ cpm_setbrg(uint brg, uint rate) /* * dpalloc / dpfree bits. 
*/ -static spinlock_t cpm_dpmem_lock; +static raw_spinlock_t cpm_dpmem_lock; /* * 16 blocks should be enough to satisfy all requests * until the memory subsystem goes up... Index: linux/arch/ppc/8xx_io/enet.c =================================================================== --- linux.orig/arch/ppc/8xx_io/enet.c +++ linux/arch/ppc/8xx_io/enet.c @@ -144,7 +144,7 @@ struct scc_enet_private { unsigned char *rx_vaddr[RX_RING_SIZE]; struct net_device_stats stats; uint tx_full; - spinlock_t lock; + raw_spinlock_t lock; }; static int scc_enet_open(struct net_device *dev); Index: linux/arch/ppc/8xx_io/fec.c =================================================================== --- linux.orig/arch/ppc/8xx_io/fec.c +++ linux/arch/ppc/8xx_io/fec.c @@ -165,7 +165,7 @@ struct fec_enet_private { struct net_device_stats stats; uint tx_full; - spinlock_t lock; + raw_spinlock_t lock; #ifdef CONFIG_USE_MDIO uint phy_id; Index: linux/arch/ppc/Kconfig =================================================================== --- linux.orig/arch/ppc/Kconfig +++ linux/arch/ppc/Kconfig @@ -15,13 +15,6 @@ config GENERIC_HARDIRQS bool default y -config RWSEM_GENERIC_SPINLOCK - bool - -config RWSEM_XCHGADD_ALGORITHM - bool - default y - config GENERIC_CALIBRATE_DELAY bool default y @@ -922,6 +915,14 @@ config HIGHMEM source kernel/Kconfig.hz source kernel/Kconfig.preempt + +config RWSEM_GENERIC_SPINLOCK + bool + default y + +config RWSEM_XCHGADD_ALGORITHM + bool + source "mm/Kconfig" source "fs/Kconfig.binfmt" Index: linux/arch/ppc/boot/Makefile =================================================================== --- linux.orig/arch/ppc/boot/Makefile +++ linux/arch/ppc/boot/Makefile @@ -11,6 +11,15 @@ # CFLAGS += -fno-builtin -D__BOOTER__ -Iarch/$(ARCH)/boot/include + +ifdef CONFIG_MCOUNT +# do not trace the boot loader +nullstring := +space := $(nullstring) # end of the line +pg_flag = $(nullstring) -pg # end of the line +CFLAGS := $(subst ${pg_flag},${space},${CFLAGS}) +endif + HOSTCFLAGS += 
-Iarch/$(ARCH)/boot/include BOOT_TARGETS = zImage zImage.initrd znetboot znetboot.initrd Index: linux/arch/ppc/boot/lib/Makefile =================================================================== --- linux.orig/arch/ppc/boot/lib/Makefile +++ linux/arch/ppc/boot/lib/Makefile @@ -5,19 +5,49 @@ CFLAGS_kbd.o := -Idrivers/char CFLAGS_vreset.o := -I$(srctree)/arch/ppc/boot/include -zlib := infblock.c infcodes.c inffast.c inflate.c inftrees.c infutil.c - -lib-y += $(zlib:.c=.o) div64.o -lib-$(CONFIG_VGA_CONSOLE) += vreset.o kbd.o - +zlib := infblock.c infcodes.c inffast.c inflate.c inftrees.c infutil.c +zlibheader := infblock.h infcodes.h inffast.h inftrees.h infutil.h +zliblinuxheader := zlib.h zconf.h zutil.h + +$(addprefix $(obj)/,$(zlib)): $(addprefix $(obj)/,$(zliblinuxheader)) $(addprefix $(obj)/,$(zlibheader)) + +src-boot := div64.S +src-boot += $(zlib) +#src-boot := $(addprefix $(obj)/, $(src-boot)) +obj-boot := $(addsuffix .o, $(basename $(src-boot))) -# zlib files needs header from their original place -EXTRA_CFLAGS += -Ilib/zlib_inflate +BOOTCFLAGS += -I$(obj) -I$(srctree)/$(obj) $(CFLAGS) quiet_cmd_copy_zlib = COPY $@ - cmd_copy_zlib = cat $< > $@ + cmd_copy_zlib = sed "s@__attribute_used__@@;s@.include.@@;s@.include.@@;s@.*spin.*lock.*@@;s@.*SPINLOCK.*@@;s@]\+\).*@\"\1\"@" $< > $@ + +quiet_cmd_copy_zlibheader = COPY $@ + cmd_copy_zlibheader = sed "s@]\+\).*@\"\1\"@" $< > $@ +# stddef.h for NULL +quiet_cmd_copy_zliblinuxheader = COPY $@ + cmd_copy_zliblinuxheader = sed "s@.include.@@;s@.include.@@;s@@@;s@]\+\).*@\"\1\"@" $< > $@ $(addprefix $(obj)/,$(zlib)): $(obj)/%: $(srctree)/lib/zlib_inflate/% $(call cmd,copy_zlib) -clean-files := $(zlib) +$(addprefix $(obj)/,$(zlibheader)): $(obj)/%: $(srctree)/lib/zlib_inflate/% + $(call cmd,copy_zlibheader) + +$(addprefix $(obj)/,$(zliblinuxheader)): $(obj)/%: $(srctree)/include/linux/% + $(call cmd,copy_zliblinuxheader) + +clean-files := $(zlib) $(zlibheader) $(zliblinuxheader) + +quiet_cmd_bootcc = BOOTCC $@ + 
cmd_bootcc = $(CC) -Wp,-MD,$(depfile) $(BOOTCFLAGS) -c -o $@ $< + +quiet_cmd_bootas = BOOTAS $@ + cmd_bootas = $(CC) -Wp,-MD,$(depfile) $(BOOTAFLAGS) -c -o $@ $< + +$(patsubst %.c,%.o, $(filter %.c, $(src-boot))): %.o: %.c + $(call if_changed_dep,bootcc) +$(patsubst %.S,%.o, $(filter %.S, $(src-boot))): %.o: %.S + $(call if_changed_dep,bootas) + +lib-y += $(obj-boot) +lib-$(CONFIG_VGA_CONSOLE) += vreset.o kbd.o Index: linux/arch/ppc/kernel/dma-mapping.c =================================================================== --- linux.orig/arch/ppc/kernel/dma-mapping.c +++ linux/arch/ppc/kernel/dma-mapping.c @@ -71,7 +71,7 @@ int map_page(unsigned long va, phys_addr * This is the page table (2MB) covering uncached, DMA consistent allocations */ static pte_t *consistent_pte; -static DEFINE_SPINLOCK(consistent_lock); +static DEFINE_RAW_SPINLOCK(consistent_lock); /* * VM region handling support. @@ -407,7 +407,7 @@ static inline void __dma_sync_page_highm int nr_segs = 1 + ((size - seg_size) + PAGE_SIZE - 1)/PAGE_SIZE; int seg_nr = 0; - local_irq_save(flags); + raw_local_irq_save(flags); do { start = (unsigned long)kmap_atomic(page + seg_nr, @@ -426,7 +426,7 @@ static inline void __dma_sync_page_highm seg_offset = 0; } while (seg_nr < nr_segs); - local_irq_restore(flags); + raw_local_irq_restore(flags); } #endif /* CONFIG_HIGHMEM */ Index: linux/arch/ppc/kernel/entry.S =================================================================== --- linux.orig/arch/ppc/kernel/entry.S +++ linux/arch/ppc/kernel/entry.S @@ -240,7 +240,7 @@ ret_from_syscall: SYNC MTMSRD(r10) lwz r9,TI_FLAGS(r12) - andi. r0,r9,(_TIF_SYSCALL_T_OR_A|_TIF_SIGPENDING|_TIF_NEED_RESCHED) + andi. r0,r9,(_TIF_SYSCALL_T_OR_A|_TIF_SIGPENDING|_TIF_NEED_RESCHED|_TIF_NEED_RESCHED_DELAYED) bne- syscall_exit_work syscall_exit_cont: #if defined(CONFIG_4xx) || defined(CONFIG_BOOKE) @@ -318,7 +318,7 @@ syscall_exit_work: rlwinm r12,r1,0,0,18 /* current_thread_info() */ lwz r9,TI_FLAGS(r12) 5: - andi. 
r0,r9,_TIF_NEED_RESCHED + andi. r0,r9,(_TIF_NEED_RESCHED|_TIF_NEED_RESCHED_DELAYED) bne 1f lwz r5,_MSR(r1) andi. r5,r5,MSR_PR @@ -658,7 +658,7 @@ user_exc_return: /* r10 contains MSR_KE /* Check current_thread_info()->flags */ rlwinm r9,r1,0,0,18 lwz r9,TI_FLAGS(r9) - andi. r0,r9,(_TIF_SIGPENDING|_TIF_NEED_RESCHED) + andi. r0,r9,(_TIF_SIGPENDING|_TIF_NEED_RESCHED|_TIF_NEED_RESCHED_DELAYED) bne do_work restore_user: @@ -876,7 +876,7 @@ load_dbcr0: #endif /* !(CONFIG_4xx || CONFIG_BOOKE) */ do_work: /* r10 contains MSR_KERNEL here */ - andi. r0,r9,_TIF_NEED_RESCHED + andi. r0,r9,(_TIF_NEED_RESCHED|_TIF_NEED_RESCHED_DELAYED) beq do_user_signal do_resched: /* r10 contains MSR_KERNEL here */ @@ -890,7 +890,7 @@ recheck: MTMSRD(r10) /* disable interrupts */ rlwinm r9,r1,0,0,18 lwz r9,TI_FLAGS(r9) - andi. r0,r9,_TIF_NEED_RESCHED + andi. r0,r9,(_TIF_NEED_RESCHED|_TIF_NEED_RESCHED_DELAYED) bne- do_resched andi. r0,r9,_TIF_SIGPENDING beq restore_user @@ -1000,3 +1000,85 @@ machine_check_in_rtas: /* XXX load up BATs and panic */ #endif /* CONFIG_PPC_OF */ + +#ifdef CONFIG_MCOUNT + +/* + * mcount() is not the same as _mcount(). The callers of mcount() have a + * normal context. The callers of _mcount() do not have a stack frame and + * have not saved the "caller saves" registers. + */ +_GLOBAL(mcount) + stwu r1,-16(r1) + mflr r3 + lis r5,mcount_enabled@ha + lwz r5,mcount_enabled@l(r5) + stw r3,20(r1) + cmpwi r5,0 + beq 1f + /* r3 contains lr (eip), put parent lr (parent_eip) in r4 */ + lwz r4,16(r1) + lwz r4,4(r4) + bl __trace +1: + lwz r0,20(r1) + mtlr r0 + addi r1,r1,16 + blr + +/* + * The -pg flag, which is specified in the case of CONFIG_MCOUNT, causes the + * C compiler to add a call to _mcount() at the start of each function preamble, + * before the stack frame is created. 
An example of this preamble code is: + * + * mflr r0 + * lis r12,-16354 + * stw r0,4(r1) + * addi r0,r12,-19652 + * bl 0xc00034c8 <_mcount> + * mflr r0 + * stwu r1,-16(r1) + */ +_GLOBAL(_mcount) +#define M_STK_SIZE 48 + /* Would not expect to need to save cr, but glibc version of */ + /* _mcount() does, so cautiously saving it here too. */ + stwu r1,-M_STK_SIZE(r1) + stw r3, 12(r1) + stw r4, 16(r1) + stw r5, 20(r1) + stw r6, 24(r1) + mflr r3 /* will use as first arg to __trace() */ + mfcr r4 + lis r5,mcount_enabled@ha + lwz r5,mcount_enabled@l(r5) + cmpwi r5,0 + stw r3, 44(r1) /* lr */ + stw r4, 8(r1) /* cr */ + stw r7, 28(r1) + stw r8, 32(r1) + stw r9, 36(r1) + stw r10,40(r1) + beq 1f + /* r3 contains lr (eip), put parent lr (parent_eip) in r4 */ + lwz r4,M_STK_SIZE+4(r1) + bl __trace +1: + lwz r8, 8(r1) /* cr */ + lwz r9, 44(r1) /* lr */ + lwz r3, 12(r1) + lwz r4, 16(r1) + lwz r5, 20(r1) + mtcrf 0xff,r8 + mtctr r9 + lwz r0, 52(r1) + lwz r6, 24(r1) + lwz r7, 28(r1) + lwz r8, 32(r1) + lwz r9, 36(r1) + lwz r10,40(r1) + addi r1,r1,M_STK_SIZE + mtlr r0 + bctr + +#endif /* CONFIG_MCOUNT */ Index: linux/arch/ppc/kernel/idle.c =================================================================== --- linux.orig/arch/ppc/kernel/idle.c +++ linux/arch/ppc/kernel/idle.c @@ -40,7 +40,7 @@ void default_idle(void) powersave = ppc_md.power_save; - if (!need_resched()) { + if (!need_resched() && !need_resched_delayed()) { if (powersave != NULL) powersave(); #ifdef CONFIG_SMP @@ -52,8 +52,11 @@ void default_idle(void) } #endif } - if (need_resched()) - schedule(); + if (need_resched()) { + raw_local_irq_disable(); + __schedule(); + raw_local_irq_enable(); + } if (cpu_is_offline(cpu) && system_state == SYSTEM_RUNNING) cpu_die(); } @@ -63,11 +66,15 @@ void default_idle(void) */ void cpu_idle(void) { - for (;;) + for (;;) { + BUG_ON(raw_irqs_disabled()); + stop_critical_timing(); + propagate_preempt_locks_value(); if (ppc_md.idle != NULL) ppc_md.idle(); else default_idle(); + } } #if 
defined(CONFIG_SYSCTL) && defined(CONFIG_6xx) Index: linux/arch/ppc/kernel/irq.c =================================================================== --- linux.orig/arch/ppc/kernel/irq.c +++ linux/arch/ppc/kernel/irq.c @@ -138,6 +138,7 @@ skip: void do_IRQ(struct pt_regs *regs) { int irq, first = 1; + irq_enter(); /* @@ -149,6 +150,7 @@ void do_IRQ(struct pt_regs *regs) * has already been handled. -- Tom */ while ((irq = ppc_md.get_irq(regs)) >= 0) { + trace_special(regs->nip, irq, 0); __do_IRQ(irq, regs); first = 0; } Index: linux/arch/ppc/kernel/misc.S =================================================================== --- linux.orig/arch/ppc/kernel/misc.S +++ linux/arch/ppc/kernel/misc.S @@ -302,8 +302,8 @@ _GLOBAL(local_save_flags_ptr) nop _GLOBAL(local_save_flags_ptr_end) -/* void local_irq_restore(unsigned long flags) */ -_GLOBAL(local_irq_restore) +/* void __raw_local_irq_restore(unsigned long flags) */ +_GLOBAL(__raw_local_irq_restore) /* * Just set/clear the MSR_EE bit through restore/flags but do not * change anything else. This is needed by the RT system and makes @@ -341,9 +341,9 @@ _GLOBAL(local_irq_restore) nop nop nop -_GLOBAL(local_irq_restore_end) +_GLOBAL(__raw_local_irq_restore_end) -_GLOBAL(local_irq_disable) +_GLOBAL(__raw_local_irq_disable) mfmsr r0 /* Get current interrupt state */ rlwinm r3,r0,16+1,32-1,31 /* Extract old value of 'EE' */ rlwinm r0,r0,0,17,15 /* clear MSR_EE in r0 */ @@ -370,9 +370,9 @@ _GLOBAL(local_irq_disable) nop nop nop -_GLOBAL(local_irq_disable_end) +_GLOBAL(__raw_local_irq_disable_end) -_GLOBAL(local_irq_enable) +_GLOBAL(__raw_local_irq_enable) mfmsr r3 /* Get current state */ ori r3,r3,MSR_EE /* Turn on 'EE' bit */ SYNC /* Some chip revs have problems here... */ @@ -399,7 +399,7 @@ _GLOBAL(local_irq_enable) nop nop nop -_GLOBAL(local_irq_enable_end) +_GLOBAL(__raw_local_irq_enable_end) /* * complement mask on the msr then "or" some values on. 
Index: linux/arch/ppc/kernel/ppc_ksyms.c =================================================================== --- linux.orig/arch/ppc/kernel/ppc_ksyms.c +++ linux/arch/ppc/kernel/ppc_ksyms.c @@ -291,9 +291,11 @@ EXPORT_SYMBOL(console_drivers); EXPORT_SYMBOL(xmon); EXPORT_SYMBOL(xmon_printf); #endif -EXPORT_SYMBOL(__up); -EXPORT_SYMBOL(__down); -EXPORT_SYMBOL(__down_interruptible); +#ifdef CONFIG_ASM_SEMAPHORES +EXPORT_SYMBOL(__compat_up); +EXPORT_SYMBOL(__compat_down); +EXPORT_SYMBOL(__compat_down_interruptible); +#endif #if defined(CONFIG_KGDB) || defined(CONFIG_XMON) extern void (*debugger)(struct pt_regs *regs); Index: linux/arch/ppc/kernel/process.c =================================================================== --- linux.orig/arch/ppc/kernel/process.c +++ linux/arch/ppc/kernel/process.c @@ -37,6 +37,8 @@ #include #include #include +#include +#include #include #include @@ -52,8 +54,8 @@ struct task_struct *last_task_used_math struct task_struct *last_task_used_altivec = NULL; struct task_struct *last_task_used_spe = NULL; -static struct fs_struct init_fs = INIT_FS; -static struct files_struct init_files = INIT_FILES; +static struct fs_struct init_fs = INIT_FS(init_fs); +static struct files_struct init_files = INIT_FILES(init_files); static struct signal_struct init_signals = INIT_SIGNALS(init_signals); static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); struct mm_struct init_mm = INIT_MM(init_mm); @@ -241,7 +243,7 @@ struct task_struct *__switch_to(struct t unsigned long s; struct task_struct *last; - local_irq_save(s); + raw_local_irq_save(s); #ifdef CHECK_STACK check_stack(prev); check_stack(new); @@ -302,7 +304,7 @@ struct task_struct *__switch_to(struct t new_thread = &new->thread; old_thread = &current->thread; last = _switch(old_thread, new_thread); - local_irq_restore(s); + raw_local_irq_restore(s); return last; } Index: linux/arch/ppc/kernel/semaphore.c =================================================================== ---
linux.orig/arch/ppc/kernel/semaphore.c +++ linux/arch/ppc/kernel/semaphore.c @@ -29,7 +29,7 @@ * sem->count = tmp; * return old_count; */ -static inline int __sem_update_count(struct semaphore *sem, int incr) +static inline int __sem_update_count(struct compat_semaphore *sem, int incr) { int old_count, tmp; @@ -48,7 +48,7 @@ static inline int __sem_update_count(str return old_count; } -void __up(struct semaphore *sem) +void __compat_up(struct compat_semaphore *sem) { /* * Note that we incremented count in up() before we came here, @@ -70,7 +70,7 @@ void __up(struct semaphore *sem) * Thus it is only when we decrement count from some value > 0 * that we have actually got the semaphore. */ -void __sched __down(struct semaphore *sem) +void __sched __compat_down(struct compat_semaphore *sem) { struct task_struct *tsk = current; DECLARE_WAITQUEUE(wait, tsk); @@ -100,7 +100,7 @@ void __sched __down(struct semaphore *se wake_up(&sem->wait); } -int __sched __down_interruptible(struct semaphore * sem) +int __sched __compat_down_interruptible(struct compat_semaphore * sem) { int retval = 0; struct task_struct *tsk = current; @@ -129,3 +129,8 @@ int __sched __down_interruptible(struct wake_up(&sem->wait); return retval; } + +int compat_sem_is_locked(struct compat_semaphore *sem) +{ + return (int) atomic_read(&sem->count) < 0; +} Index: linux/arch/ppc/kernel/signal.c =================================================================== --- linux.orig/arch/ppc/kernel/signal.c +++ linux/arch/ppc/kernel/signal.c @@ -705,6 +705,14 @@ int do_signal(sigset_t *oldset, struct p unsigned long frame, newsp; int signr, ret; +#ifdef CONFIG_PREEMPT_RT + /* + * Fully-preemptible kernel does not need interrupts disabled: + */ + local_irq_enable(); + preempt_check_resched(); +#endif + if (try_to_freeze()) { signr = 0; if (!signal_pending(current)) Index: linux/arch/ppc/kernel/smp-tbsync.c =================================================================== --- 
linux.orig/arch/ppc/kernel/smp-tbsync.c +++ linux/arch/ppc/kernel/smp-tbsync.c @@ -49,7 +49,7 @@ smp_generic_take_timebase( void ) { int cmd, tbl, tbu; - local_irq_disable(); + raw_local_irq_disable(); while( !running ) ; rmb(); @@ -78,7 +78,7 @@ smp_generic_take_timebase( void ) } enter_contest( tbsync->mark, -1 ); } - local_irq_enable(); + raw_local_irq_enable(); } static int __devinit @@ -88,7 +88,7 @@ start_contest( int cmd, int offset, int tbsync->cmd = cmd; - local_irq_disable(); + raw_local_irq_disable(); for( i=-3; itbu = tbu = get_tbu(); @@ -114,7 +114,7 @@ start_contest( int cmd, int offset, int if( i++ > 0 ) score += tbsync->race_result; } - local_irq_enable(); + raw_local_irq_enable(); return score; } Index: linux/arch/ppc/kernel/smp.c =================================================================== --- linux.orig/arch/ppc/kernel/smp.c +++ linux/arch/ppc/kernel/smp.c @@ -138,6 +138,16 @@ void smp_send_reschedule(int cpu) smp_message_pass(cpu, PPC_MSG_RESCHEDULE, 0, 0); } +/* + * this function sends a 'reschedule' IPI to all other CPUs. + * This is used when RT tasks are starving and other CPUs + * might be able to run them: + */ +void smp_send_reschedule_allbutself(void) +{ + smp_message_pass(MSG_ALL_BUT_SELF, PPC_MSG_RESCHEDULE, 0, 0); +} + #ifdef CONFIG_XMON void smp_send_xmon_break(int cpu) { @@ -147,7 +157,7 @@ void smp_send_xmon_break(int cpu) static void stop_this_cpu(void *dummy) { - local_irq_disable(); + raw_local_irq_disable(); while (1) ; } @@ -162,7 +172,7 @@ void smp_send_stop(void) * static memory requirements. It also looks cleaner. * Stolen from the i386 version. 
*/ -static DEFINE_SPINLOCK(call_lock); +static DEFINE_RAW_SPINLOCK(call_lock); static struct call_data_struct { void (*func) (void *info); @@ -197,7 +207,7 @@ int smp_call_function(void (*func) (void if (num_online_cpus() <= 1) return 0; /* Can deadlock when called with interrupts disabled */ - WARN_ON(irqs_disabled()); + WARN_ON(raw_irqs_disabled()); return __smp_call_function(func, info, wait, MSG_ALL_BUT_SELF); } @@ -357,7 +367,7 @@ int __devinit start_secondary(void *unus cpu_set(cpu, cpu_online_map); spin_unlock(&call_lock); - local_irq_enable(); + raw_local_irq_enable(); cpu_idle(); return 0; Index: linux/arch/ppc/kernel/temp.c =================================================================== --- linux.orig/arch/ppc/kernel/temp.c +++ linux/arch/ppc/kernel/temp.c @@ -142,7 +142,7 @@ static void tau_timeout(void * info) int shrink; /* disabling interrupts *should* be okay */ - local_irq_save(flags); + raw_local_irq_save(flags); cpu = smp_processor_id(); #ifndef CONFIG_TAU_INT @@ -185,7 +185,7 @@ static void tau_timeout(void * info) */ mtspr(SPRN_THRM3, THRM3_SITV(500*60) | THRM3_E); - local_irq_restore(flags); + raw_local_irq_restore(flags); } static void tau_timeout_smp(unsigned long unused) Index: linux/arch/ppc/kernel/time.c =================================================================== --- linux.orig/arch/ppc/kernel/time.c +++ linux/arch/ppc/kernel/time.c @@ -66,10 +66,8 @@ #include -/* XXX false sharing with below? 
*/ -u64 jiffies_64 = INITIAL_JIFFIES; - -EXPORT_SYMBOL(jiffies_64); +unsigned long cpu_khz; /* Detected as we calibrate the TSC */ +EXPORT_SYMBOL(cpu_khz); unsigned long disarm_decr[NR_CPUS]; @@ -91,7 +89,7 @@ extern unsigned long wall_jiffies; /* used for timezone offset */ static long timezone_offset; -DEFINE_SPINLOCK(rtc_lock); +DEFINE_RAW_SPINLOCK(rtc_lock); EXPORT_SYMBOL(rtc_lock); @@ -109,7 +107,7 @@ static inline int tb_delta(unsigned *jif } #ifdef CONFIG_SMP -unsigned long profile_pc(struct pt_regs *regs) +unsigned long notrace profile_pc(struct pt_regs *regs) { unsigned long pc = instruction_pointer(regs); Index: linux/arch/ppc/kernel/traps.c =================================================================== --- linux.orig/arch/ppc/kernel/traps.c +++ linux/arch/ppc/kernel/traps.c @@ -72,7 +72,7 @@ void (*debugger_fault_handler)(struct pt * Trap & Exception support */ -DEFINE_SPINLOCK(die_lock); +DEFINE_RAW_SPINLOCK(die_lock); void die(const char * str, struct pt_regs * fp, long err) { @@ -113,6 +113,10 @@ void _exception(int signr, struct pt_reg debugger(regs); die("Exception in kernel mode", regs, signr); } +#ifdef CONFIG_PREEMPT_RT + raw_local_irq_enable(); + preempt_check_resched(); +#endif info.si_signo = signr; info.si_errno = 0; info.si_code = code; Index: linux/arch/ppc/lib/locks.c =================================================================== --- linux.orig/arch/ppc/lib/locks.c +++ linux/arch/ppc/lib/locks.c @@ -43,7 +43,7 @@ static inline unsigned long __spin_trylo return ret; } -void _raw_spin_lock(spinlock_t *lock) +void __raw_spin_lock(raw_spinlock_t *lock) { int cpu = smp_processor_id(); unsigned int stuck = INIT_STUCK; @@ -63,9 +63,9 @@ void _raw_spin_lock(spinlock_t *lock) lock->owner_pc = (unsigned long)__builtin_return_address(0); lock->owner_cpu = cpu; } -EXPORT_SYMBOL(_raw_spin_lock); +EXPORT_SYMBOL(__raw_spin_lock); -int _raw_spin_trylock(spinlock_t *lock) +int __raw_spin_trylock(raw_spinlock_t *lock) { if 
(__spin_trylock(&lock->lock)) return 0; @@ -73,9 +73,9 @@ int _raw_spin_trylock(spinlock_t *lock) lock->owner_pc = (unsigned long)__builtin_return_address(0); return 1; } -EXPORT_SYMBOL(_raw_spin_trylock); +EXPORT_SYMBOL(__raw_spin_trylock); -void _raw_spin_unlock(spinlock_t *lp) +void __raw_spin_unlock(raw_spinlock_t *lp) { if ( !lp->lock ) printk("_spin_unlock(%p): no lock cpu %d curr PC %p %s/%d\n", @@ -89,13 +89,13 @@ void _raw_spin_unlock(spinlock_t *lp) wmb(); lp->lock = 0; } -EXPORT_SYMBOL(_raw_spin_unlock); +EXPORT_SYMBOL(__raw_spin_unlock); /* * For rwlocks, zero is unlocked, -1 is write-locked, * positive is read-locked. */ -static __inline__ int __read_trylock(rwlock_t *rw) +static __inline__ int __read_trylock(raw_rwlock_t *rw) { signed int tmp; @@ -115,13 +115,13 @@ static __inline__ int __read_trylock(rwl return tmp; } -int _raw_read_trylock(rwlock_t *rw) +int __raw_read_trylock(raw_rwlock_t *rw) { return __read_trylock(rw) > 0; } -EXPORT_SYMBOL(_raw_read_trylock); +EXPORT_SYMBOL(__raw_read_trylock); -void _raw_read_lock(rwlock_t *rw) +void __raw_read_lock(raw_rwlock_t *rw) { unsigned int stuck; @@ -136,9 +136,9 @@ void _raw_read_lock(rwlock_t *rw) } } } -EXPORT_SYMBOL(_raw_read_lock); +EXPORT_SYMBOL(__raw_read_lock); -void _raw_read_unlock(rwlock_t *rw) +void __raw_read_unlock(raw_rwlock_t *rw) { if ( rw->lock == 0 ) printk("_read_unlock(): %s/%d (nip %08lX) lock %d\n", @@ -147,9 +147,9 @@ void _raw_read_unlock(rwlock_t *rw) wmb(); atomic_dec((atomic_t *) &(rw)->lock); } -EXPORT_SYMBOL(_raw_read_unlock); +EXPORT_SYMBOL(__raw_read_unlock); -void _raw_write_lock(rwlock_t *rw) +void __raw_write_lock(raw_rwlock_t *rw) { unsigned int stuck; @@ -165,18 +165,18 @@ void _raw_write_lock(rwlock_t *rw) } wmb(); } -EXPORT_SYMBOL(_raw_write_lock); +EXPORT_SYMBOL(__raw_write_lock); -int _raw_write_trylock(rwlock_t *rw) +int __raw_write_trylock(raw_rwlock_t *rw) { if (cmpxchg(&rw->lock, 0, -1) != 0) return 0; wmb(); return 1; } -EXPORT_SYMBOL(_raw_write_trylock);
+EXPORT_SYMBOL(__raw_write_trylock); -void _raw_write_unlock(rwlock_t *rw) +void __raw_write_unlock(raw_rwlock_t *rw) { if (rw->lock >= 0) printk("_write_lock(): %s/%d (nip %08lX) lock %d\n", @@ -185,6 +185,6 @@ void _raw_write_unlock(rwlock_t *rw) wmb(); rw->lock = 0; } -EXPORT_SYMBOL(_raw_write_unlock); +EXPORT_SYMBOL(__raw_write_unlock); #endif Index: linux/arch/ppc/mm/fault.c =================================================================== --- linux.orig/arch/ppc/mm/fault.c +++ linux/arch/ppc/mm/fault.c @@ -92,7 +92,7 @@ static int store_updates_sp(struct pt_re * the error_code parameter is ESR for a data fault, 0 for an instruction * fault. */ -int do_page_fault(struct pt_regs *regs, unsigned long address, +int notrace do_page_fault(struct pt_regs *regs, unsigned long address, unsigned long error_code) { struct vm_area_struct * vma; Index: linux/arch/ppc/mm/init.c =================================================================== --- linux.orig/arch/ppc/mm/init.c +++ linux/arch/ppc/mm/init.c @@ -56,7 +56,7 @@ #endif #define MAX_LOW_MEM CONFIG_LOWMEM_SIZE -DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); +DEFINE_PER_CPU_LOCKED(struct mmu_gather, mmu_gathers); unsigned long total_memory; unsigned long total_lowmem; Index: linux/arch/ppc/platforms/4xx/xilinx_ml300.c =================================================================== --- linux.orig/arch/ppc/platforms/4xx/xilinx_ml300.c +++ linux/arch/ppc/platforms/4xx/xilinx_ml300.c @@ -62,7 +62,7 @@ static volatile unsigned *powerdown_base static void xilinx_power_off(void) { - local_irq_disable(); + raw_local_irq_disable(); out_be32(powerdown_base, XPAR_POWER_0_POWERDOWN_VALUE); while (1) ; } Index: linux/arch/ppc/platforms/apus_setup.c =================================================================== --- linux.orig/arch/ppc/platforms/apus_setup.c +++ linux/arch/ppc/platforms/apus_setup.c @@ -282,6 +282,7 @@ void apus_calibrate_decr(void) freq/1000000, freq%1000000); tb_ticks_per_jiffy = freq / HZ; tb_to_us 
= mulhwu_scale_factor(freq, 1000000); + cpu_khz = freq / 1000; __bus_speed = bus_speed; __speed_test_failed = speed_test_failed; @@ -480,7 +481,7 @@ void cache_clear(__u32 addr, int length) void apus_restart(char *cmd) { - local_irq_disable(); + raw_local_irq_disable(); APUS_WRITE(APUS_REG_LOCK, REGLOCK_BLACKMAGICK1|REGLOCK_BLACKMAGICK2); @@ -598,7 +599,7 @@ int __debug_serinit( void ) { unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); /* turn off Rx and Tx interrupts */ custom.intena = IF_RBF | IF_TBE; @@ -606,7 +607,7 @@ int __debug_serinit( void ) /* clear any pending interrupt */ custom.intreq = IF_RBF | IF_TBE; - local_irq_restore(flags); + raw_local_irq_restore(flags); /* * set the appropriate directions for the modem control flags, Index: linux/arch/ppc/platforms/chestnut.c =================================================================== --- linux.orig/arch/ppc/platforms/chestnut.c +++ linux/arch/ppc/platforms/chestnut.c @@ -455,7 +455,7 @@ chestnut_restart(char *cmd) { volatile ulong i = 10000000; - local_irq_disable(); + raw_local_irq_disable(); /* * Set CPLD Reg 3 bit 0 to 1 to allow MPP signals on reset to work @@ -474,7 +474,7 @@ chestnut_restart(char *cmd) static void chestnut_halt(void) { - local_irq_disable(); + raw_local_irq_disable(); for (;;); /* NOTREACHED */ } Index: linux/arch/ppc/platforms/chrp_smp.c =================================================================== --- linux.orig/arch/ppc/platforms/chrp_smp.c +++ linux/arch/ppc/platforms/chrp_smp.c @@ -57,7 +57,7 @@ smp_chrp_setup_cpu(int cpu_nr) do_openpic_setup_cpu(); } -static DEFINE_SPINLOCK(timebase_lock); +static DEFINE_RAW_SPINLOCK(timebase_lock); static unsigned int timebase_upper = 0, timebase_lower = 0; void __devinit Index: linux/arch/ppc/platforms/chrp_time.c =================================================================== --- linux.orig/arch/ppc/platforms/chrp_time.c +++ linux/arch/ppc/platforms/chrp_time.c @@ -188,4 +188,5 @@ void __init 
chrp_calibrate_decr(void) freq/1000000, freq%1000000); tb_ticks_per_jiffy = freq / HZ; tb_to_us = mulhwu_scale_factor(freq, 1000000); + cpu_khz = freq / 1000; } Index: linux/arch/ppc/platforms/cpci690.c =================================================================== --- linux.orig/arch/ppc/platforms/cpci690.c +++ linux/arch/ppc/platforms/cpci690.c @@ -321,7 +321,7 @@ cpci690_reset_board(void) { u32 i = 10000; - local_irq_disable(); + raw_local_irq_disable(); out_8((cpci690_br_base + CPCI690_BR_SW_RESET), 0x11); while (i != 0) i++; Index: linux/arch/ppc/platforms/ev64260.c =================================================================== --- linux.orig/arch/ppc/platforms/ev64260.c +++ linux/arch/ppc/platforms/ev64260.c @@ -445,7 +445,7 @@ ev64260_platform_notify(struct device *d static void ev64260_reset_board(void *addr) { - local_irq_disable(); + raw_local_irq_disable(); /* disable and invalidate the L2 cache */ _set_L2CR(0); @@ -513,7 +513,7 @@ ev64260_restart(char *cmd) static void ev64260_halt(void) { - local_irq_disable(); + raw_local_irq_disable(); while (1); /* NOTREACHED */ } @@ -552,6 +552,7 @@ ev64260_calibrate_decr(void) tb_ticks_per_jiffy = freq / HZ; tb_to_us = mulhwu_scale_factor(freq, 1000000); + cpu_khz = freq / 1000; return; } Index: linux/arch/ppc/platforms/gemini_setup.c =================================================================== --- linux.orig/arch/ppc/platforms/gemini_setup.c +++ linux/arch/ppc/platforms/gemini_setup.c @@ -302,7 +302,7 @@ void __init gemini_init_l2(void) void gemini_restart(char *cmd) { - local_irq_disable(); + raw_local_irq_disable(); /* make a clean restart, not via the MPIC */ _gemini_reboot(); for(;;); @@ -461,6 +461,7 @@ void __init gemini_calibrate_decr(void) divisor = 4; tb_ticks_per_jiffy = freq / HZ / divisor; tb_to_us = mulhwu_scale_factor(freq/divisor, 1000000); + cpu_khz = (freq / divisor) / 1000; } unsigned long __init gemini_find_end_of_memory(void) Index: linux/arch/ppc/platforms/hdpu.c 
=================================================================== --- linux.orig/arch/ppc/platforms/hdpu.c +++ linux/arch/ppc/platforms/hdpu.c @@ -473,7 +473,7 @@ static void hdpu_reset_board(void) hdpu_cpustate_set(CPUSTATE_KERNEL_MAJOR | CPUSTATE_KERNEL_RESET); - local_irq_disable(); + raw_local_irq_disable(); /* Clear all the LEDs */ mv64x60_write(&bh, MV64x60_GPP_VALUE_CLR, ((1 << 4) | @@ -515,7 +515,7 @@ static void hdpu_restart(char *cmd) static void hdpu_halt(void) { - local_irq_disable(); + raw_local_irq_disable(); hdpu_cpustate_set(CPUSTATE_KERNEL_MAJOR | CPUSTATE_KERNEL_HALT); Index: linux/arch/ppc/platforms/lopec.c =================================================================== --- linux.orig/arch/ppc/platforms/lopec.c +++ linux/arch/ppc/platforms/lopec.c @@ -162,7 +162,7 @@ lopec_restart(char *cmd) reg |= 0x80; *((unsigned char *) LOPEC_SYSSTAT1) = reg; - local_irq_disable(); + raw_local_irq_disable(); while(1); #undef LOPEC_SYSSTAT1 } @@ -170,7 +170,7 @@ lopec_restart(char *cmd) static void lopec_halt(void) { - local_irq_disable(); + raw_local_irq_disable(); while(1); } Index: linux/arch/ppc/platforms/mvme5100.c =================================================================== --- linux.orig/arch/ppc/platforms/mvme5100.c +++ linux/arch/ppc/platforms/mvme5100.c @@ -266,7 +266,7 @@ mvme5100_map_io(void) static void mvme5100_reset_board(void) { - local_irq_disable(); + raw_local_irq_disable(); /* Set exception prefix high - to the firmware */ _nmask_and_or_msr(0, MSR_IP); @@ -290,7 +290,7 @@ mvme5100_restart(char *cmd) static void mvme5100_halt(void) { - local_irq_disable(); + raw_local_irq_disable(); while (1); } Index: linux/arch/ppc/platforms/pal4_setup.c =================================================================== --- linux.orig/arch/ppc/platforms/pal4_setup.c +++ linux/arch/ppc/platforms/pal4_setup.c @@ -81,7 +81,7 @@ pal4_show_cpuinfo(struct seq_file *m) static void pal4_restart(char *cmd) { - local_irq_disable(); + 
raw_local_irq_disable(); __asm__ __volatile__("lis 3,0xfff0\n \ ori 3,3,0x100\n \ mtspr 26,3\n \ @@ -95,7 +95,7 @@ pal4_restart(char *cmd) static void pal4_power_off(void) { - local_irq_disable(); + raw_local_irq_disable(); for(;;); } Index: linux/arch/ppc/platforms/pmac_cpufreq.c =================================================================== --- linux.orig/arch/ppc/platforms/pmac_cpufreq.c +++ linux/arch/ppc/platforms/pmac_cpufreq.c @@ -285,7 +285,7 @@ static int __pmac pmu_set_cpu_speed(int asm volatile("mtdec %0" : : "r" (0x7fffffff)); /* We can now disable MSR_EE */ - local_irq_save(flags); + raw_local_irq_save(flags); /* Giveup the FPU & vec */ enable_kernel_fp(); @@ -341,7 +341,7 @@ static int __pmac pmu_set_cpu_speed(int openpic_set_priority(pic_prio); /* Let interrupts flow again ... */ - local_irq_restore(flags); + raw_local_irq_restore(flags); #ifdef DEBUG_FREQ debug_calc_bogomips(); Index: linux/arch/ppc/platforms/pmac_feature.c =================================================================== --- linux.orig/arch/ppc/platforms/pmac_feature.c +++ linux/arch/ppc/platforms/pmac_feature.c @@ -63,7 +63,7 @@ extern struct device_node *k2_skiplist[2 * We use a single global lock to protect accesses. 
Each driver has * to take care of its own locking */ -static DEFINE_SPINLOCK(feature_lock __pmacdata); +static DEFINE_RAW_SPINLOCK(feature_lock __pmacdata); #define LOCK(flags) spin_lock_irqsave(&feature_lock, flags); #define UNLOCK(flags) spin_unlock_irqrestore(&feature_lock, flags); Index: linux/arch/ppc/platforms/pmac_nvram.c =================================================================== --- linux.orig/arch/ppc/platforms/pmac_nvram.c +++ linux/arch/ppc/platforms/pmac_nvram.c @@ -80,7 +80,7 @@ static volatile unsigned char *nvram_dat static int nvram_mult, is_core_99; static int core99_bank = 0; static int nvram_partitions[3]; -static DEFINE_SPINLOCK(nv_lock); +static DEFINE_RAW_SPINLOCK(nv_lock); extern int pmac_newworld; extern int system_running; Index: linux/arch/ppc/platforms/pmac_pic.c =================================================================== --- linux.orig/arch/ppc/platforms/pmac_pic.c +++ linux/arch/ppc/platforms/pmac_pic.c @@ -68,7 +68,7 @@ static int max_irqs __pmacdata; static int max_real_irqs __pmacdata; static u32 level_mask[4] __pmacdata; -static DEFINE_SPINLOCK(pmac_pic_lock __pmacdata); +static DEFINE_RAW_SPINLOCK(pmac_pic_lock __pmacdata); #define GATWICK_IRQ_POOL_SIZE 10 Index: linux/arch/ppc/platforms/pmac_smp.c =================================================================== --- linux.orig/arch/ppc/platforms/pmac_smp.c +++ linux/arch/ppc/platforms/pmac_smp.c @@ -500,8 +500,8 @@ static void __devinit smp_core99_kick_cp return; if (ppc_md.progress) ppc_md.progress("smp_core99_kick_cpu", 0x346); - local_irq_save(flags); - local_irq_disable(); + raw_local_irq_save(flags); + raw_local_irq_disable(); /* Save reset vector */ save_vector = *vector; @@ -529,7 +529,7 @@ static void __devinit smp_core99_kick_cp *vector = save_vector; flush_icache_range((unsigned long) vector, (unsigned long) vector + 4); - local_irq_restore(flags); + raw_local_irq_restore(flags); if (ppc_md.progress) ppc_md.progress("smp_core99_kick_cpu done", 0x347); 
} @@ -571,7 +571,7 @@ void smp_core99_take_timebase(void) mb(); /* set our stuff the same as the primary */ - local_irq_save(flags); + raw_local_irq_save(flags); set_dec(1); set_tb(pri_tb_hi, pri_tb_lo); last_jiffy_stamp(smp_processor_id()) = pri_tb_stamp; @@ -580,7 +580,7 @@ void smp_core99_take_timebase(void) /* tell the primary we're done */ sec_tb_reset = 0; mb(); - local_irq_restore(flags); + raw_local_irq_restore(flags); } /* not __init, called in sleep/wakeup code */ @@ -600,7 +600,7 @@ void smp_core99_give_timebase(void) /* freeze the timebase and read it */ /* disable interrupts so the timebase is disabled for the shortest possible time */ - local_irq_save(flags); + raw_local_irq_save(flags); pmac_call_feature(PMAC_FTR_WRITE_GPIO, NULL, core99_tb_gpio, 4); pmac_call_feature(PMAC_FTR_READ_GPIO, NULL, core99_tb_gpio, 0); mb(); @@ -624,7 +624,7 @@ void smp_core99_give_timebase(void) /* Now, restart the timebase by leaving the GPIO to an open collector */ pmac_call_feature(PMAC_FTR_WRITE_GPIO, NULL, core99_tb_gpio, 0); pmac_call_feature(PMAC_FTR_READ_GPIO, NULL, core99_tb_gpio, 0); - local_irq_restore(flags); + raw_local_irq_restore(flags); } Index: linux/arch/ppc/platforms/pmac_time.c =================================================================== --- linux.orig/arch/ppc/platforms/pmac_time.c +++ linux/arch/ppc/platforms/pmac_time.c @@ -197,6 +197,7 @@ via_calibrate_decr(void) tb_ticks_per_jiffy = (dstart - dend) / ((6 * HZ)/100); tb_to_us = mulhwu_scale_factor(dstart - dend, 60000); + cpu_khz = (dstart - dend) / 60; printk(KERN_INFO "via_calibrate_decr: ticks per jiffy = %u (%u ticks)\n", tb_ticks_per_jiffy, dstart - dend); @@ -288,4 +289,5 @@ pmac_calibrate_decr(void) freq/1000000, freq%1000000); tb_ticks_per_jiffy = freq / HZ; tb_to_us = mulhwu_scale_factor(freq, 1000000); + cpu_khz = freq / 1000; } Index: linux/arch/ppc/platforms/powerpmc250.c =================================================================== --- 
linux.orig/arch/ppc/platforms/powerpmc250.c +++ linux/arch/ppc/platforms/powerpmc250.c @@ -166,12 +166,13 @@ powerpmc250_calibrate_decr(void) tb_ticks_per_jiffy = freq / (HZ * divisor); tb_to_us = mulhwu_scale_factor(freq/divisor, 1000000); + cpu_khz = (freq / divisor) / 1000; } static void powerpmc250_restart(char *cmd) { - local_irq_disable(); + raw_local_irq_disable(); /* Hard reset */ writeb(0x11, 0xfe000332); while(1); @@ -180,7 +181,7 @@ powerpmc250_restart(char *cmd) static void powerpmc250_halt(void) { - local_irq_disable(); + raw_local_irq_disable(); while (1); } Index: linux/arch/ppc/platforms/pplus.c =================================================================== --- linux.orig/arch/ppc/platforms/pplus.c +++ linux/arch/ppc/platforms/pplus.c @@ -607,7 +607,7 @@ static void pplus_restart(char *cmd) { unsigned long i = 10000; - local_irq_disable(); + raw_local_irq_disable(); /* set VIA IDE controller into native mode */ pplus_set_VIA_IDE_native(); Index: linux/arch/ppc/platforms/prep_setup.c =================================================================== --- linux.orig/arch/ppc/platforms/prep_setup.c +++ linux/arch/ppc/platforms/prep_setup.c @@ -458,7 +458,7 @@ static void __prep prep_restart(char *cmd) { #define PREP_SP92 0x92 /* Special Port 92 */ - local_irq_disable(); /* no interrupts */ + raw_local_irq_disable(); /* no interrupts */ /* set exception prefix high - to the prom */ _nmask_and_or_msr(0, MSR_IP); @@ -476,7 +476,7 @@ prep_restart(char *cmd) static void __prep prep_halt(void) { - local_irq_disable(); /* no interrupts */ + raw_local_irq_disable(); /* no interrupts */ /* set exception prefix high - to the prom */ _nmask_and_or_msr(0, MSR_IP); @@ -544,7 +544,7 @@ prep_sig750_poweroff(void) { /* tweak the power manager found in most IBM PRePs (except Thinkpads) */ - local_irq_disable(); + raw_local_irq_disable(); /* set exception prefix high - to the prom */ _nmask_and_or_msr(0, MSR_IP); @@ -938,6 +938,7 @@ prep_calibrate_decr(void) 
(freq/divisor)/1000000, (freq/divisor)%1000000); tb_to_us = mulhwu_scale_factor(freq/divisor, 1000000); + cpu_khz = (freq / divisor) / 1000; tb_ticks_per_jiffy = freq / HZ / divisor; } } Index: linux/arch/ppc/platforms/prpmc750.c =================================================================== --- linux.orig/arch/ppc/platforms/prpmc750.c +++ linux/arch/ppc/platforms/prpmc750.c @@ -271,18 +271,19 @@ static void __init prpmc750_calibrate_de tb_ticks_per_jiffy = freq / (HZ * divisor); tb_to_us = mulhwu_scale_factor(freq / divisor, 1000000); + cpu_khz = (freq / divisor) / 1000; } static void prpmc750_restart(char *cmd) { - local_irq_disable(); + raw_local_irq_disable(); writeb(PRPMC750_MODRST_MASK, PRPMC750_MODRST_REG); while (1) ; } static void prpmc750_halt(void) { - local_irq_disable(); + raw_local_irq_disable(); while (1) ; } Index: linux/arch/ppc/platforms/prpmc800.c =================================================================== --- linux.orig/arch/ppc/platforms/prpmc800.c +++ linux/arch/ppc/platforms/prpmc800.c @@ -330,6 +330,7 @@ static void __init prpmc800_calibrate_de tb_ticks_per_second = 100000000 / 4; tb_ticks_per_jiffy = tb_ticks_per_second / HZ; tb_to_us = mulhwu_scale_factor(tb_ticks_per_second, 1000000); + cpu_khz = tb_ticks_per_second / 1000; return; } @@ -370,13 +371,14 @@ static void __init prpmc800_calibrate_de tb_ticks_per_second = (tbl_end - tbl_start) * 2; tb_ticks_per_jiffy = tb_ticks_per_second / HZ; tb_to_us = mulhwu_scale_factor(tb_ticks_per_second, 1000000); + cpu_khz = tb_ticks_per_second / 1000; } static void prpmc800_restart(char *cmd) { ulong temp; - local_irq_disable(); + raw_local_irq_disable(); temp = in_be32((uint *) HARRIER_MISC_CSR_REG); temp |= HARRIER_RSTOUT; out_be32((uint *) HARRIER_MISC_CSR_REG, temp); @@ -385,7 +387,7 @@ static void prpmc800_restart(char *cmd) static void prpmc800_halt(void) { - local_irq_disable(); + raw_local_irq_disable(); while (1) ; } Index: linux/arch/ppc/platforms/radstone_ppc7d.c 
=================================================================== --- linux.orig/arch/ppc/platforms/radstone_ppc7d.c +++ linux/arch/ppc/platforms/radstone_ppc7d.c @@ -175,7 +175,7 @@ static void ppc7d_power_off(void) { u32 data; - local_irq_disable(); + raw_local_irq_disable(); /* Ensure that internal MV643XX watchdog is disabled. * The Disco watchdog uses MPP17 on this hardware. Index: linux/arch/ppc/platforms/sandpoint.c =================================================================== --- linux.orig/arch/ppc/platforms/sandpoint.c +++ linux/arch/ppc/platforms/sandpoint.c @@ -544,7 +544,7 @@ sandpoint_map_io(void) static void sandpoint_restart(char *cmd) { - local_irq_disable(); + raw_local_irq_disable(); /* Set exception prefix high - to the firmware */ _nmask_and_or_msr(0, MSR_IP); @@ -558,7 +558,7 @@ sandpoint_restart(char *cmd) static void sandpoint_power_off(void) { - local_irq_disable(); + raw_local_irq_disable(); for(;;); /* No way to shut power off with software */ /* NOTREACHED */ } Index: linux/arch/ppc/platforms/sbc82xx.c =================================================================== --- linux.orig/arch/ppc/platforms/sbc82xx.c +++ linux/arch/ppc/platforms/sbc82xx.c @@ -68,7 +68,7 @@ static void sbc82xx_time_init(void) static volatile char *sbc82xx_i8259_map; static char sbc82xx_i8259_mask = 0xff; -static DEFINE_SPINLOCK(sbc82xx_i8259_lock); +static DEFINE_RAW_SPINLOCK(sbc82xx_i8259_lock); static void sbc82xx_i8259_mask_and_ack_irq(unsigned int irq_nr) { Index: linux/arch/ppc/platforms/spruce.c =================================================================== --- linux.orig/arch/ppc/platforms/spruce.c +++ linux/arch/ppc/platforms/spruce.c @@ -150,6 +150,7 @@ spruce_calibrate_decr(void) freq = SPRUCE_BUS_SPEED; tb_ticks_per_jiffy = freq / HZ / divisor; tb_to_us = mulhwu_scale_factor(freq/divisor, 1000000); + cpu_khz = (freq / divisor) / 1000; } static int @@ -236,7 +237,7 @@ spruce_setup_arch(void) static void spruce_restart(char *cmd) { - 
local_irq_disable(); + raw_local_irq_disable(); /* SRR0 has system reset vector, SRR1 has default MSR value */ /* rfi restores MSR from SRR1 and sets the PC to the SRR0 value */ Index: linux/arch/ppc/syslib/cpm2_common.c =================================================================== --- linux.orig/arch/ppc/syslib/cpm2_common.c +++ linux/arch/ppc/syslib/cpm2_common.c @@ -114,7 +114,7 @@ cpm2_fastbrg(uint brg, uint rate, int di /* * dpalloc / dpfree bits. */ -static spinlock_t cpm_dpmem_lock; +static raw_spinlock_t cpm_dpmem_lock; /* 16 blocks should be enough to satisfy all requests * until the memory subsystem goes up... */ static rh_block_t cpm_boot_dpmem_rh_block[16]; Index: linux/arch/ppc/syslib/ibm440gx_common.c =================================================================== --- linux.orig/arch/ppc/syslib/ibm440gx_common.c +++ linux/arch/ppc/syslib/ibm440gx_common.c @@ -157,7 +157,7 @@ void __init ibm440gx_l2c_enable(void){ return; } - local_irq_save(flags); + raw_local_irq_save(flags); asm volatile ("sync" ::: "memory"); /* Disable SRAM */ @@ -201,7 +201,7 @@ void __init ibm440gx_l2c_enable(void){ mtdcr(DCRN_L2C0_CFG, r); asm volatile ("sync; isync" ::: "memory"); - local_irq_restore(flags); + raw_local_irq_restore(flags); } /* Disable L2 cache */ @@ -209,7 +209,7 @@ void __init ibm440gx_l2c_disable(void){ u32 r; unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); asm volatile ("sync" ::: "memory"); /* Disable L2C mode */ @@ -228,7 +228,7 @@ void __init ibm440gx_l2c_disable(void){ SRAM_SBCR_BAS3 | SRAM_SBCR_BS_64KB | SRAM_SBCR_BU_RW); asm volatile ("sync; isync" ::: "memory"); - local_irq_restore(flags); + raw_local_irq_restore(flags); } void __init ibm440gx_l2c_setup(struct ibm44x_clocks* p) Index: linux/arch/ppc/syslib/ibm44x_common.c =================================================================== --- linux.orig/arch/ppc/syslib/ibm44x_common.c +++ linux/arch/ppc/syslib/ibm44x_common.c @@ -60,6 +60,7 @@ void __init 
ibm44x_calibrate_decr(unsign { tb_ticks_per_jiffy = freq / HZ; tb_to_us = mulhwu_scale_factor(freq, 1000000); + cpu_khz = freq / 1000; /* Set the time base to zero */ mtspr(SPRN_TBWL, 0); @@ -76,19 +77,19 @@ extern void abort(void); static void ibm44x_restart(char *cmd) { - local_irq_disable(); + raw_local_irq_disable(); abort(); } static void ibm44x_power_off(void) { - local_irq_disable(); + raw_local_irq_disable(); for(;;); } static void ibm44x_halt(void) { - local_irq_disable(); + raw_local_irq_disable(); for(;;); } Index: linux/arch/ppc/syslib/m8260_pci_erratum9.c =================================================================== --- linux.orig/arch/ppc/syslib/m8260_pci_erratum9.c +++ linux/arch/ppc/syslib/m8260_pci_erratum9.c @@ -132,7 +132,7 @@ idma_pci9_read(u8 *dst, u8 *src, int byt volatile idma_bd_t *bd = &idma_dpram->bd; volatile cpm2_map_t *immap = cpm2_immr; - local_irq_save(flags); + raw_local_irq_save(flags); /* initialize IDMA parameter RAM for this transfer */ if (sinc) @@ -161,7 +161,7 @@ idma_pci9_read(u8 *dst, u8 *src, int byt /* wait for transfer to complete */ while(bd->flags & IDMA_BD_V); - local_irq_restore(flags); + raw_local_irq_restore(flags); return; } @@ -184,7 +184,7 @@ idma_pci9_write(u8 *dst, u8 *src, int by volatile idma_bd_t *bd = &idma_dpram->bd; volatile cpm2_map_t *immap = cpm2_immr; - local_irq_save(flags); + raw_local_irq_save(flags); /* initialize IDMA parameter RAM for this transfer */ if (dinc) @@ -213,7 +213,7 @@ idma_pci9_write(u8 *dst, u8 *src, int by /* wait for transfer to complete */ while(bd->flags & IDMA_BD_V); - local_irq_restore(flags); + raw_local_irq_restore(flags); return; } Index: linux/arch/ppc/syslib/m8260_setup.c =================================================================== --- linux.orig/arch/ppc/syslib/m8260_setup.c +++ linux/arch/ppc/syslib/m8260_setup.c @@ -78,6 +78,7 @@ m8260_calibrate_decr(void) divisor = 4; tb_ticks_per_jiffy = freq / HZ / divisor; tb_to_us = mulhwu_scale_factor(freq / 
divisor, 1000000); + cpu_khz = (freq / divisor) / 1000; } /* The 8260 has an internal 1-second timer update register that @@ -128,7 +129,7 @@ m8260_restart(char *cmd) static void m8260_halt(void) { - local_irq_disable(); + raw_local_irq_disable(); while (1); } Index: linux/arch/ppc/syslib/m8xx_setup.c =================================================================== --- linux.orig/arch/ppc/syslib/m8xx_setup.c +++ linux/arch/ppc/syslib/m8xx_setup.c @@ -159,6 +159,7 @@ void __init m8xx_calibrate_decr(void) printk("Decrementer Frequency = %d/%d\n", freq, divisor); tb_ticks_per_jiffy = freq / HZ / divisor; tb_to_us = mulhwu_scale_factor(freq / divisor, 1000000); + cpu_khz = (freq / divisor) / 1000; /* Perform some more timer/timebase initialization. This used * to be done elsewhere, but other changes caused it to get @@ -234,7 +235,7 @@ m8xx_restart(char *cmd) { __volatile__ unsigned char dummy; - local_irq_disable(); + raw_local_irq_disable(); ((immap_t *)IMAP_ADDR)->im_clkrst.car_plprcr |= 0x00000080; /* Clear the ME bit in MSR to cause checkstop on machine check Index: linux/arch/ppc/syslib/mpc52xx_setup.c =================================================================== --- linux.orig/arch/ppc/syslib/mpc52xx_setup.c +++ linux/arch/ppc/syslib/mpc52xx_setup.c @@ -40,7 +40,7 @@ mpc52xx_restart(char *cmd) { struct mpc52xx_gpt __iomem *gpt0 = MPC52xx_VA(MPC52xx_GPTx_OFFSET(0)); - local_irq_disable(); + raw_local_irq_disable(); /* Turn on the watchdog and wait for it to expire. 
It effectively does a reset */ @@ -53,7 +53,7 @@ mpc52xx_restart(char *cmd) void mpc52xx_halt(void) { - local_irq_disable(); + raw_local_irq_disable(); while (1); } @@ -214,6 +214,7 @@ mpc52xx_calibrate_decr(void) tb_ticks_per_jiffy = xlbfreq / HZ / divisor; tb_to_us = mulhwu_scale_factor(xlbfreq / divisor, 1000000); + cpu_khz = (xlbfreq / divisor) / 1000; } int mpc52xx_match_psc_function(int psc_idx, const char *func) Index: linux/arch/ppc/syslib/ocp.c =================================================================== --- linux.orig/arch/ppc/syslib/ocp.c +++ linux/arch/ppc/syslib/ocp.c @@ -45,11 +45,11 @@ #include #include #include +#include #include #include #include -#include #include //#define DBG(x) printk x Index: linux/arch/ppc/syslib/open_pic.c =================================================================== --- linux.orig/arch/ppc/syslib/open_pic.c +++ linux/arch/ppc/syslib/open_pic.c @@ -528,7 +528,7 @@ void openpic_reset_processor_phys(u_int } #if defined(CONFIG_SMP) || defined(CONFIG_PM) -static DEFINE_SPINLOCK(openpic_setup_lock); +static DEFINE_RAW_SPINLOCK(openpic_setup_lock); #endif #ifdef CONFIG_SMP Index: linux/arch/ppc/syslib/open_pic2.c =================================================================== --- linux.orig/arch/ppc/syslib/open_pic2.c +++ linux/arch/ppc/syslib/open_pic2.c @@ -382,7 +382,7 @@ static void openpic2_set_spurious(u_int vec); } -static DEFINE_SPINLOCK(openpic2_setup_lock); +static DEFINE_RAW_SPINLOCK(openpic2_setup_lock); /* * Initialize a timer interrupt (and disable it) Index: linux/arch/ppc/syslib/ppc4xx_setup.c =================================================================== --- linux.orig/arch/ppc/syslib/ppc4xx_setup.c +++ linux/arch/ppc/syslib/ppc4xx_setup.c @@ -142,7 +142,7 @@ static void ppc4xx_power_off(void) { printk("System Halted\n"); - local_irq_disable(); + raw_local_irq_disable(); while (1) ; } @@ -150,7 +150,7 @@ static void ppc4xx_halt(void) { printk("System Halted\n"); - local_irq_disable(); + 
raw_local_irq_disable(); while (1) ; } @@ -173,6 +173,7 @@ ppc4xx_calibrate_decr(void) freq = bip->bi_tbfreq; tb_ticks_per_jiffy = freq / HZ; tb_to_us = mulhwu_scale_factor(freq, 1000000); + cpu_khz = freq / 1000; /* Set the time base to zero. ** At 200 Mhz, time base will rollover in ~2925 years. Index: linux/arch/ppc/syslib/ppc83xx_setup.c =================================================================== --- linux.orig/arch/ppc/syslib/ppc83xx_setup.c +++ linux/arch/ppc/syslib/ppc83xx_setup.c @@ -137,7 +137,7 @@ mpc83xx_restart(char *cmd) reg = ioremap(BCSR_PHYS_ADDR, BCSR_SIZE); - local_irq_disable(); + raw_local_irq_disable(); /* * Unlock the BCSR bits so a PRST will update the contents. @@ -166,14 +166,14 @@ mpc83xx_restart(char *cmd) void mpc83xx_power_off(void) { - local_irq_disable(); + raw_local_irq_disable(); for(;;); } void mpc83xx_halt(void) { - local_irq_disable(); + raw_local_irq_disable(); for(;;); } Index: linux/arch/ppc/syslib/ppc85xx_setup.c =================================================================== --- linux.orig/arch/ppc/syslib/ppc85xx_setup.c +++ linux/arch/ppc/syslib/ppc85xx_setup.c @@ -59,6 +59,7 @@ mpc85xx_calibrate_decr(void) divisor = 8; tb_ticks_per_jiffy = freq / divisor / HZ; tb_to_us = mulhwu_scale_factor(freq / divisor, 1000000); + cpu_khz = (freq / divisor) / 1000; /* Set the time base to zero */ mtspr(SPRN_TBWL, 0); @@ -114,21 +115,21 @@ mpc85xx_early_serial_map(void) void mpc85xx_restart(char *cmd) { - local_irq_disable(); + raw_local_irq_disable(); abort(); } void mpc85xx_power_off(void) { - local_irq_disable(); + raw_local_irq_disable(); for(;;); } void mpc85xx_halt(void) { - local_irq_disable(); + raw_local_irq_disable(); for(;;); } Index: linux/arch/ppc/syslib/prom.c =================================================================== --- linux.orig/arch/ppc/syslib/prom.c +++ linux/arch/ppc/syslib/prom.c @@ -1397,7 +1397,7 @@ print_properties(struct device_node *np) } #endif -static DEFINE_SPINLOCK(rtas_lock); +static 
DEFINE_RAW_SPINLOCK(rtas_lock); /* this can be called after setup -- Cort */ int __openfirmware Index: linux/arch/ppc/syslib/todc_time.c =================================================================== --- linux.orig/arch/ppc/syslib/todc_time.c +++ linux/arch/ppc/syslib/todc_time.c @@ -508,6 +508,7 @@ todc_calibrate_decr(void) tb_ticks_per_jiffy = freq / HZ; tb_to_us = mulhwu_scale_factor(freq, 1000000); + cpu_khz = freq / 1000; return; } Index: linux/arch/ppc/xmon/xmon.c =================================================================== --- linux.orig/arch/ppc/xmon/xmon.c +++ linux/arch/ppc/xmon/xmon.c @@ -291,10 +291,10 @@ irqreturn_t xmon_irq(int irq, void *d, struct pt_regs *regs) { unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); printf("Keyboard interrupt\n"); xmon(regs); - local_irq_restore(flags); + raw_local_irq_restore(flags); return IRQ_HANDLED; } Index: linux/arch/ppc64/kernel/time.c =================================================================== --- linux.orig/arch/ppc64/kernel/time.c +++ linux/arch/ppc64/kernel/time.c @@ -68,10 +68,6 @@ #include #include -u64 jiffies_64 __cacheline_aligned_in_smp = INITIAL_JIFFIES; - -EXPORT_SYMBOL(jiffies_64); - /* keep track of when we need to update the rtc */ time_t last_rtc_update; extern int piranha_simulator; Index: linux/arch/s390/kernel/time.c =================================================================== --- linux.orig/arch/s390/kernel/time.c +++ linux/arch/s390/kernel/time.c @@ -49,10 +49,6 @@ #define TICK_SIZE tick -u64 jiffies_64 = INITIAL_JIFFIES; - -EXPORT_SYMBOL(jiffies_64); - static ext_int_info_t ext_int_info_cc; static u64 init_timer_cc; static u64 jiffies_timer_cc; Index: linux/arch/sh/kernel/time.c =================================================================== --- linux.orig/arch/sh/kernel/time.c +++ linux/arch/sh/kernel/time.c @@ -56,10 +56,6 @@ extern unsigned long wall_jiffies; #define TICK_SIZE (tick_nsec / 1000) DEFINE_SPINLOCK(tmu0_lock); -u64 
jiffies_64 = INITIAL_JIFFIES; - -EXPORT_SYMBOL(jiffies_64); - /* XXX: Can we initialize this in a routine somewhere? Dreamcast doesn't want * these routines anywhere... */ #ifdef CONFIG_SH_RTC Index: linux/arch/sh64/kernel/time.c =================================================================== --- linux.orig/arch/sh64/kernel/time.c +++ linux/arch/sh64/kernel/time.c @@ -116,8 +116,6 @@ extern unsigned long wall_jiffies; -u64 jiffies_64 = INITIAL_JIFFIES; - static unsigned long tmu_base, rtc_base; unsigned long cprc_base; Index: linux/arch/sparc/kernel/time.c =================================================================== --- linux.orig/arch/sparc/kernel/time.c +++ linux/arch/sparc/kernel/time.c @@ -45,10 +45,6 @@ extern unsigned long wall_jiffies; -u64 jiffies_64 = INITIAL_JIFFIES; - -EXPORT_SYMBOL(jiffies_64); - DEFINE_SPINLOCK(rtc_lock); enum sparc_clock_type sp_clock_typ; DEFINE_SPINLOCK(mostek_lock); Index: linux/arch/sparc64/kernel/time.c =================================================================== --- linux.orig/arch/sparc64/kernel/time.c +++ linux/arch/sparc64/kernel/time.c @@ -55,10 +55,6 @@ unsigned long ds1287_regs = 0UL; extern unsigned long wall_jiffies; -u64 jiffies_64 = INITIAL_JIFFIES; - -EXPORT_SYMBOL(jiffies_64); - static void __iomem *mstk48t08_regs; static void __iomem *mstk48t59_regs; Index: linux/arch/um/kernel/time.c =================================================================== --- linux.orig/arch/um/kernel/time.c +++ linux/arch/um/kernel/time.c @@ -114,8 +114,8 @@ void time_init(void) wall_to_monotonic.tv_nsec = -now.tv_nsec; } -/* Declared in linux/time.h, which can't be included here */ -extern void clock_was_set(void); +/* Defined in linux/ktimer.h, which can't be included here */ +#define clock_was_set() do { } while (0) void do_gettimeofday(struct timeval *tv) { Index: linux/arch/um/kernel/time_kern.c =================================================================== --- linux.orig/arch/um/kernel/time_kern.c +++ 
linux/arch/um/kernel/time_kern.c @@ -22,10 +22,6 @@ #include "mode.h" #include "os.h" -u64 jiffies_64 = INITIAL_JIFFIES; - -EXPORT_SYMBOL(jiffies_64); - int hz(void) { return(HZ); Index: linux/arch/v850/kernel/time.c =================================================================== --- linux.orig/arch/v850/kernel/time.c +++ linux/arch/v850/kernel/time.c @@ -26,10 +26,6 @@ #include "mach.h" -u64 jiffies_64 = INITIAL_JIFFIES; - -EXPORT_SYMBOL(jiffies_64); - #define TICK_SIZE (tick_nsec / 1000) /* Index: linux/arch/x86_64/Kconfig =================================================================== --- linux.orig/arch/x86_64/Kconfig +++ linux/arch/x86_64/Kconfig @@ -24,6 +24,18 @@ config X86 bool default y +config GENERIC_TIME + bool + default y + +config GENERIC_TIME_VSYSCALL + bool + default y + +config PARANOID_GENERIC_TIME + default y + bool "Paranoid Timekeeping Checks" + config SEMAPHORE_SLEEPERS bool default y @@ -38,13 +50,6 @@ config ISA config SBUS bool -config RWSEM_GENERIC_SPINLOCK - bool - default y - -config RWSEM_XCHGADD_ALGORITHM - bool - config GENERIC_CALIBRATE_DELAY bool default y @@ -226,6 +231,14 @@ config SCHED_SMT source "kernel/Kconfig.preempt" +config RWSEM_GENERIC_SPINLOCK + bool + depends on PREEMPT_RT + default y + +config RWSEM_XCHGADD_ALGORITHM + bool + config K8_NUMA bool "K8 NUMA support" select NUMA Index: linux/arch/x86_64/Kconfig.debug =================================================================== --- linux.orig/arch/x86_64/Kconfig.debug +++ linux/arch/x86_64/Kconfig.debug @@ -33,6 +33,14 @@ config IOMMU_DEBUG options. See Documentation/x86_64/boot-options.txt for more details. +config DEBUG_STACKOVERFLOW + bool "Check for stack overflows" + depends on DEBUG_KERNEL + default y + help + This option will cause messages to be printed if free stack space + drops below a certain limit. 
+ config KPROBES bool "Kprobes" depends on DEBUG_KERNEL Index: linux/arch/x86_64/boot/compressed/misc.c =================================================================== --- linux.orig/arch/x86_64/boot/compressed/misc.c +++ linux/arch/x86_64/boot/compressed/misc.c @@ -114,6 +114,7 @@ static char *vidmem = (char *)0xb8000; static int vidport; static int lines, cols; +#define ZLIB_INFLATE_NO_INFLATE_LOCK #include "../../../../lib/inflate.c" static void *malloc(int size) Index: linux/arch/x86_64/ia32/sys_ia32.c =================================================================== --- linux.orig/arch/x86_64/ia32/sys_ia32.c +++ linux/arch/x86_64/ia32/sys_ia32.c @@ -456,6 +456,10 @@ sys32_settimeofday(struct compat_timeval struct timespec kts; struct timezone ktz; + int ret = timeofday_API_hacks(tv, tz); + if (ret != 1) + return ret; + if (tv) { if (get_tv32(&ktv, tv)) return -EFAULT; Index: linux/arch/x86_64/kernel/Makefile =================================================================== --- linux.orig/arch/x86_64/kernel/Makefile +++ linux/arch/x86_64/kernel/Makefile @@ -29,7 +29,7 @@ obj-$(CONFIG_GART_IOMMU) += pci-gart.o a obj-$(CONFIG_DUMMY_IOMMU) += pci-nommu.o pci-dma.o obj-$(CONFIG_SWIOTLB) += swiotlb.o obj-$(CONFIG_KPROBES) += kprobes.o -obj-$(CONFIG_X86_PM_TIMER) += pmtimer.o +obj-$(CONFIG_SYSFS) += switch2poll.o obj-$(CONFIG_MODULES) += module.o Index: linux/arch/x86_64/kernel/apic.c =================================================================== --- linux.orig/arch/x86_64/kernel/apic.c +++ linux/arch/x86_64/kernel/apic.c @@ -485,10 +485,9 @@ static int lapic_suspend(struct sys_devi apic_pm_state.apic_tmict = apic_read(APIC_TMICT); apic_pm_state.apic_tdcr = apic_read(APIC_TDCR); apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR); - local_save_flags(flags); - local_irq_disable(); + raw_local_irq_save(flags); disable_local_APIC(); - local_irq_restore(flags); + raw_local_irq_restore(flags); return 0; } @@ -503,7 +502,7 @@ static int lapic_resume(struct 
sys_devic /* XXX: Pavel needs this for S3 resume, but can't explain why */ set_fixmap_nocache(FIX_APIC_BASE, APIC_DEFAULT_PHYS_BASE); - local_irq_save(flags); + raw_local_irq_save(flags); rdmsr(MSR_IA32_APICBASE, l, h); l &= ~MSR_IA32_APICBASE_BASE; l |= MSR_IA32_APICBASE_ENABLE | APIC_DEFAULT_PHYS_BASE; @@ -526,7 +525,7 @@ static int lapic_resume(struct sys_devic apic_write(APIC_LVTERR, apic_pm_state.apic_lvterr); apic_write(APIC_ESR, 0); apic_read(APIC_ESR); - local_irq_restore(flags); + raw_local_irq_restore(flags); return 0; } @@ -680,7 +679,7 @@ static void setup_APIC_timer(unsigned in { unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); /* For some reasons this doesn't work on Simics, so fake it for now */ if (!strstr(boot_cpu_data.x86_model_id, "Screwdriver")) { @@ -710,7 +709,7 @@ static void setup_APIC_timer(unsigned in __setup_APIC_LVTT(clocks); - local_irq_restore(flags); + raw_local_irq_restore(flags); } /* @@ -767,7 +766,7 @@ void __init setup_boot_APIC_clock (void) printk(KERN_INFO "Using local APIC timer interrupts.\n"); using_apic_timer = 1; - local_irq_disable(); + raw_local_irq_disable(); calibration_result = calibrate_APIC_clock(); /* @@ -775,14 +774,14 @@ void __init setup_boot_APIC_clock (void) */ setup_APIC_timer(calibration_result); - local_irq_enable(); + raw_local_irq_enable(); } void __cpuinit setup_secondary_APIC_clock(void) { - local_irq_disable(); /* FIXME: Do we need this? --RR */ + raw_local_irq_disable(); /* FIXME: Do we need this? --RR */ setup_APIC_timer(calibration_result); - local_irq_enable(); + raw_local_irq_enable(); } void __cpuinit disable_APIC_timer(void) Index: linux/arch/x86_64/kernel/early_printk.c =================================================================== --- linux.orig/arch/x86_64/kernel/early_printk.c +++ linux/arch/x86_64/kernel/early_printk.c @@ -206,7 +206,7 @@ static int early_console_initialized = 0 void early_printk(const char *fmt, ...) 
{ - char buf[512]; + static char buf[512]; int n; va_list ap; Index: linux/arch/x86_64/kernel/entry.S =================================================================== --- linux.orig/arch/x86_64/kernel/entry.S +++ linux/arch/x86_64/kernel/entry.S @@ -48,6 +48,15 @@ #define retint_kernel retint_restore_args #endif +#ifdef CONFIG_CRITICAL_IRQSOFF_TIMING +# define CALL_TRACE_IRQS_ON \ + push %rbp; \ + mov %rsp, %rbp; \ + call trace_irqs_on; \ + leaveq +#else +# define CALL_TRACE_IRQS_ON +#endif /* * C code is not supposed to know about undefined top of stack. Every time * a C function with an pt_regs argument is called from the SYSCALL based @@ -230,8 +239,8 @@ sysret_check: /* edx: work, edi: workmask */ sysret_careful: CFI_RESTORE_STATE - bt $TIF_NEED_RESCHED,%edx - jnc sysret_signal + testl $(_TIF_NEED_RESCHED|_TIF_NEED_RESCHED_DELAYED),%edx + jz sysret_signal sti pushq %rdi CFI_ADJUST_CFA_OFFSET 8 @@ -252,7 +261,7 @@ sysret_signal: leaq -ARGOFFSET(%rsp),%rdi # &pt_regs -> arg1 xorl %esi,%esi # oldset -> arg2 call ptregscall_common -1: movl $_TIF_NEED_RESCHED,%edi +1: movl $(_TIF_NEED_RESCHED|_TIF_NEED_RESCHED_DELAYED),%edi jmp sysret_check badsys: @@ -319,8 +328,8 @@ int_with_check: /* First do a reschedule test. 
*/ /* edx: work, edi: workmask */ int_careful: - bt $TIF_NEED_RESCHED,%edx - jnc int_very_careful + testl $(_TIF_NEED_RESCHED|_TIF_NEED_RESCHED_DELAYED),%edx + jz int_very_careful sti pushq %rdi CFI_ADJUST_CFA_OFFSET 8 @@ -353,7 +362,7 @@ int_signal: movq %rsp,%rdi # &ptregs -> arg1 xorl %esi,%esi # oldset -> arg2 call do_notify_resume -1: movl $_TIF_NEED_RESCHED,%edi +1: movl $(_TIF_NEED_RESCHED|_TIF_NEED_RESCHED_DELAYED),%edi int_restore_rest: RESTORE_REST cli @@ -554,8 +563,8 @@ bad_iret: /* edi: workmask, edx: work */ retint_careful: CFI_RESTORE_STATE - bt $TIF_NEED_RESCHED,%edx - jnc retint_signal + testl $(_TIF_NEED_RESCHED|_TIF_NEED_RESCHED_DELAYED),%edx + jz retint_signal sti pushq %rdi CFI_ADJUST_CFA_OFFSET 8 @@ -577,7 +586,7 @@ retint_signal: call do_notify_resume RESTORE_REST cli - movl $_TIF_NEED_RESCHED,%edi + movl $(_TIF_NEED_RESCHED|_TIF_NEED_RESCHED_DELAYED),%edi GET_THREAD_INFO(%rcx) jmp retint_check @@ -593,6 +602,7 @@ retint_kernel: bt $9,EFLAGS-ARGOFFSET(%rsp) /* interrupts off? 
*/ jnc retint_restore_args call preempt_schedule_irq + CALL_TRACE_IRQS_ON jmp exit_intr #endif CFI_ENDPROC @@ -1038,3 +1048,41 @@ ENTRY(call_softirq) CFI_ADJUST_CFA_OFFSET -8 ret CFI_ENDPROC + +#ifdef CONFIG_LATENCY_TRACE + +ENTRY(mcount) + cmpq $0, trace_enabled + jz out + + push %rbp + mov %rsp,%rbp + + push %r9 + push %r8 + push %rdi + push %rsi + push %rdx + push %rcx + push %rax + + mov 0x0(%rbp),%rax + mov 0x8(%rbp),%rdi + mov 0x8(%rax),%rsi + + call __trace + + pop %rax + pop %rcx + pop %rdx + pop %rsi + pop %rdi + pop %r8 + pop %r9 + + leaveq +out: + ret + +#endif + Index: linux/arch/x86_64/kernel/genapic_flat.c =================================================================== --- linux.orig/arch/x86_64/kernel/genapic_flat.c +++ linux/arch/x86_64/kernel/genapic_flat.c @@ -50,8 +50,8 @@ static void flat_send_IPI_mask(cpumask_t unsigned long cfg; unsigned long flags; - local_save_flags(flags); - local_irq_disable(); + raw_local_save_flags(flags); + raw_local_irq_disable(); /* * Wait for idle. @@ -73,7 +73,7 @@ static void flat_send_IPI_mask(cpumask_t * Send the IPI. The write to APIC_ICR fires this off. 
*/ apic_write(APIC_ICR, cfg); - local_irq_restore(flags); + raw_local_irq_restore(flags); } static void flat_send_IPI_allbutself(int vector) Index: linux/arch/x86_64/kernel/i8259.c =================================================================== --- linux.orig/arch/x86_64/kernel/i8259.c +++ linux/arch/x86_64/kernel/i8259.c @@ -127,7 +127,7 @@ void (*interrupt[NR_IRQS])(void) = { * moves to arch independent land */ -DEFINE_SPINLOCK(i8259A_lock); +DEFINE_RAW_SPINLOCK(i8259A_lock); static void end_8259A_irq (unsigned int irq) { @@ -448,7 +448,7 @@ device_initcall(i8259A_init_sysfs); * IRQ2 is cascade interrupt to second interrupt controller */ -static struct irqaction irq2 = { no_action, 0, CPU_MASK_NONE, "cascade", NULL, NULL}; +static struct irqaction irq2 = { no_action, SA_NODELAY, CPU_MASK_NONE, "cascade", NULL, NULL}; void __init init_ISA_irqs (void) { Index: linux/arch/x86_64/kernel/init_task.c =================================================================== --- linux.orig/arch/x86_64/kernel/init_task.c +++ linux/arch/x86_64/kernel/init_task.c @@ -10,8 +10,8 @@ #include #include -static struct fs_struct init_fs = INIT_FS; -static struct files_struct init_files = INIT_FILES; +static struct fs_struct init_fs = INIT_FS(init_fs); +static struct files_struct init_files = INIT_FILES(init_files); static struct signal_struct init_signals = INIT_SIGNALS(init_signals); static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); struct mm_struct init_mm = INIT_MM(init_mm); Index: linux/arch/x86_64/kernel/io_apic.c =================================================================== --- linux.orig/arch/x86_64/kernel/io_apic.c +++ linux/arch/x86_64/kernel/io_apic.c @@ -46,7 +46,7 @@ static int no_timer_check; int disable_timer_pin_1 __initdata; -static DEFINE_SPINLOCK(ioapic_lock); +static DEFINE_RAW_SPINLOCK(ioapic_lock); /* * # of IRQ routing registers @@ -93,6 +93,9 @@ int vector_irq[NR_VECTORS] __read_mostly reg = io_apic_read(entry->apic, 0x10 + R + 
pin*2); \ reg ACTION; \ io_apic_modify(entry->apic, reg); \ + /* Force POST flush by reading: */ \ + reg = io_apic_read(entry->apic, 0x10 + R + pin*2); \ + \ if (!entry->next) \ break; \ entry = irq_2_pin + entry->next; \ @@ -156,10 +159,8 @@ static void add_pin_to_irq(unsigned int static void name##_IO_APIC_irq (unsigned int irq) \ __DO_ACTION(R, ACTION, FINAL) -DO_ACTION( __mask, 0, |= 0x00010000, io_apic_sync(entry->apic) ) - /* mask = 1 */ -DO_ACTION( __unmask, 0, &= 0xfffeffff, ) - /* mask = 0 */ +DO_ACTION( __mask, 0, |= 0x00010000, ) /* mask = 1 */ +DO_ACTION( __unmask, 0, &= 0xfffeffff, ) /* mask = 0 */ static void mask_IO_APIC_irq (unsigned int irq) { @@ -1273,7 +1274,7 @@ static int __init timer_irq_works(void) { unsigned long t1 = jiffies; - local_irq_enable(); + raw_local_irq_enable(); /* Let ten ticks pass... */ mdelay((10 * 1000) / HZ); @@ -1366,12 +1367,50 @@ static unsigned int startup_level_ioapic return 0; /* don't check for pending */ } +/* + * In the preemptible case mask the IRQ first then handle it and ack it. + * + * (In the non-preemptible case we keep the IRQ unacked in the local APIC + * and dont need to do the masking, because the code executes atomically.) 
+ */ +#ifdef CONFIG_PREEMPT_HARDIRQS + +static void mask_and_ack_level_ioapic_irq(unsigned int irq) +{ + move_irq(irq); + mask_IO_APIC_irq(irq); + ack_APIC_irq(); +} + +static void end_level_ioapic_irq(unsigned int irq) +{ + if (!(irq_desc[irq].status & IRQ_INPROGRESS)) + unmask_IO_APIC_irq(irq); +} + +static void enable_level_ioapic_irq(unsigned int irq) +{ + unmask_IO_APIC_irq(irq); +} + +#else /* !CONFIG_PREEMPT_HARDIRQS */ + +static void mask_and_ack_level_ioapic_irq(unsigned int irq) +{ +} + static void end_level_ioapic_irq (unsigned int irq) { move_irq(irq); ack_APIC_irq(); } +static void enable_level_ioapic_irq(unsigned int irq) +{ + unmask_IO_APIC_irq(irq); +} +#endif /* !CONFIG_PREEMPT_HARDIRQS */ + #ifdef CONFIG_PCI_MSI static unsigned int startup_edge_ioapic_vector(unsigned int vector) { @@ -1395,6 +1434,13 @@ static unsigned int startup_level_ioapic return startup_level_ioapic_irq (irq); } +static void mask_and_ack_level_ioapic_vector (unsigned int vector) +{ + int irq = vector_to_irq(vector); + + mask_and_ack_level_ioapic_irq(irq); +} + static void end_level_ioapic_vector (unsigned int vector) { int irq = vector_to_irq(vector); @@ -1403,6 +1449,11 @@ static void end_level_ioapic_vector (uns end_level_ioapic_irq(irq); } +static void enable_level_ioapic_vector(unsigned int vector) +{ + enable_level_ioapic_irq(vector_to_irq(vector)); +} + static void mask_IO_APIC_vector (unsigned int vector) { int irq = vector_to_irq(vector); Index: linux/arch/x86_64/kernel/irq.c =================================================================== --- linux.orig/arch/x86_64/kernel/irq.c +++ linux/arch/x86_64/kernel/irq.c @@ -129,9 +129,9 @@ void fixup_irqs(cpumask_t map) } /* That doesn't seem sufficient. Give it 1ms. 
*/ - local_irq_enable(); + raw_local_irq_enable(); mdelay(1); - local_irq_disable(); + raw_local_irq_disable(); } #endif @@ -145,11 +145,11 @@ asmlinkage void do_softirq(void) if (in_interrupt()) return; - local_irq_save(flags); + raw_local_irq_save(flags); pending = local_softirq_pending(); /* Switch to interrupt stack */ if (pending) call_softirq(); - local_irq_restore(flags); + raw_local_irq_restore(flags); } EXPORT_SYMBOL(do_softirq); Index: linux/arch/x86_64/kernel/machine_kexec.c =================================================================== --- linux.orig/arch/x86_64/kernel/machine_kexec.c +++ linux/arch/x86_64/kernel/machine_kexec.c @@ -190,7 +190,7 @@ NORET_TYPE void machine_kexec(struct kim relocate_new_kernel_t rnk; /* Interrupts aren't acceptable while we reboot */ - local_irq_disable(); + raw_local_irq_disable(); /* Calculate the offsets */ page_list = image->head; Index: linux/arch/x86_64/kernel/nmi.c =================================================================== --- linux.orig/arch/x86_64/kernel/nmi.c +++ linux/arch/x86_64/kernel/nmi.c @@ -43,7 +43,7 @@ * This is maintained separately from nmi_active because the NMI * watchdog may also be driven from the I/O APIC timer. */ -static DEFINE_SPINLOCK(lapic_nmi_owner_lock); +static DEFINE_RAW_SPINLOCK(lapic_nmi_owner_lock); static unsigned int lapic_nmi_owner; #define LAPIC_NMI_WATCHDOG (1<<0) #define LAPIC_NMI_RESERVED (1<<1) @@ -127,7 +127,7 @@ void __cpuinit nmi_watchdog_default(void static __init void nmi_cpu_busy(void *data) { volatile int *endflag = data; - local_irq_enable(); + raw_local_irq_enable(); /* Intentionally don't use cpu_relax here. 
This is to make sure that the performance counter really ticks, even if there is a simulator or similar that catches the @@ -156,7 +156,7 @@ int __init check_nmi_watchdog (void) for (cpu = 0; cpu < NR_CPUS; cpu++) counts[cpu] = cpu_pda[cpu].__nmi_count; - local_irq_enable(); + raw_local_irq_enable(); mdelay((10*1000)/nmi_hz); // wait 10 ticks for (cpu = 0; cpu < NR_CPUS; cpu++) { @@ -466,12 +466,42 @@ void touch_nmi_watchdog (void) touch_softlockup_watchdog(); } +int nmi_show_regs[NR_CPUS]; + +void nmi_show_all_regs(void) +{ + int i; + + if (nmi_watchdog == NMI_NONE) + return; + if (system_state != SYSTEM_RUNNING) { + printk("nmi_show_all_regs(): system state %d, not doing.\n", + system_state); + return; + } + + for_each_online_cpu(i) + nmi_show_regs[i] = 1; + for_each_online_cpu(i) + while (nmi_show_regs[i] == 1) + barrier(); +} + +static DEFINE_RAW_SPINLOCK(nmi_print_lock); + void nmi_watchdog_tick (struct pt_regs * regs, unsigned reason) { int sum; int touched = 0; + int cpu = safe_smp_processor_id(); sum = read_pda(apic_timer_irqs); + if (nmi_show_regs[cpu]) { + nmi_show_regs[cpu] = 0; + spin_lock(&nmi_print_lock); + show_regs(regs); + spin_unlock(&nmi_print_lock); + } if (__get_cpu_var(nmi_touch)) { __get_cpu_var(nmi_touch) = 0; touched = 1; @@ -483,6 +513,11 @@ void nmi_watchdog_tick (struct pt_regs * */ local_inc(&__get_cpu_var(alert_counter)); if (local_read(&__get_cpu_var(alert_counter)) == 5*nmi_hz) { + int i; + + for (i = 0; i < NR_CPUS; i++) + nmi_show_regs[i] = 1; + if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP) { local_set(&__get_cpu_var(alert_counter), 0); Index: linux/arch/x86_64/kernel/pmtimer.c =================================================================== --- linux.orig/arch/x86_64/kernel/pmtimer.c +++ /dev/null @@ -1,101 +0,0 @@ -/* Ported over from i386 by AK, original copyright was: - * - * (C) Dominik Brodowski 2003 - * - * Driver to use the Power Management Timer (PMTMR) available in some - * southbridges as 
primary timing source for the Linux kernel. - * - * Based on parts of linux/drivers/acpi/hardware/hwtimer.c, timer_pit.c, - * timer_hpet.c, and on Arjan van de Ven's implementation for 2.4. - * - * This file is licensed under the GPL v2. - * - * Dropped all the hardware bug workarounds for now. Hopefully they - * are not needed on 64bit chipsets. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -/* The I/O port the PMTMR resides at. - * The location is detected during setup_arch(), - * in arch/i386/kernel/acpi/boot.c */ -u32 pmtmr_ioport; - -/* value of the Power timer at last timer interrupt */ -static u32 offset_delay; -static u32 last_pmtmr_tick; - -#define ACPI_PM_MASK 0xFFFFFF /* limit it to 24 bits */ - -static inline u32 cyc2us(u32 cycles) -{ - /* The Power Management Timer ticks at 3.579545 ticks per microsecond. - * 1 / PM_TIMER_FREQUENCY == 0.27936511 =~ 286/1024 [error: 0.024%] - * - * Even with HZ = 100, delta is at maximum 35796 ticks, so it can - * easily be multiplied with 286 (=0x11E) without having to fear - * u32 overflows. 
- */ - cycles *= 286; - return (cycles >> 10); -} - -int pmtimer_mark_offset(void) -{ - static int first_run = 1; - unsigned long tsc; - u32 lost; - - u32 tick = inl(pmtmr_ioport); - u32 delta; - - delta = cyc2us((tick - last_pmtmr_tick) & ACPI_PM_MASK); - - last_pmtmr_tick = tick; - monotonic_base += delta * NSEC_PER_USEC; - - delta += offset_delay; - - lost = delta / (USEC_PER_SEC / HZ); - offset_delay = delta % (USEC_PER_SEC / HZ); - - rdtscll(tsc); - vxtime.last_tsc = tsc - offset_delay * cpu_khz; - - /* don't calculate delay for first run, - or if we've got less then a tick */ - if (first_run || (lost < 1)) { - first_run = 0; - offset_delay = 0; - } - - return lost - 1; -} - -unsigned int do_gettimeoffset_pm(void) -{ - u32 now, offset, delta = 0; - - offset = last_pmtmr_tick; - now = inl(pmtmr_ioport); - delta = (now - offset) & ACPI_PM_MASK; - - return offset_delay + cyc2us(delta); -} - - -static int __init nopmtimer_setup(char *s) -{ - pmtmr_ioport = 0; - return 0; -} - -__setup("nopmtimer", nopmtimer_setup); Index: linux/arch/x86_64/kernel/process.c =================================================================== --- linux.orig/arch/x86_64/kernel/process.c +++ linux/arch/x86_64/kernel/process.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include @@ -60,6 +61,12 @@ static atomic_t hlt_counter = ATOMIC_INI unsigned long boot_option_idle_override = 0; EXPORT_SYMBOL(boot_option_idle_override); +DEFINE_SPINLOCK(pm_idle_switch_lock); +EXPORT_SYMBOL_GPL(pm_idle_switch_lock); + +int pm_idle_locked = 0; +EXPORT_SYMBOL_GPL(pm_idle_locked); + /* * Powermanagement idle function, if any.. 
*/ @@ -87,12 +94,13 @@ EXPORT_SYMBOL(enable_hlt); void default_idle(void) { if (!atomic_read(&hlt_counter)) { - local_irq_disable(); - if (!need_resched()) - safe_halt(); + raw_local_irq_disable(); + if (!need_resched() && !need_resched_delayed()) + raw_safe_halt(); else - local_irq_enable(); - } + raw_local_irq_enable(); + } else + raw_local_irq_enable(); } /* @@ -100,11 +108,11 @@ void default_idle(void) * to poll the ->need_resched flag instead of waiting for the * cross-CPU IPI to arrive. Use this option with caution. */ -static void poll_idle (void) +void poll_idle (void) { int oldval; - local_irq_enable(); + raw_local_irq_enable(); /* * Deal with another CPU just having chosen a thread to @@ -120,7 +128,7 @@ static void poll_idle (void) "rep; nop;" "je 2b;" : : - "i" (_TIF_NEED_RESCHED), + "i" (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_DELAYED), "m" (current_thread_info()->flags)); clear_thread_flag(TIF_POLLING_NRFLAG); } else { @@ -189,7 +197,9 @@ void cpu_idle (void) { /* endless idle loop with no priority at all */ while (1) { - while (!need_resched()) { + BUG_ON(raw_irqs_disabled()); + + while (!need_resched() && !need_resched_delayed()) { void (*idle)(void); if (__get_cpu_var(cpu_idle_state)) @@ -201,10 +211,13 @@ void cpu_idle (void) idle = default_idle; if (cpu_is_offline(smp_processor_id())) play_dead(); + stop_critical_timing(); + propagate_preempt_locks_value(); idle(); } - - schedule(); + raw_local_irq_disable(); + __schedule(); + raw_local_irq_enable(); } } @@ -217,16 +230,16 @@ void cpu_idle (void) */ static void mwait_idle(void) { - local_irq_enable(); + raw_local_irq_enable(); - if (!need_resched()) { + if (!need_resched() && !need_resched_delayed()) { set_thread_flag(TIF_POLLING_NRFLAG); do { __monitor((void *)&current_thread_info()->flags, 0, 0); - if (need_resched()) + if (need_resched() || need_resched_delayed()) break; __mwait(0, 0); - } while (!need_resched()); + } while (!need_resched() && !need_resched_delayed()); 
clear_thread_flag(TIF_POLLING_NRFLAG); } } @@ -315,7 +328,7 @@ void show_regs(struct pt_regs *regs) { printk("CPU %d:", smp_processor_id()); __show_regs(regs); - show_trace(&regs->rsp); + show_trace(current, &regs->rsp); } /* @@ -334,13 +347,14 @@ void exit_thread(void) kprobe_flush_task(me); if (me->thread.io_bitmap_ptr) { - struct tss_struct *tss = &per_cpu(init_tss, get_cpu()); + struct tss_struct *tss; kfree(t->io_bitmap_ptr); t->io_bitmap_ptr = NULL; /* * Careful, clear this in the TSS too: */ + tss = &per_cpu(init_tss, get_cpu()); memset(tss->io_bitmap, 0xff, t->io_bitmap_max); t->io_bitmap_max = 0; put_cpu(); Index: linux/arch/x86_64/kernel/reboot.c =================================================================== --- linux.orig/arch/x86_64/kernel/reboot.c +++ linux/arch/x86_64/kernel/reboot.c @@ -98,7 +98,7 @@ void machine_shutdown(void) smp_send_stop(); #endif - local_irq_disable(); + raw_local_irq_disable(); #ifndef CONFIG_SMP disable_local_APIC(); @@ -106,7 +106,7 @@ void machine_shutdown(void) disable_IO_APIC(); - local_irq_enable(); + raw_local_irq_enable(); } void machine_emergency_restart(void) Index: linux/arch/x86_64/kernel/signal.c =================================================================== --- linux.orig/arch/x86_64/kernel/signal.c +++ linux/arch/x86_64/kernel/signal.c @@ -417,6 +417,13 @@ int do_signal(struct pt_regs *regs, sigs siginfo_t info; int signr; +#ifdef CONFIG_PREEMPT_RT + /* + * Fully-preemptible kernel does not need interrupts disabled: + */ + raw_local_irq_enable(); + preempt_check_resched(); +#endif /* * We want the common case to go fast, which * is why we may in certain cases get here from Index: linux/arch/x86_64/kernel/smp.c =================================================================== --- linux.orig/arch/x86_64/kernel/smp.c +++ linux/arch/x86_64/kernel/smp.c @@ -299,10 +299,20 @@ void smp_send_reschedule(int cpu) } /* + * this function sends a 'reschedule' IPI to all other CPUs. 
+ * This is used when RT tasks are starving and other CPUs + * might be able to run them: + */ +void smp_send_reschedule_allbutself(void) +{ + send_IPI_allbutself(RESCHEDULE_VECTOR); +} + +/* * Structure and data for smp_call_function(). This is designed to minimise * static memory requirements. It also looks cleaner. */ -static DEFINE_SPINLOCK(call_lock); +static DEFINE_RAW_SPINLOCK(call_lock); struct call_data_struct { void (*func) (void *info); @@ -456,9 +466,9 @@ void smp_stop_cpu(void) * Remove this CPU: */ cpu_clear(smp_processor_id(), cpu_online_map); - local_irq_disable(); + raw_local_irq_disable(); disable_local_APIC(); - local_irq_enable(); + raw_local_irq_enable(); } static void smp_really_stop_cpu(void *dummy) @@ -482,9 +492,9 @@ void smp_send_stop(void) if (!nolock) spin_unlock(&call_lock); - local_irq_disable(); + raw_local_irq_disable(); disable_local_APIC(); - local_irq_enable(); + raw_local_irq_enable(); } /* Index: linux/arch/x86_64/kernel/smpboot.c =================================================================== --- linux.orig/arch/x86_64/kernel/smpboot.c +++ linux/arch/x86_64/kernel/smpboot.c @@ -198,7 +198,7 @@ static void __cpuinit smp_store_cpu_info latency and low latency is the primary objective here. 
-AK */ #define no_cpu_relax() barrier() -static __cpuinitdata DEFINE_SPINLOCK(tsc_sync_lock); +static __cpuinitdata __DEFINE_RAW_SPINLOCK(tsc_sync_lock); static volatile __cpuinitdata unsigned long go[SLAVE + 1]; static int notscsync __cpuinitdata; @@ -214,7 +214,7 @@ static __cpuinit void sync_master(void * go[MASTER] = 0; - local_irq_save(flags); + raw_local_irq_save(flags); { for (i = 0; i < NUM_ROUNDS*NUM_ITERS; ++i) { while (!go[MASTER]) @@ -223,7 +223,7 @@ static __cpuinit void sync_master(void * rdtscll(go[SLAVE]); } } - local_irq_restore(flags); + raw_local_irq_restore(flags); } /* @@ -1022,7 +1022,7 @@ int __cpuinit __cpu_up(unsigned int cpu) int err; int apicid = cpu_present_to_apicid(cpu); - WARN_ON(irqs_disabled()); + WARN_ON(raw_irqs_disabled()); Dprintk("++++++++++++++++++++=_---CPU UP %u\n", cpu); Index: linux/arch/x86_64/kernel/switch2poll.c =================================================================== --- /dev/null +++ linux/arch/x86_64/kernel/switch2poll.c @@ -0,0 +1,112 @@ +#include +#include +#include +#include +#include + +extern void poll_idle (void); + +#define KERNEL_ATTR_RW(_name) \ +static struct subsys_attribute _name##_attr = \ + __ATTR(_name, 0644, _name##_show, _name##_store) + +static struct idlep_kobject +{ + struct kobject kobj; + int is_poll; + void (*idle)(void); +} idle_kobj; + +static ssize_t idle_poll_show(struct subsystem *subsys, char *page) +{ + return sprintf(page, "%s\n", (idle_kobj.is_poll ? "on" : "off")); +} + +static ssize_t idle_poll_store(struct subsystem *subsys, + const char *buf, size_t len) +{ + unsigned long flags; + + spin_lock_irqsave(&pm_idle_switch_lock, flags); + + /* + * If power management is handling the idle function, + * then leave it be. 
+ */ + if (pm_idle_locked) { + len = -EBUSY; + goto out; + } + + if (strncmp(buf,"1",1)==0 || + (len >=2 && strncmp(buf,"on",2)==0)) { + if (idle_kobj.is_poll != 1) { + idle_kobj.is_poll = 1; + boot_option_idle_override = 1; + idle_kobj.idle = pm_idle; + pm_idle = poll_idle; + } + } else if (strncmp(buf,"0",1)==0 || + (len >= 3 && strncmp(buf,"off",3)==0)) { + if (idle_kobj.is_poll != 0) { + boot_option_idle_override = 0; + idle_kobj.is_poll = 0; + pm_idle = idle_kobj.idle; + } + } + +out: + spin_unlock_irqrestore(&pm_idle_switch_lock, flags); + + return len; +} + + +KERNEL_ATTR_RW(idle_poll); + +static struct attribute * idle_attrs[] = { + &idle_poll_attr.attr, + NULL +}; + +static struct attribute_group idle_attr_group = { + .attrs = idle_attrs, +}; + +static int __init idle_poll_set_init(void) +{ + int err; + + /* + * If the default is already poll_idle then + * don't even bother with this. + */ + if (pm_idle == poll_idle) + return 0; + + memset(&idle_kobj, 0, sizeof(idle_kobj)); + + idle_kobj.is_poll = 0; + idle_kobj.idle = pm_idle; + + err = kobject_set_name(&idle_kobj.kobj, "%s", "idle"); + if (err) + goto out; + + idle_kobj.kobj.parent = &kernel_subsys.kset.kobj; + err = kobject_register(&idle_kobj.kobj); + if (err) + goto out; + + err = sysfs_create_group(&idle_kobj.kobj, + &idle_attr_group); + if (err) + goto out; + + return 0; +out: + printk(KERN_INFO "Problem setting up sysfs idle_poll\n"); + return 0; +} + +late_initcall(idle_poll_set_init); Index: linux/arch/x86_64/kernel/time.c =================================================================== --- linux.orig/arch/x86_64/kernel/time.c +++ linux/arch/x86_64/kernel/time.c @@ -38,29 +38,27 @@ #include #include #include +#include #ifdef CONFIG_X86_LOCAL_APIC #include #endif +#include +#include -u64 jiffies_64 = INITIAL_JIFFIES; - -EXPORT_SYMBOL(jiffies_64); - -#ifdef CONFIG_CPU_FREQ -static void cpufreq_delayed_get(void); -#endif extern void i8254_timer_resume(void); extern int using_apic_timer; 
-DEFINE_SPINLOCK(rtc_lock); -DEFINE_SPINLOCK(i8253_lock); +DEFINE_RAW_SPINLOCK(rtc_lock); +DEFINE_RAW_SPINLOCK(i8253_lock); static int nohpet __initdata = 0; static int notsc __initdata = 0; #undef HPET_HACK_ENABLE_DANGEROUS -unsigned int cpu_khz; /* TSC clocks / usec, not used here */ +unsigned int cpu_khz; /* CPU clocks / usec, not used here */ +unsigned int tsc_khz; /* TSC clocks / usec, not used here */ +unsigned long hpet_address; static unsigned long hpet_period; /* fsecs / HPET clock */ unsigned long hpet_tick; /* HPET clocks / interrupt */ static int hpet_use_timer; @@ -83,107 +81,6 @@ static inline void rdtscll_sync(unsigned rdtscll(*tsc); } -/* - * do_gettimeoffset() returns microseconds since last timer interrupt was - * triggered by hardware. A memory read of HPET is slower than a register read - * of TSC, but much more reliable. It's also synchronized to the timer - * interrupt. Note that do_gettimeoffset() may return more than hpet_tick, if a - * timer interrupt has happened already, but vxtime.trigger wasn't updated yet. - * This is not a problem, because jiffies hasn't updated either. They are bound - * together by xtime_lock. - */ - -static inline unsigned int do_gettimeoffset_tsc(void) -{ - unsigned long t; - unsigned long x; - rdtscll_sync(&t); - if (t < vxtime.last_tsc) t = vxtime.last_tsc; /* hack */ - x = ((t - vxtime.last_tsc) * vxtime.tsc_quot) >> 32; - return x; -} - -static inline unsigned int do_gettimeoffset_hpet(void) -{ - /* cap counter read to one tick to avoid inconsistencies */ - unsigned long counter = hpet_readl(HPET_COUNTER) - vxtime.last; - return (min(counter,hpet_tick) * vxtime.quot) >> 32; -} - -unsigned int (*do_gettimeoffset)(void) = do_gettimeoffset_tsc; - -/* - * This version of gettimeofday() has microsecond resolution and better than - * microsecond precision, as we're using at least a 10 MHz (usually 14.31818 - * MHz) HPET timer. 
- */ - -void do_gettimeofday(struct timeval *tv) -{ - unsigned long seq, t; - unsigned int sec, usec; - - do { - seq = read_seqbegin(&xtime_lock); - - sec = xtime.tv_sec; - usec = xtime.tv_nsec / 1000; - - /* i386 does some correction here to keep the clock - monotonous even when ntpd is fixing drift. - But they didn't work for me, there is a non monotonic - clock anyways with ntp. - I dropped all corrections now until a real solution can - be found. Note when you fix it here you need to do the same - in arch/x86_64/kernel/vsyscall.c and export all needed - variables in vmlinux.lds. -AK */ - - t = (jiffies - wall_jiffies) * (1000000L / HZ) + - do_gettimeoffset(); - usec += t; - - } while (read_seqretry(&xtime_lock, seq)); - - tv->tv_sec = sec + usec / 1000000; - tv->tv_usec = usec % 1000000; -} - -EXPORT_SYMBOL(do_gettimeofday); - -/* - * settimeofday() first undoes the correction that gettimeofday would do - * on the time, and then saves it. This is ugly, but has been like this for - * ages already. 
- */ - -int do_settimeofday(struct timespec *tv) -{ - time_t wtm_sec, sec = tv->tv_sec; - long wtm_nsec, nsec = tv->tv_nsec; - - if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC) - return -EINVAL; - - write_seqlock_irq(&xtime_lock); - - nsec -= do_gettimeoffset() * 1000 + - (jiffies - wall_jiffies) * (NSEC_PER_SEC/HZ); - - wtm_sec = wall_to_monotonic.tv_sec + (xtime.tv_sec - sec); - wtm_nsec = wall_to_monotonic.tv_nsec + (xtime.tv_nsec - nsec); - - set_normalized_timespec(&xtime, sec, nsec); - set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec); - - ntp_clear(); - - write_sequnlock_irq(&xtime_lock); - clock_was_set(); - return 0; -} - -EXPORT_SYMBOL(do_settimeofday); - unsigned long profile_pc(struct pt_regs *regs) { unsigned long pc = instruction_pointer(regs); @@ -283,90 +180,8 @@ static void set_rtc_mmss(unsigned long n spin_unlock(&rtc_lock); } - -/* monotonic_clock(): returns # of nanoseconds passed since time_init() - * Note: This function is required to return accurate - * time even in the absence of multiple timer ticks. 
- */ -unsigned long long monotonic_clock(void) -{ - unsigned long seq; - u32 last_offset, this_offset, offset; - unsigned long long base; - - if (vxtime.mode == VXTIME_HPET) { - do { - seq = read_seqbegin(&xtime_lock); - - last_offset = vxtime.last; - base = monotonic_base; - this_offset = hpet_readl(HPET_COUNTER); - - } while (read_seqretry(&xtime_lock, seq)); - offset = (this_offset - last_offset); - offset *=(NSEC_PER_SEC/HZ)/hpet_tick; - return base + offset; - }else{ - do { - seq = read_seqbegin(&xtime_lock); - - last_offset = vxtime.last_tsc; - base = monotonic_base; - } while (read_seqretry(&xtime_lock, seq)); - sync_core(); - rdtscll(this_offset); - offset = (this_offset - last_offset)*1000/cpu_khz; - return base + offset; - } - - -} -EXPORT_SYMBOL(monotonic_clock); - -static noinline void handle_lost_ticks(int lost, struct pt_regs *regs) -{ - static long lost_count; - static int warned; - - if (report_lost_ticks) { - printk(KERN_WARNING "time.c: Lost %d timer " - "tick(s)! ", lost); - print_symbol("rip %s)\n", regs->rip); - } - - if (lost_count == 1000 && !warned) { - printk(KERN_WARNING - "warning: many lost ticks.\n" - KERN_WARNING "Your time source seems to be instable or " - "some driver is hogging interupts\n"); - print_symbol("rip %s\n", regs->rip); - if (vxtime.mode == VXTIME_TSC && vxtime.hpet_address) { - printk(KERN_WARNING "Falling back to HPET\n"); - vxtime.last = hpet_readl(HPET_T0_CMP) - hpet_tick; - vxtime.mode = VXTIME_HPET; - do_gettimeoffset = do_gettimeoffset_hpet; - } - /* else should fall back to PIT, but code missing. */ - warned = 1; - } else - lost_count++; - -#ifdef CONFIG_CPU_FREQ - /* In some cases the CPU can change frequency without us noticing - (like going into thermal throttle) - Give cpufreq a change to catch up. 
*/ - if ((lost_count+1) % 25 == 0) { - cpufreq_delayed_get(); - } -#endif -} - static irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) { - static unsigned long rtc_update = 0; - unsigned long tsc; - int delay, offset = 0, lost = 0; - /* * Here we are in the timer irq handler. We have irqs locally disabled (so we * don't need spin_lock_irqsave()) but we don't know if the timer_bh is running @@ -376,67 +191,6 @@ static irqreturn_t timer_interrupt(int i write_seqlock(&xtime_lock); - if (vxtime.hpet_address) - offset = hpet_readl(HPET_COUNTER); - - if (hpet_use_timer) { - /* if we're using the hpet timer functionality, - * we can more accurately know the counter value - * when the timer interrupt occured. - */ - offset = hpet_readl(HPET_T0_CMP) - hpet_tick; - delay = hpet_readl(HPET_COUNTER) - offset; - } else { - spin_lock(&i8253_lock); - outb_p(0x00, 0x43); - delay = inb_p(0x40); - delay |= inb(0x40) << 8; - spin_unlock(&i8253_lock); - delay = LATCH - 1 - delay; - } - - rdtscll_sync(&tsc); - - if (vxtime.mode == VXTIME_HPET) { - if (offset - vxtime.last > hpet_tick) { - lost = (offset - vxtime.last) / hpet_tick - 1; - } - - monotonic_base += - (offset - vxtime.last)*(NSEC_PER_SEC/HZ) / hpet_tick; - - vxtime.last = offset; -#ifdef CONFIG_X86_PM_TIMER - } else if (vxtime.mode == VXTIME_PMTMR) { - lost = pmtimer_mark_offset(); -#endif - } else { - offset = (((tsc - vxtime.last_tsc) * - vxtime.tsc_quot) >> 32) - (USEC_PER_SEC / HZ); - - if (offset < 0) - offset = 0; - - if (offset > (USEC_PER_SEC / HZ)) { - lost = offset / (USEC_PER_SEC / HZ); - offset %= (USEC_PER_SEC / HZ); - } - - monotonic_base += (tsc - vxtime.last_tsc)*1000000/cpu_khz ; - - vxtime.last_tsc = tsc - vxtime.quot * delay / vxtime.tsc_quot; - - if ((((tsc - vxtime.last_tsc) * - vxtime.tsc_quot) >> 32) < offset) - vxtime.last_tsc = tsc - - (((long) offset << 32) / vxtime.tsc_quot) - 1; - } - - if (lost > 0) { - handle_lost_ticks(lost, regs); - jiffies += lost; - } - /* * Do the 
timer stuff. */ @@ -459,20 +213,6 @@ static irqreturn_t timer_interrupt(int i smp_local_timer_interrupt(regs); #endif -/* - * If we have an externally synchronized Linux clock, then update CMOS clock - * accordingly every ~11 minutes. set_rtc_mmss() will be called in the jiffy - * closest to exactly 500 ms before the next second. If the update fails, we - * don't care, as it'll be updated on the next turn, and the problem (time way - * off) isn't likely to go away much sooner anyway. - */ - - if (ntp_synced() && xtime.tv_sec > rtc_update && - abs(xtime.tv_nsec - 500000000) <= tick_nsec / 2) { - set_rtc_mmss(xtime.tv_sec); - rtc_update = xtime.tv_sec + 660; - } - write_sequnlock(&xtime_lock); return IRQ_HANDLED; @@ -481,9 +221,9 @@ static irqreturn_t timer_interrupt(int i static unsigned int cyc2ns_scale; #define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */ -static inline void set_cyc2ns_scale(unsigned long cpu_mhz) +static inline void set_cyc2ns_scale(unsigned long cpu_khz) { - cyc2ns_scale = (1000 << CYC2NS_SCALE_FACTOR)/cpu_mhz; + cyc2ns_scale = (1000000 << CYC2NS_SCALE_FACTOR)/cpu_khz; } static inline unsigned long long cycles_2_ns(unsigned long long cyc) @@ -513,6 +253,32 @@ unsigned long long sched_clock(void) return cycles_2_ns(a); } +/* code to compensate for TSC C3 stalls: */ +static u64 tsc_c3_offset; +static int tsc_unstable; + +static inline int check_tsc_unstable(void) +{ + return tsc_unstable; +} +static inline void mark_tsc_unstable(void) +{ + tsc_unstable = 1;; +} + +void tsc_c3_compensate(unsigned long nsecs) +{ + u64 cycles = ((u64)nsecs * tsc_khz)/1000000; + + tsc_c3_offset += cycles; +} +EXPORT_SYMBOL_GPL(tsc_c3_compensate); + +static inline u64 tsc_read_c3_time(void) +{ + return tsc_c3_offset; +} + unsigned long get_cmos_time(void) { unsigned int timeout, year, mon, day, hour, min, sec; @@ -573,6 +339,30 @@ unsigned long get_cmos_time(void) return mktime(year, mon, day, hour, min, sec); } +/* arch specific timeofday hooks: */ +u64 
read_persistent_clock(void) +{ + return (u64)get_cmos_time() * NSEC_PER_SEC; +} + +void sync_persistent_clock(struct timespec ts) +{ + static unsigned long rtc_update = 0; + /* + * If we have an externally synchronized Linux clock, then update + * CMOS clock accordingly every ~11 minutes. set_rtc_mmss() will + * be called in the jiffy closest to exactly 500 ms before the + * next second. If the update fails, we don't care, as it'll be + * updated on the next turn, and the problem (time way off) isn't + * likely to go away much sooner anyway. + */ + if (ts.tv_sec > rtc_update && + abs(ts.tv_nsec - 500000000) <= tick_nsec / 2) { + set_rtc_mmss(xtime.tv_sec); + rtc_update = xtime.tv_sec + 660; + } +} + #ifdef CONFIG_CPU_FREQ /* Frequency scaling support. Adjust the TSC based timer when the cpu frequency @@ -600,23 +390,6 @@ static void handle_cpufreq_delayed_get(v cpufreq_delayed_issched = 0; } -/* if we notice lost ticks, schedule a call to cpufreq_get() as it tries - * to verify the CPU frequency the timing core thinks the CPU is running - * at is still correct. - */ -static void cpufreq_delayed_get(void) -{ - static int warned; - if (cpufreq_init && !cpufreq_delayed_issched) { - cpufreq_delayed_issched = 1; - if (!warned) { - warned = 1; - printk(KERN_DEBUG "Losing some ticks... 
checking if CPU frequency changed.\n"); - } - schedule_work(&cpufreq_delayed_get_work); - } -} - static unsigned int ref_freq = 0; static unsigned long loops_per_jiffy_ref = 0; @@ -651,11 +424,14 @@ static int time_cpufreq_notifier(struct cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new); cpu_khz = cpufreq_scale(cpu_khz_ref, ref_freq, freq->new); - if (!(freq->flags & CPUFREQ_CONST_LOOPS)) + if (!(freq->flags & CPUFREQ_CONST_LOOPS)) { vxtime.tsc_quot = (1000L << 32) / cpu_khz; + tsc_khz = cpu_khz; + } + } - set_cyc2ns_scale(cpu_khz_ref / 1000); + set_cyc2ns_scale(cpu_khz_ref); return 0; } @@ -690,18 +466,18 @@ static unsigned int __init hpet_calibrat int tsc_now, hpet_now; unsigned long flags; - local_irq_save(flags); - local_irq_disable(); + raw_local_irq_save(flags); + raw_local_irq_disable(); hpet_start = hpet_readl(HPET_COUNTER); rdtscl(tsc_start); do { - local_irq_disable(); + raw_local_irq_disable(); hpet_now = hpet_readl(HPET_COUNTER); sync_core(); rdtscl(tsc_now); - local_irq_restore(flags); + raw_local_irq_restore(flags); } while ((tsc_now - tsc_start) < TICK_COUNT && (hpet_now - hpet_start) < TICK_COUNT); @@ -879,7 +655,7 @@ int __init time_setup(char *str) } static struct irqaction irq0 = { - timer_interrupt, SA_INTERRUPT, CPU_MASK_NONE, "timer", NULL, NULL + timer_interrupt, SA_INTERRUPT | SA_NODELAY, CPU_MASK_NONE, "timer", NULL, NULL }; extern void __init config_acpi_tables(void); @@ -916,18 +692,12 @@ void __init time_init(void) if (hpet_use_timer) { cpu_khz = hpet_calibrate_tsc(); timename = "HPET"; -#ifdef CONFIG_X86_PM_TIMER - } else if (pmtmr_ioport) { - vxtime_hz = PM_TIMER_FREQUENCY; - timename = "PM"; - pit_init(); - cpu_khz = pit_calibrate_tsc(); -#endif } else { pit_init(); cpu_khz = pit_calibrate_tsc(); timename = "PIT"; } + tsc_khz = cpu_khz; printk(KERN_INFO "time.c: Using %ld.%06ld MHz %s timer.\n", vxtime_hz / 1000000, vxtime_hz % 1000000, timename); @@ -939,7 +709,7 @@ void __init time_init(void) rdtscll_sync(&vxtime.last_tsc); 
setup_irq(0, &irq0); - set_cyc2ns_scale(cpu_khz / 1000); + set_cyc2ns_scale(cpu_khz); #ifndef CONFIG_SMP time_init_gtod(); @@ -969,31 +739,8 @@ static __init int unsynchronized_tsc(voi */ void __init time_init_gtod(void) { - char *timetype; - if (unsynchronized_tsc()) - notsc = 1; - if (vxtime.hpet_address && notsc) { - timetype = hpet_use_timer ? "HPET" : "PIT/HPET"; - vxtime.last = hpet_readl(HPET_T0_CMP) - hpet_tick; - vxtime.mode = VXTIME_HPET; - do_gettimeoffset = do_gettimeoffset_hpet; -#ifdef CONFIG_X86_PM_TIMER - /* Using PM for gettimeofday is quite slow, but we have no other - choice because the TSC is too unreliable on some systems. */ - } else if (pmtmr_ioport && !vxtime.hpet_address && notsc) { - timetype = "PM"; - do_gettimeoffset = do_gettimeoffset_pm; - vxtime.mode = VXTIME_PMTMR; - sysctl_vsyscall = 0; - printk(KERN_INFO "Disabling vsyscall due to use of PM timer\n"); -#endif - } else { - timetype = hpet_use_timer ? "HPET/TSC" : "PIT/TSC"; - vxtime.mode = VXTIME_TSC; - } - - printk(KERN_INFO "time.c: Using %s based timekeeping.\n", timetype); + mark_tsc_unstable(); } __setup("report_lost_ticks", time_setup); @@ -1016,7 +763,6 @@ static int timer_suspend(struct sys_devi static int timer_resume(struct sys_device *dev) { - unsigned long flags; unsigned long sec; unsigned long ctime = get_cmos_time(); unsigned long sleep_length = (ctime - sleep_start) * HZ; @@ -1027,10 +773,6 @@ static int timer_resume(struct sys_devic i8254_timer_resume(); sec = ctime + clock_cmos_diff; - write_seqlock_irqsave(&xtime_lock,flags); - xtime.tv_sec = sec; - xtime.tv_nsec = 0; - write_sequnlock_irqrestore(&xtime_lock,flags); jiffies += sleep_length; wall_jiffies += sleep_length; touch_softlockup_watchdog(); @@ -1125,11 +867,11 @@ int hpet_rtc_timer_init(void) else hpet_rtc_int_freq = DEFAULT_RTC_INT_FREQ; - local_irq_save(flags); + raw_local_irq_save(flags); cnt = hpet_readl(HPET_COUNTER); cnt += ((hpet_tick*HZ)/hpet_rtc_int_freq); hpet_writel(cnt, HPET_T1_CMP); - 
local_irq_restore(flags); + raw_local_irq_restore(flags); cfg = hpet_readl(HPET_T1_CFG); cfg |= HPET_TN_ENABLE | HPET_TN_SETVAL | HPET_TN_32BIT; @@ -1305,3 +1047,214 @@ static int __init notsc_setup(char *s) __setup("notsc", notsc_setup); +/* clock-source code: */ + +static unsigned long current_tsc_khz = 0; + +static int tsc_update_callback(void); + +#ifdef CONFIG_PARANOID_GENERIC_TIME +/* This will hurt performance! */ +static DEFINE_RAW_SPINLOCK(checktsc_lock); +static cycle_t last_tsc; + +static cycle_t read_tsc(void) +{ + unsigned long flags; + cycle_t ret; + + spin_lock_irqsave(&checktsc_lock, flags); + + rdtscll(ret); + + if (ret < last_tsc) + printk("read_tsc: ACK! TSC went backward! Unsynced TSCs?\n"); + last_tsc = ret; + + spin_unlock_irqrestore(&checktsc_lock, flags); + + return ret; +} + +static cycle_t __vsyscall_fn vread_tsc(void* unused) +{ + cycle_t ret; + + rdtscll(ret); + + return ret; +} + +static cycle_t read_tsc_c3(void) +{ + unsigned long flags; + cycle_t ret; + + spin_lock_irqsave(&checktsc_lock, flags); + + rdtscll(ret); + ret += tsc_read_c3_time(); + + if (ret < last_tsc) + printk("read_tsc_c3: ACK! TSC went backward! 
Unsynced TSCs?\n"); + last_tsc = ret; + + spin_unlock_irqrestore(&checktsc_lock, flags); + + return ret; +} + +#else /* CONFIG_PARANOID_GENERIC_TIME */ + +static cycle_t read_tsc(void) +{ + cycle_t ret; + + rdtscll(ret); + + return ret; +} + +static cycle_t __vsyscall_fn vread_tsc(void* unused) +{ + cycle_t ret; + + rdtscll(ret); + + return ret; +} + +static cycle_t read_tsc_c3(void) +{ + cycle_t ret; + + rdtscll(ret); + + return ret + tsc_read_c3_time(); +} + +#endif /* CONFIG_PARANOID_GENERIC_TIME */ + +static struct clocksource clocksource_tsc = { + .name = "tsc", + .rating = 300, + .read = read_tsc, + .vread = vread_tsc, + .mask = (cycle_t)-1, + .mult = 0, /* to be set */ + .shift = 22, + .update_callback = tsc_update_callback, + .is_continuous = 1, +}; + +static int tsc_update_callback(void) +{ + int change = 0; + + /* check to see if we should switch to the safe clocksource: */ + if (tsc_read_c3_time() && + strncmp(clocksource_tsc.name, "c3tsc", 5)) { + printk("Falling back to C3 safe TSC\n"); + clocksource_tsc.read = read_tsc_c3; + clocksource_tsc.vread = 0; + clocksource_tsc.name = "c3tsc"; + change = 1; + } + + if (clocksource_tsc.rating != 50 && check_tsc_unstable()) { + clocksource_tsc.rating = 50; + reselect_clocksource(); + change = 1; + } + + /* only update if tsc_khz has changed: */ + if (current_tsc_khz != tsc_khz){ + current_tsc_khz = tsc_khz; + clocksource_tsc.mult = clocksource_khz2mult(current_tsc_khz, + clocksource_tsc.shift); + change = 1; + } + return change; +} + +static int __init init_tsc_clocksource(void) +{ + if (!notsc && tsc_khz) { + current_tsc_khz = tsc_khz; + clocksource_tsc.mult = clocksource_khz2mult(current_tsc_khz, + clocksource_tsc.shift); + register_clocksource(&clocksource_tsc); + } + return 0; +} + +module_init(init_tsc_clocksource); + + +#define HPET_MASK 0xFFFFFFFF +#define HPET_SHIFT 22 + +/* FSEC = 10^-15 NSEC = 10^-9 */ +#define FSEC_PER_NSEC 1000000 + +static void *hpet_ptr; + +static cycle_t read_hpet(void) +{ + 
return (cycle_t)readl(hpet_ptr); +} + +static cycle_t __vsyscall_fn vread_hpet(void* ptr) +{ + return (cycle_t)readl((void *)fix_to_virt(VSYSCALL_HPET) + 0xf0); +} + +struct clocksource clocksource_hpet = { + .name = "hpet", + .rating = 250, + .read = read_hpet, + .vread = vread_hpet, + .mask = (cycle_t)HPET_MASK, + .mult = 0, /* set below */ + .shift = HPET_SHIFT, + .is_continuous = 1, +}; + +static int __init init_hpet_clocksource(void) +{ + unsigned long hpet_period; + void __iomem *hpet_base; + u64 tmp; + + if (!hpet_address) + return -ENODEV; + + /* calculate the hpet address: */ + hpet_base = + (void __iomem*)ioremap_nocache(hpet_address, HPET_MMAP_SIZE); + hpet_ptr = hpet_base + HPET_COUNTER; + + /* calculate the frequency: */ + hpet_period = readl(hpet_base + HPET_PERIOD); + + /* + * hpet period is in femto seconds per cycle + * so we need to convert this to ns/cyc units + * aproximated by mult/2^shift + * + * fsec/cyc * 1nsec/1000000fsec = nsec/cyc = mult/2^shift + * fsec/cyc * 1ns/1000000fsec * 2^shift = mult + * fsec/cyc * 2^shift * 1nsec/1000000fsec = mult + * (fsec/cyc << shift)/1000000 = mult + * (hpet_period << shift)/FSEC_PER_NSEC = mult + */ + tmp = (u64)hpet_period << HPET_SHIFT; + do_div(tmp, FSEC_PER_NSEC); + clocksource_hpet.mult = (u32)tmp; + + register_clocksource(&clocksource_hpet); + + return 0; +} + +module_init(init_hpet_clocksource); Index: linux/arch/x86_64/kernel/traps.c =================================================================== --- linux.orig/arch/x86_64/kernel/traps.c +++ linux/arch/x86_64/kernel/traps.c @@ -88,7 +88,7 @@ int register_die_notifier(struct notifie static inline void conditional_sti(struct pt_regs *regs) { if (regs->eflags & X86_EFLAGS_IF) - local_irq_enable(); + raw_local_irq_enable(); } static int kstack_depth_to_print = 10; @@ -154,7 +154,7 @@ static unsigned long *in_exception_stack * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack */ -void show_trace(unsigned long *stack) +void 
show_trace(struct task_struct *task, unsigned long *stack) { unsigned long addr; const unsigned cpu = safe_smp_processor_id(); @@ -219,6 +219,7 @@ void show_trace(unsigned long *stack) HANDLE_STACK (((long) stack & (THREAD_SIZE-1)) != 0); #undef HANDLE_STACK printk("\n"); + print_traces(task); } void show_stack(struct task_struct *tsk, unsigned long * rsp) @@ -255,7 +256,7 @@ void show_stack(struct task_struct *tsk, printk("%016lx ", *stack++); touch_nmi_watchdog(); } - show_trace((unsigned long *)rsp); + show_trace(tsk, (unsigned long *)rsp); } /* @@ -264,7 +265,7 @@ void show_stack(struct task_struct *tsk, void dump_stack(void) { unsigned long dummy; - show_trace(&dummy); + show_trace(current, &dummy); } EXPORT_SYMBOL(dump_stack); @@ -337,7 +338,7 @@ void out_of_line_bug(void) } #endif -static DEFINE_SPINLOCK(die_lock); +static DEFINE_RAW_SPINLOCK(die_lock); static int die_owner = -1; unsigned long oops_begin(void) @@ -346,7 +347,7 @@ unsigned long oops_begin(void) unsigned long flags; /* racy, but better than risking deadlock. */ - local_irq_save(flags); + raw_local_irq_save(flags); if (!spin_trylock(&die_lock)) { if (cpu == die_owner) /* nested oops. 
should stop eventually */; Index: linux/arch/x86_64/kernel/vmlinux.lds.S =================================================================== --- linux.orig/arch/x86_64/kernel/vmlinux.lds.S +++ linux/arch/x86_64/kernel/vmlinux.lds.S @@ -99,6 +99,18 @@ SECTIONS .jiffies : AT(VLOAD(.jiffies)) { *(.jiffies) } jiffies = VVIRT(.jiffies); + .vsyscall_fn : AT(VLOAD(.vsyscall_fn)) { *(.vsyscall_fn) } + .vsyscall_data : AT(VLOAD(.vsyscall_data)) { *(.vsyscall_data) } + + .vsyscall_gtod_data : AT(VLOAD(.vsyscall_gtod_data)) { *(.vsyscall_gtod_data) } + vsyscall_gtod_data = VVIRT(.vsyscall_gtod_data); + + .vsyscall_gtod_lock : AT(VLOAD(.vsyscall_gtod_lock)) { *(.vsyscall_gtod_lock) } + vsyscall_gtod_lock = VVIRT(.vsyscall_gtod_lock); + + .vsyscall_fn : AT(VLOAD(.vsyscall_fn)) { *(.vsyscall_fn) } + .vsyscall_data : AT(VLOAD(.vsyscall_data)) { *(.vsyscall_data) } + .vsyscall_1 ADDR(.vsyscall_0) + 1024: AT(VLOAD(.vsyscall_1)) { *(.vsyscall_1) } .vsyscall_2 ADDR(.vsyscall_0) + 2048: AT(VLOAD(.vsyscall_2)) { *(.vsyscall_2) } .vsyscall_3 ADDR(.vsyscall_0) + 3072: AT(VLOAD(.vsyscall_3)) { *(.vsyscall_3) } Index: linux/arch/x86_64/kernel/vsyscall.c =================================================================== --- linux.orig/arch/x86_64/kernel/vsyscall.c +++ linux/arch/x86_64/kernel/vsyscall.c @@ -19,6 +19,8 @@ * want per guest time just set the kernel.vsyscall64 sysctl to 0. 
*/ +#include +#include #include #include #include @@ -27,20 +29,33 @@ #include #include + #include #include +#include #include #include #include #include -#define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr))) +#define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr))) notrace #define force_inline __attribute__((always_inline)) inline int __sysctl_vsyscall __section_sysctl_vsyscall = 1; -seqlock_t __xtime_lock __section_xtime_lock = SEQLOCK_UNLOCKED; +raw_seqlock_t __xtime_lock __section_xtime_lock = RAW_SEQLOCK_UNLOCKED; -#include +struct vsyscall_gtod_data_t { + struct timeval wall_time_tv; + struct timezone sys_tz; + cycle_t offset_base; + struct clocksource clock; +}; + +extern struct vsyscall_gtod_data_t vsyscall_gtod_data; +struct vsyscall_gtod_data_t __vsyscall_gtod_data __section_vsyscall_gtod_data; + +extern raw_seqlock_t vsyscall_gtod_lock; +raw_seqlock_t __vsyscall_gtod_lock __section_vsyscall_gtod_lock = RAW_SEQLOCK_UNLOCKED; static force_inline void timeval_normalize(struct timeval * tv) { @@ -53,40 +68,66 @@ static force_inline void timeval_normali } } -static force_inline void do_vgettimeofday(struct timeval * tv) +/* + * XXX - this is ugly. gettimeofday() has a label in it so we can't + * call it twice. 
+ */ +static force_inline int syscall_gtod(struct timeval *tv, struct timezone *tz) +{ + int ret; + + asm volatile("syscall" + : "=a" (ret) + : "0" (__NR_gettimeofday),"D" (tv),"S" (tz) + : __syscall_clobber); + + return ret; +} + +static force_inline void do_vgettimeofday(struct timeval *tv) { - long sequence, t; - unsigned long sec, usec; + cycle_t now, cycle_delta; + nsec_t nsec_delta; + unsigned long seq; do { - sequence = read_seqbegin(&__xtime_lock); - - sec = __xtime.tv_sec; - usec = (__xtime.tv_nsec / 1000) + - (__jiffies - __wall_jiffies) * (1000000 / HZ); - - if (__vxtime.mode != VXTIME_HPET) { - sync_core(); - rdtscll(t); - if (t < __vxtime.last_tsc) - t = __vxtime.last_tsc; - usec += ((t - __vxtime.last_tsc) * - __vxtime.tsc_quot) >> 32; - /* See comment in x86_64 do_gettimeofday. */ - } else { - usec += ((readl((void *)fix_to_virt(VSYSCALL_HPET) + 0xf0) - - __vxtime.last) * __vxtime.quot) >> 32; + seq = read_seqbegin(&__vsyscall_gtod_lock); + + if (!__vsyscall_gtod_data.clock.vread) { + syscall_gtod(tv, NULL); + return; } - } while (read_seqretry(&__xtime_lock, sequence)); - tv->tv_sec = sec + usec / 1000000; - tv->tv_usec = usec % 1000000; + /* read the timeosurce and calc cycle_delta */ + now = __vsyscall_gtod_data.clock.vread( + __vsyscall_gtod_data.clock.vdata); + + cycle_delta = (now - __vsyscall_gtod_data.offset_base) + & __vsyscall_gtod_data.clock.mask; + + /* convert cycles to nsecs */ + nsec_delta = cycle_delta * __vsyscall_gtod_data.clock.mult; + nsec_delta = nsec_delta >> __vsyscall_gtod_data.clock.shift; + + /* add nsec offset to wall_time_tv */ + *tv = __vsyscall_gtod_data.wall_time_tv; + do_div(nsec_delta, NSEC_PER_USEC); + tv->tv_usec += (unsigned long) nsec_delta; + + while (tv->tv_usec > USEC_PER_SEC) { + tv->tv_sec += 1; + tv->tv_usec -= USEC_PER_SEC; + } + } while (read_seqretry(&__vsyscall_gtod_lock, seq)); } -/* RED-PEN may want to readd seq locking, but then the variable should be write-once. 
*/ +/* + * RED-PEN may want to readd seq locking, but then the variable should be + * write-once. + */ static force_inline void do_get_tz(struct timezone * tz) { - *tz = __sys_tz; + *tz = __vsyscall_gtod_data.sys_tz; } static force_inline int gettimeofday(struct timeval *tv, struct timezone *tz) @@ -122,11 +163,16 @@ int __vsyscall(0) vgettimeofday(struct t * unlikely */ time_t __vsyscall(1) vtime(time_t *t) { + struct timeval tv; + if (unlikely(!__sysctl_vsyscall)) return time_syscall(t); - else if (t) - *t = __xtime.tv_sec; - return __xtime.tv_sec; + + vgettimeofday(&tv, 0); + if (t) + *t = tv.tv_sec; + + return tv.tv_sec; } long __vsyscall(2) venosys_0(void) @@ -139,6 +185,38 @@ long __vsyscall(3) venosys_1(void) return -ENOSYS; } +struct clocksource *curr_clock; + +void arch_update_vsyscall_gtod(struct timespec wall_time, cycle_t offset_base, + struct clocksource *clock, int ntp_adj) +{ + unsigned long flags; + + write_seqlock_irqsave(&vsyscall_gtod_lock, flags); + + /* XXX - hackitty hack hack. this is terrible! 
*/ + if (curr_clock != clock) + curr_clock = clock; + + /* save off wall time as timeval: */ + vsyscall_gtod_data.wall_time_tv.tv_sec = wall_time.tv_sec; + vsyscall_gtod_data.wall_time_tv.tv_usec = wall_time.tv_nsec/1000; + + /* save offset_base: */ + vsyscall_gtod_data.offset_base = offset_base; + + /* copy current clocksource: */ + vsyscall_gtod_data.clock = *clock; + + /* apply ntp adjustment to clocksource mult: */ + vsyscall_gtod_data.clock.mult += ntp_adj; + + /* save off current timezone: */ + vsyscall_gtod_data.sys_tz = sys_tz; + + write_sequnlock_irqrestore(&vsyscall_gtod_lock, flags); +} + #ifdef CONFIG_SYSCTL #define SYSCALL 0x050f @@ -217,6 +295,7 @@ static int __init vsyscall_init(void) BUG_ON((unsigned long) &vtime != VSYSCALL_ADDR(__NR_vtime)); BUG_ON((VSYSCALL_ADDR(0) != __fix_to_virt(VSYSCALL_FIRST_PAGE))); map_vsyscall(); + sysctl_vsyscall = 1; #ifdef CONFIG_SYSCTL register_sysctl_table(kernel_root_table2, 0); #endif Index: linux/arch/x86_64/kernel/x8664_ksyms.c =================================================================== --- linux.orig/arch/x86_64/kernel/x8664_ksyms.c +++ linux/arch/x86_64/kernel/x8664_ksyms.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -33,8 +34,6 @@ #include #include -extern spinlock_t rtc_lock; - #ifdef CONFIG_SMP extern void __write_lock_failed(rwlock_t *rw); extern void __read_lock_failed(rwlock_t *rw); @@ -62,10 +61,12 @@ EXPORT_SYMBOL(pm_idle); EXPORT_SYMBOL(pm_power_off); EXPORT_SYMBOL(get_cmos_time); -EXPORT_SYMBOL(__down_failed); -EXPORT_SYMBOL(__down_failed_interruptible); -EXPORT_SYMBOL(__down_failed_trylock); -EXPORT_SYMBOL(__up_wakeup); +#ifdef CONFIG_RWSEM_GENERIC_SPINLOCK +EXPORT_SYMBOL(__compat_down_failed); +EXPORT_SYMBOL(__compat_down_failed_interruptible); +EXPORT_SYMBOL(__compat_down_failed_trylock); +EXPORT_SYMBOL(__compat_up_wakeup); +#endif /* Networking helper routines. 
*/ EXPORT_SYMBOL(csum_partial_copy_nocheck); EXPORT_SYMBOL(ip_compute_csum); Index: linux/arch/x86_64/lib/thunk.S =================================================================== --- linux.orig/arch/x86_64/lib/thunk.S +++ linux/arch/x86_64/lib/thunk.S @@ -43,11 +43,13 @@ thunk rwsem_downgrade_thunk,rwsem_downgrade_wake #endif thunk do_softirq_thunk,do_softirq - - thunk __down_failed,__down - thunk_retrax __down_failed_interruptible,__down_interruptible - thunk_retrax __down_failed_trylock,__down_trylock - thunk __up_wakeup,__up + +#ifdef CONFIG_RWSEM_GENERIC_SPINLOCK + thunk __compat_down_failed,__compat_down + thunk_retrax __compat_down_failed_interruptible,__compat_down_interruptible + thunk_retrax __compat_down_failed_trylock,__compat_down_trylock + thunk __compat_up_wakeup,__compat_up +#endif /* SAVE_ARGS below is used only for the .cfi directives it contains. */ CFI_STARTPROC Index: linux/arch/x86_64/mm/fault.c =================================================================== --- linux.orig/arch/x86_64/mm/fault.c +++ linux/arch/x86_64/mm/fault.c @@ -39,6 +39,7 @@ void bust_spinlocks(int yes) { int loglevel_save = console_loglevel; if (yes) { + stop_trace(); oops_in_progress = 1; } else { #ifdef CONFIG_VT @@ -327,7 +328,7 @@ asmlinkage void __kprobes do_page_fault( return; if (likely(regs->eflags & X86_EFLAGS_IF)) - local_irq_enable(); + raw_local_irq_enable(); if (unlikely(page_fault_trace)) printk("pagefault rip:%lx rsp:%lx cs:%lu ss:%lu address %lx error %lx\n", Index: linux/arch/x86_64/mm/init.c =================================================================== --- linux.orig/arch/x86_64/mm/init.c +++ linux/arch/x86_64/mm/init.c @@ -47,7 +47,7 @@ extern int swiotlb; extern char _stext[]; -DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); +DEFINE_PER_CPU_LOCKED(struct mmu_gather, mmu_gathers); /* * NOTE: pagetable_init alloc all the fixmap pagetables contiguous on the Index: linux/arch/xtensa/kernel/time.c 
=================================================================== --- linux.orig/arch/xtensa/kernel/time.c +++ linux/arch/xtensa/kernel/time.c @@ -29,9 +29,6 @@ extern volatile unsigned long wall_jiffies; -u64 jiffies_64 = INITIAL_JIFFIES; -EXPORT_SYMBOL(jiffies_64); - spinlock_t rtc_lock = SPIN_LOCK_UNLOCKED; EXPORT_SYMBOL(rtc_lock); Index: linux/drivers/Makefile =================================================================== --- linux.orig/drivers/Makefile +++ linux/drivers/Makefile @@ -67,3 +67,4 @@ obj-$(CONFIG_INFINIBAND) += infiniband/ obj-$(CONFIG_SGI_IOC4) += sn/ obj-y += firmware/ obj-$(CONFIG_CRYPTO) += crypto/ +obj-$(CONFIG_GENERIC_TIME) += clocksource/ Index: linux/drivers/acpi/events/evgpe.c =================================================================== --- linux.orig/drivers/acpi/events/evgpe.c +++ linux/drivers/acpi/events/evgpe.c @@ -377,7 +377,7 @@ u32 acpi_ev_gpe_detect(struct acpi_gpe_x struct acpi_gpe_register_info *gpe_register_info; u32 status_reg; u32 enable_reg; - u32 flags; + unsigned long flags; acpi_status status; struct acpi_gpe_block_info *gpe_block; acpi_native_uint i; Index: linux/drivers/acpi/events/evgpeblk.c =================================================================== --- linux.orig/drivers/acpi/events/evgpeblk.c +++ linux/drivers/acpi/events/evgpeblk.c @@ -136,7 +136,7 @@ acpi_status acpi_ev_walk_gpe_list(ACPI_G struct acpi_gpe_block_info *gpe_block; struct acpi_gpe_xrupt_info *gpe_xrupt_info; acpi_status status = AE_OK; - u32 flags; + unsigned long flags; ACPI_FUNCTION_TRACE("ev_walk_gpe_list"); @@ -479,7 +479,7 @@ static struct acpi_gpe_xrupt_info *acpi_ struct acpi_gpe_xrupt_info *next_gpe_xrupt; struct acpi_gpe_xrupt_info *gpe_xrupt; acpi_status status; - u32 flags; + unsigned long flags; ACPI_FUNCTION_TRACE("ev_get_gpe_xrupt_block"); @@ -553,7 +553,7 @@ static acpi_status acpi_ev_delete_gpe_xrupt(struct acpi_gpe_xrupt_info *gpe_xrupt) { acpi_status status; - u32 flags; + unsigned long flags; 
ACPI_FUNCTION_TRACE("ev_delete_gpe_xrupt"); @@ -610,7 +610,7 @@ acpi_ev_install_gpe_block(struct acpi_gp struct acpi_gpe_block_info *next_gpe_block; struct acpi_gpe_xrupt_info *gpe_xrupt_block; acpi_status status; - u32 flags; + unsigned long flags; ACPI_FUNCTION_TRACE("ev_install_gpe_block"); @@ -663,7 +663,7 @@ acpi_ev_install_gpe_block(struct acpi_gp acpi_status acpi_ev_delete_gpe_block(struct acpi_gpe_block_info *gpe_block) { acpi_status status; - u32 flags; + unsigned long flags; ACPI_FUNCTION_TRACE("ev_install_gpe_block"); Index: linux/drivers/acpi/events/evxface.c =================================================================== --- linux.orig/drivers/acpi/events/evxface.c +++ linux/drivers/acpi/events/evxface.c @@ -562,7 +562,7 @@ acpi_install_gpe_handler(acpi_handle gpe struct acpi_gpe_event_info *gpe_event_info; struct acpi_handler_info *handler; acpi_status status; - u32 flags; + unsigned long flags; ACPI_FUNCTION_TRACE("acpi_install_gpe_handler"); @@ -653,7 +653,7 @@ acpi_remove_gpe_handler(acpi_handle gpe_ struct acpi_gpe_event_info *gpe_event_info; struct acpi_handler_info *handler; acpi_status status; - u32 flags; + unsigned long flags; ACPI_FUNCTION_TRACE("acpi_remove_gpe_handler"); Index: linux/drivers/acpi/osl.c =================================================================== --- linux.orig/drivers/acpi/osl.c +++ linux/drivers/acpi/osl.c @@ -729,14 +729,14 @@ void acpi_os_delete_lock(acpi_handle han acpi_status acpi_os_create_semaphore(u32 max_units, u32 initial_units, acpi_handle * handle) { - struct semaphore *sem = NULL; + struct compat_semaphore *sem = NULL; ACPI_FUNCTION_TRACE("os_create_semaphore"); - sem = acpi_os_allocate(sizeof(struct semaphore)); + sem = acpi_os_allocate(sizeof(struct compat_semaphore)); if (!sem) return_ACPI_STATUS(AE_NO_MEMORY); - memset(sem, 0, sizeof(struct semaphore)); + memset(sem, 0, sizeof(struct compat_semaphore)); sema_init(sem, initial_units); @@ -759,7 +759,7 @@ EXPORT_SYMBOL(acpi_os_create_semaphore); 
acpi_status acpi_os_delete_semaphore(acpi_handle handle) { - struct semaphore *sem = (struct semaphore *)handle; + struct compat_semaphore *sem = (struct compat_semaphore *)handle; ACPI_FUNCTION_TRACE("os_delete_semaphore"); @@ -788,7 +788,7 @@ EXPORT_SYMBOL(acpi_os_delete_semaphore); acpi_status acpi_os_wait_semaphore(acpi_handle handle, u32 units, u16 timeout) { acpi_status status = AE_OK; - struct semaphore *sem = (struct semaphore *)handle; + struct compat_semaphore *sem = (struct compat_semaphore *)handle; int ret = 0; ACPI_FUNCTION_TRACE("os_wait_semaphore"); @@ -870,7 +870,7 @@ EXPORT_SYMBOL(acpi_os_wait_semaphore); */ acpi_status acpi_os_signal_semaphore(acpi_handle handle, u32 units) { - struct semaphore *sem = (struct semaphore *)handle; + struct compat_semaphore *sem = (struct compat_semaphore *)handle; ACPI_FUNCTION_TRACE("os_signal_semaphore"); Index: linux/drivers/acpi/processor_idle.c =================================================================== --- linux.orig/drivers/acpi/processor_idle.c +++ linux/drivers/acpi/processor_idle.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include @@ -167,6 +168,7 @@ acpi_processor_power_activate(struct acp } static atomic_t c3_cpu_count; +extern void tsc_c3_compensate(unsigned long nsecs); static void acpi_processor_idle(void) { @@ -184,14 +186,14 @@ static void acpi_processor_idle(void) * Interrupts must be disabled during bus mastering calculations and * for C2/C3 transitions. */ - local_irq_disable(); + raw_local_irq_disable(); /* * Check whether we truly need to go idle, or should * reschedule: */ if (unlikely(need_resched())) { - local_irq_enable(); + raw_local_irq_enable(); return; } @@ -253,7 +255,7 @@ static void acpi_processor_idle(void) * issues (e.g. floppy DMA transfer overrun/underrun). 
*/ if (pr->power.bm_activity & cx->demotion.threshold.bm) { - local_irq_enable(); + raw_local_irq_enable(); next_state = cx->demotion.state; goto end; } @@ -277,7 +279,7 @@ static void acpi_processor_idle(void) if (pm_idle_save) pm_idle_save(); else - safe_halt(); + raw_safe_halt(); /* * TBD: Can't get time duration while in C1, as resumes * go to an ISR rather than here. Need to instrument @@ -296,7 +298,7 @@ static void acpi_processor_idle(void) /* Get end time (ticks) */ t2 = inl(acpi_fadt.xpm_tmr_blk.address); /* Re-enable interrupts */ - local_irq_enable(); + raw_local_irq_enable(); /* Compute time (ticks) that we were actually asleep */ sleep_ticks = ticks_elapsed(t1, t2) - cx->latency_ticks - C2_OVERHEAD; @@ -334,15 +336,20 @@ static void acpi_processor_idle(void) ACPI_MTX_DO_NOT_LOCK); } +#ifdef CONFIG_GENERIC_TIME + /* compensate for TSC pause */ + tsc_c3_compensate((u32)(((u64)((t2-t1)&0xFFFFFF)*286070)>>10)); +#endif + /* Re-enable interrupts */ - local_irq_enable(); + raw_local_irq_enable(); /* Compute time (ticks) that we were actually asleep */ sleep_ticks = ticks_elapsed(t1, t2) - cx->latency_ticks - C3_OVERHEAD; break; default: - local_irq_enable(); + raw_local_irq_enable(); return; } @@ -421,7 +428,7 @@ static void acpi_processor_idle(void) if (pm_idle_save) pm_idle_save(); else - safe_halt(); + raw_safe_halt(); return; } @@ -986,6 +993,7 @@ int acpi_processor_power_init(struct acp static int first_run = 0; struct proc_dir_entry *entry = NULL; unsigned int i; + unsigned long flags; ACPI_FUNCTION_TRACE("acpi_processor_power_init"); @@ -1019,6 +1027,7 @@ int acpi_processor_power_init(struct acp * Note that we use previously set idle handler will be used on * platforms that only support C1. 
*/ + spin_lock_irqsave(&pm_idle_switch_lock, flags); if ((pr->flags.power) && (!boot_option_idle_override)) { printk(KERN_INFO PREFIX "CPU%d (power states:", pr->id); for (i = 1; i <= pr->power.count; i++) @@ -1030,8 +1039,13 @@ int acpi_processor_power_init(struct acp if (pr->id == 0) { pm_idle_save = pm_idle; pm_idle = acpi_processor_idle; + /* + * Don't allow switching of the pm_idle to poll. + */ + pm_idle_locked = 1; } } + spin_unlock_irqrestore(&pm_idle_switch_lock, flags); /* 'power' [R] */ entry = create_proc_entry(ACPI_PROCESSOR_FILE_POWER, @@ -1074,5 +1088,7 @@ int acpi_processor_power_exit(struct acp cpu_idle_wait(); } + pm_idle_locked = 0; + return_VALUE(0); } Index: linux/drivers/acpi/processor_throttling.c =================================================================== --- linux.orig/drivers/acpi/processor_throttling.c +++ linux/drivers/acpi/processor_throttling.c @@ -69,7 +69,7 @@ static int acpi_processor_get_throttling duty_mask <<= pr->throttling.duty_offset; - local_irq_disable(); + raw_local_irq_disable(); value = inl(pr->throttling.address); @@ -87,7 +87,7 @@ static int acpi_processor_get_throttling pr->throttling.state = state; - local_irq_enable(); + raw_local_irq_enable(); ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Throttling state is T%d (%d%% throttling applied)\n", @@ -131,7 +131,7 @@ int acpi_processor_set_throttling(struct duty_mask = ~duty_mask; } - local_irq_disable(); + raw_local_irq_disable(); /* * Disable throttling by writing a 0 to bit 4. 
Note that we must @@ -158,7 +158,7 @@ int acpi_processor_set_throttling(struct pr->throttling.state = state; - local_irq_enable(); + raw_local_irq_enable(); ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Throttling state set to T%d (%d%%)\n", state, Index: linux/drivers/acpi/sleep/main.c =================================================================== --- linux.orig/drivers/acpi/sleep/main.c +++ linux/drivers/acpi/sleep/main.c @@ -82,7 +82,7 @@ static int acpi_pm_enter(suspend_state_t return error; } - local_irq_save(flags); + raw_local_irq_save(flags); acpi_enable_wakeup_device(acpi_state); switch (pm_state) { case PM_SUSPEND_STANDBY: @@ -105,7 +105,7 @@ static int acpi_pm_enter(suspend_state_t default: return -EINVAL; } - local_irq_restore(flags); + raw_local_irq_restore(flags); printk(KERN_DEBUG "Back to C!\n"); /* restore processor state Index: linux/drivers/acpi/sleep/poweroff.c =================================================================== --- linux.orig/drivers/acpi/sleep/poweroff.c +++ linux/drivers/acpi/sleep/poweroff.c @@ -46,7 +46,7 @@ void acpi_power_off(void) { /* acpi_sleep_prepare(ACPI_STATE_S5) should have already been called */ printk("%s called\n", __FUNCTION__); - local_irq_disable(); + raw_local_irq_disable(); /* Some SMP machines only can poweroff in boot CPU */ acpi_enter_sleep_state(ACPI_STATE_S5); } Index: linux/drivers/atm/atmtcp.c =================================================================== --- linux.orig/drivers/atm/atmtcp.c +++ linux/drivers/atm/atmtcp.c @@ -368,7 +368,7 @@ static struct atm_dev atmtcp_control_dev .ops = &atmtcp_c_dev_ops, .type = "atmtcp", .number = 999, - .lock = SPIN_LOCK_UNLOCKED + .lock = SPIN_LOCK_UNLOCKED(atmtcp_control_dev.lock) }; Index: linux/drivers/base/class.c =================================================================== --- linux.orig/drivers/base/class.c +++ linux/drivers/base/class.c @@ -520,8 +520,10 @@ int class_device_add(struct class_device class_name = make_class_name(class_dev); 
sysfs_create_link(&class_dev->kobj, &class_dev->dev->kobj, "device"); + /* sysfs_create_link(&class_dev->dev->kobj, &class_dev->kobj, class_name); + */ } /* notify any interfaces this device is now here */ @@ -618,7 +620,9 @@ void class_device_del(struct class_devic if (class_dev->dev) { class_name = make_class_name(class_dev); sysfs_remove_link(&class_dev->kobj, "device"); + /* sysfs_remove_link(&class_dev->dev->kobj, class_name); + */ } if (class_dev->devt_attr) class_device_remove_file(class_dev, class_dev->devt_attr); Index: linux/drivers/block/cfq-iosched.c =================================================================== --- linux.orig/drivers/block/cfq-iosched.c +++ linux/drivers/block/cfq-iosched.c @@ -1382,10 +1382,9 @@ static void cfq_exit_single_io_context(s { struct cfq_data *cfqd = cic->cfqq->cfqd; request_queue_t *q = cfqd->queue; + unsigned long flags; - WARN_ON(!irqs_disabled()); - - spin_lock(q->queue_lock); + spin_lock_irqsave(q->queue_lock, flags); if (unlikely(cic->cfqq == cfqd->active_queue)) { __cfq_slice_expired(cfqd, cic->cfqq, 0); @@ -1394,7 +1393,7 @@ static void cfq_exit_single_io_context(s cfq_put_queue(cic->cfqq); cic->cfqq = NULL; - spin_unlock(q->queue_lock); + spin_unlock_irqrestore(q->queue_lock, flags); } /* @@ -1405,9 +1404,6 @@ static void cfq_exit_io_context(struct c { struct cfq_io_context *__cic; struct list_head *entry; - unsigned long flags; - - local_irq_save(flags); /* * put the reference this task is holding to the various queues @@ -1418,7 +1414,6 @@ static void cfq_exit_io_context(struct c } cfq_exit_single_io_context(cic); - local_irq_restore(flags); } static struct cfq_io_context * Index: linux/drivers/block/ll_rw_blk.c =================================================================== --- linux.orig/drivers/block/ll_rw_blk.c +++ linux/drivers/block/ll_rw_blk.c @@ -29,6 +29,7 @@ #include #include #include +#include /* * for max sense size @@ -1413,7 +1414,7 @@ static int ll_merge_requests_fn(request_ */ void 
blk_plug_device(request_queue_t *q) { - WARN_ON(!irqs_disabled()); + WARN_ON_NONRT(!irqs_disabled()); /* * don't plug a stopped queue, it must be paired with blk_start_queue() @@ -1434,7 +1435,7 @@ EXPORT_SYMBOL(blk_plug_device); */ int blk_remove_plug(request_queue_t *q) { - WARN_ON(!irqs_disabled()); + WARN_ON_NONRT(!irqs_disabled()); if (!test_and_clear_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags)) return 0; @@ -3369,13 +3370,17 @@ void exit_io_context(void) unsigned long flags; struct io_context *ioc; - local_irq_save(flags); task_lock(current); + /* + * CHECKME: what does this protect against - can interrupt + * contexts access current->io_context? + */ + local_irq_save_nort(flags); ioc = current->io_context; current->io_context = NULL; ioc->task = NULL; + local_irq_restore_nort(flags); task_unlock(current); - local_irq_restore(flags); if (ioc->aic && ioc->aic->exit) ioc->aic->exit(ioc->aic); Index: linux/drivers/block/loop.c =================================================================== --- linux.orig/drivers/block/loop.c +++ linux/drivers/block/loop.c @@ -514,12 +514,12 @@ static int loop_make_request(request_que lo->lo_pending++; loop_add_bio(lo, old_bio); spin_unlock_irq(&lo->lo_lock); - up(&lo->lo_bh_mutex); + complete(&lo->lo_bh_done); return 0; out: if (lo->lo_pending == 0) - up(&lo->lo_bh_mutex); + complete(&lo->lo_bh_done); spin_unlock_irq(&lo->lo_lock); bio_io_error(old_bio, old_bio->bi_size); return 0; @@ -580,23 +580,20 @@ static int loop_thread(void *data) lo->lo_pending = 1; /* - * up sem, we are running + * complete it, we are running */ - up(&lo->lo_sem); + complete(&lo->lo_done); for (;;) { int pending; - /* - * interruptible just to not contribute to load avg - */ - if (down_interruptible(&lo->lo_bh_mutex)) + if (wait_for_completion_interruptible(&lo->lo_bh_done)) continue; spin_lock_irq(&lo->lo_lock); /* - * could be upped because of tear-down, not pending work + * could be completed because of tear-down, not pending work */ if 
(unlikely(!lo->lo_pending)) { spin_unlock_irq(&lo->lo_lock); @@ -619,7 +616,7 @@ static int loop_thread(void *data) break; } - up(&lo->lo_sem); + complete(&lo->lo_done); return 0; } @@ -830,7 +827,7 @@ static int loop_set_fd(struct loop_devic set_blocksize(bdev, lo_blocksize); kernel_thread(loop_thread, lo, CLONE_KERNEL); - down(&lo->lo_sem); + wait_for_completion(&lo->lo_done); return 0; out_putf: @@ -896,10 +893,10 @@ static int loop_clr_fd(struct loop_devic lo->lo_state = Lo_rundown; lo->lo_pending--; if (!lo->lo_pending) - up(&lo->lo_bh_mutex); + complete(&lo->lo_bh_done); spin_unlock_irq(&lo->lo_lock); - down(&lo->lo_sem); + wait_for_completion(&lo->lo_done); lo->lo_backing_file = NULL; @@ -1276,8 +1273,8 @@ static int __init loop_init(void) if (!lo->lo_queue) goto out_mem4; init_MUTEX(&lo->lo_ctl_mutex); - init_MUTEX_LOCKED(&lo->lo_sem); - init_MUTEX_LOCKED(&lo->lo_bh_mutex); + init_completion(&lo->lo_done); + init_completion(&lo->lo_bh_done); lo->lo_number = i; spin_lock_init(&lo->lo_lock); disk->major = LOOP_MAJOR; Index: linux/drivers/block/paride/pseudo.h =================================================================== --- linux.orig/drivers/block/paride/pseudo.h +++ linux/drivers/block/paride/pseudo.h @@ -43,7 +43,7 @@ static unsigned long ps_timeout; static int ps_tq_active = 0; static int ps_nice = 0; -static DEFINE_SPINLOCK(ps_spinlock __attribute__((unused))); +static __attribute__((unused)) DEFINE_SPINLOCK(ps_spinlock); static DECLARE_WORK(ps_tq, ps_tq_int, NULL); Index: linux/drivers/block/sx8.c =================================================================== --- linux.orig/drivers/block/sx8.c +++ linux/drivers/block/sx8.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include @@ -280,7 +281,7 @@ struct carm_host { struct work_struct fsm_task; - struct semaphore probe_sem; + struct completion probe_comp; }; struct carm_response { @@ -1342,7 +1343,7 @@ static void carm_fsm_task (void *_data) } case 
HST_PROBE_FINISHED: - up(&host->probe_sem); + complete(&host->probe_comp); break; case HST_ERROR: @@ -1618,7 +1619,7 @@ static int carm_init_one (struct pci_dev host->flags = pci_dac ? FL_DAC : 0; spin_lock_init(&host->lock); INIT_WORK(&host->fsm_task, carm_fsm_task, host); - init_MUTEX_LOCKED(&host->probe_sem); + init_completion(&host->probe_comp); for (i = 0; i < ARRAY_SIZE(host->req); i++) host->req[i].tag = i; @@ -1687,8 +1688,8 @@ static int carm_init_one (struct pci_dev if (rc) goto err_out_free_irq; - DPRINTK("waiting for probe_sem\n"); - down(&host->probe_sem); + DPRINTK("waiting for probe_comp\n"); + wait_for_completion(&host->probe_comp); printk(KERN_INFO "%s: pci %s, ports %d, io %lx, irq %u, major %d\n", host->name, pci_name(pdev), (int) CARM_MAX_PORTS, Index: linux/drivers/char/Kconfig =================================================================== --- linux.orig/drivers/char/Kconfig +++ linux/drivers/char/Kconfig @@ -711,6 +711,45 @@ config RTC To compile this driver as a module, choose M here: the module will be called rtc. +config RTC_HISTOGRAM + tristate "Real Time Clock Histogram Support" + default n + depends on RTC + ---help--- + If you say Y here then the kernel will track the delivery and + wakeup latency of /dev/rtc using tasks and will report a + histogram to the kernel log when the application closes /dev/rtc. + +config BLOCKER + tristate "Priority Inheritance Debugging (Blocker) Device Support" + default y + ---help--- + If you say Y here then a device will be created that the userspace + pi_test suite uses to test and measure kernel locking primitives. + +config LPPTEST + tristate "Parallel Port Based Latency Measurement Device" + depends on !PARPORT && X86 + default y + ---help--- + If you say Y here then a device will be created that the userspace + testlpp utility uses to measure IRQ latencies of a target system + from an independent measurement system. 
+ + NOTE: this code assumes x86 PCs and that the parallel port is + bidirectional and is on IRQ 7. + + To use the device, both the target and the source system need to + run a kernel with CONFIG_LPPTEST enabled. To measure latencies, + use the scripts/testlpp utility in your kernel source directory, + and run it (as root) on the source system - it will start printing + out the latencies it took to get a response from the target system: + + Latency of response: 12.2 usecs (121265 cycles) + + Then generate various workloads on the target system to see how + (worst-case-) latencies are impacted. + config SGI_DS1286 tristate "SGI DS1286 RTC support" depends on SGI_IP22 Index: linux/drivers/char/Makefile =================================================================== --- linux.orig/drivers/char/Makefile +++ linux/drivers/char/Makefile @@ -57,6 +57,8 @@ obj-$(CONFIG_R3964) += n_r3964.o obj-$(CONFIG_APPLICOM) += applicom.o obj-$(CONFIG_SONYPI) += sonypi.o obj-$(CONFIG_RTC) += rtc.o +obj-$(CONFIG_BLOCKER) += blocker.o +obj-$(CONFIG_LPPTEST) += lpptest.o obj-$(CONFIG_HPET) += hpet.o obj-$(CONFIG_GEN_RTC) += genrtc.o obj-$(CONFIG_EFI_RTC) += efirtc.o Index: linux/drivers/char/blocker.c =================================================================== --- /dev/null +++ linux/drivers/char/blocker.c @@ -0,0 +1,108 @@ +/* + * priority inheritance testing device + */ + +#include +#include +#include + +#define BLOCKER_MINOR 221 + +#define BLOCK_IOCTL 4245 +#define BLOCK_SET_DEPTH 4246 + +#define MAX_LOCK_DEPTH 10 + +void loop(int loops) +{ + int i; + + for (i = 0; i < loops; i++) + get_cycles(); +} + +static spinlock_t blocker_lock[MAX_LOCK_DEPTH]; + +static unsigned int lock_depth = 1; + +void do_the_lock_and_loop(unsigned int args) +{ + int i, max; + + if (rt_task(current)) + max = lock_depth; + else if (lock_depth > 1) + max = (current->pid % lock_depth) + 1; + else + max = 1; + + /* Always lock from the top down */ + for (i = max-1; i >= 0; i--) + 
spin_lock(&blocker_lock[i]); + loop(args); + for (i = 0; i < max; i++) + spin_unlock(&blocker_lock[i]); +} + +static int blocker_open(struct inode *in, struct file *file) +{ + printk(KERN_INFO "blocker_open called\n"); + + return 0; +} + +static long blocker_ioctl(struct file *file, + unsigned int cmd, unsigned long args) +{ + switch(cmd) { + case BLOCK_IOCTL: + do_the_lock_and_loop(args); + return 0; + case BLOCK_SET_DEPTH: + if (args >= MAX_LOCK_DEPTH) + return -EINVAL; + lock_depth = args; + return 0; + default: + return -EINVAL; + } +} + +static struct file_operations blocker_fops = { + .owner = THIS_MODULE, + .llseek = no_llseek, + .unlocked_ioctl = blocker_ioctl, + .open = blocker_open, +}; + +static struct miscdevice blocker_dev = +{ + BLOCKER_MINOR, + "blocker", + &blocker_fops +}; + +static int __init blocker_init(void) +{ + int i; + + if (misc_register(&blocker_dev)) + return -ENODEV; + + for (i = 0; i < MAX_LOCK_DEPTH; i++) + spin_lock_init(blocker_lock + i); + + return 0; +} + +void __exit blocker_exit(void) +{ + printk(KERN_INFO "blocker device uninstalled\n"); + misc_deregister(&blocker_dev); +} + +module_init(blocker_init); +module_exit(blocker_exit); + +MODULE_LICENSE("GPL"); + Index: linux/drivers/char/epca.c =================================================================== --- linux.orig/drivers/char/epca.c +++ linux/drivers/char/epca.c @@ -80,7 +80,7 @@ static int invalid_lilo_config; /* The ISA boards do window flipping into the same spaces so its only sane with a single lock. 
It's still pretty efficient */ -static spinlock_t epca_lock = SPIN_LOCK_UNLOCKED; +static DEFINE_SPINLOCK(epca_lock); /* ----------------------------------------------------------------------- MAXBOARDS is typically 12, but ISA and EISA cards are restricted to Index: linux/drivers/char/hangcheck-timer.c =================================================================== --- linux.orig/drivers/char/hangcheck-timer.c +++ linux/drivers/char/hangcheck-timer.c @@ -49,6 +49,7 @@ #include #include #include +#include #define VERSION_STR "0.9.0" @@ -130,8 +131,12 @@ __setup("hcheck_dump_tasks", hangcheck_p #endif #ifdef HAVE_MONOTONIC +#ifndef CONFIG_GENERIC_TIME extern unsigned long long monotonic_clock(void); #else +#define monotonic_clock() ktime_to_ns(get_monotonic_clock()) +#endif +#else static inline unsigned long long monotonic_clock(void) { # ifdef __s390__ Index: linux/drivers/char/ipmi/ipmi_si_intf.c =================================================================== --- linux.orig/drivers/char/ipmi/ipmi_si_intf.c +++ linux/drivers/char/ipmi/ipmi_si_intf.c @@ -52,7 +52,7 @@ #include #include #include -#ifdef CONFIG_HIGH_RES_TIMERS +#ifdef CONFIG_HIGH_RES_TIMERS_OLD #include # if defined(schedule_next_int) /* Old high-res timer code, do translations. */ @@ -785,7 +785,7 @@ static int initialized = 0; /* Must be called with interrupts off and with the si_lock held. */ static void si_restart_short_timer(struct smi_info *smi_info) { -#if defined(CONFIG_HIGH_RES_TIMERS) +#if defined(CONFIG_HIGH_RES_TIMERS_OLD) unsigned long flags; unsigned long jiffies_now; unsigned long seq; @@ -855,13 +855,13 @@ static void smi_timeout(unsigned long da /* If the state machine asks for a short delay, then shorten the timer timeout. 
*/ if (smi_result == SI_SM_CALL_WITH_DELAY) { -#if defined(CONFIG_HIGH_RES_TIMERS) +#if defined(CONFIG_HIGH_RES_TIMERS_OLD) unsigned long seq; #endif spin_lock_irqsave(&smi_info->count_lock, flags); smi_info->short_timeouts++; spin_unlock_irqrestore(&smi_info->count_lock, flags); -#if defined(CONFIG_HIGH_RES_TIMERS) +#if defined(CONFIG_HIGH_RES_TIMERS_OLD) do { seq = read_seqbegin_irqsave(&xtime_lock, flags); smi_info->si_timer.expires = jiffies; @@ -877,7 +877,7 @@ static void smi_timeout(unsigned long da smi_info->long_timeouts++; spin_unlock_irqrestore(&smi_info->count_lock, flags); smi_info->si_timer.expires = jiffies + SI_TIMEOUT_JIFFIES; -#if defined(CONFIG_HIGH_RES_TIMERS) +#if defined(CONFIG_HIGH_RES_TIMERS_OLD) smi_info->si_timer.arch_cycle_expires = 0; #endif } Index: linux/drivers/char/ipmi/ipmi_watchdog.c =================================================================== --- linux.orig/drivers/char/ipmi/ipmi_watchdog.c +++ linux/drivers/char/ipmi/ipmi_watchdog.c @@ -366,7 +366,8 @@ static void panic_halt_ipmi_set_timeout( when both messages are free. */ static atomic_t heartbeat_tofree = ATOMIC_INIT(0); static DECLARE_MUTEX(heartbeat_lock); -static DECLARE_MUTEX_LOCKED(heartbeat_wait_lock); +/* PREEMPT_RT: should be a completion instead */ +static COMPAT_DECLARE_MUTEX_LOCKED(heartbeat_wait_lock); static void heartbeat_free_smi(struct ipmi_smi_msg *msg) { if (atomic_dec_and_test(&heartbeat_tofree)) Index: linux/drivers/char/lpptest.c =================================================================== --- /dev/null +++ linux/drivers/char/lpptest.c @@ -0,0 +1,163 @@ +/* + * /dev/lpptest device: test IRQ handling latencies over parallel port + * + * Copyright (C) 2005 Thomas Gleixner, Ingo Molnar + * + * licensed under the GPL + * + * You need to have CONFIG_PARPORT disabled for this device, it is a + * completely self-contained device that assumes sole ownership of the + * parallel port. 
+ */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define LPPTEST_CHAR_MAJOR 245 +#define LPPTEST_DEVICE_NAME "lpptest" + +#define LPPTEST_IRQ 7 + +#define LPPTEST_TEST _IOR (LPPTEST_CHAR_MAJOR, 1, unsigned long long) +#define LPPTEST_DISABLE _IOR (LPPTEST_CHAR_MAJOR, 2, unsigned long long) +#define LPPTEST_ENABLE _IOR (LPPTEST_CHAR_MAJOR, 3, unsigned long long) + +static char dev_id[] = "lpptest"; + +#define INIT_PORT() outb(0x04, 0x37a) +#define ENABLE_IRQ() outb(0x10, 0x37a) +#define DISABLE_IRQ() outb(0, 0x37a) + +static unsigned char out = 0x5a; + +/** + * Interrupt handler. Flip a bit in the reply. + */ +static int lpptest_irq (int irq, void *dev_id, struct pt_regs *regs) +{ + out ^= 0xff; + outb(out, 0x378); + + return IRQ_HANDLED; +} + +static cycles_t test_response(void) +{ + cycles_t now, end; + unsigned char in; + int timeout = 0; + + raw_local_irq_disable(); + in = inb(0x379); + inb(0x378); + outb(0x08, 0x378); + now = get_cycles(); + while(1) { + if (inb(0x379) != in) + break; + if (timeout++ > 1000000) { + outb(0x00, 0x378); + raw_local_irq_enable(); + + return 0; + } + } + end = get_cycles(); + outb(0x00, 0x378); + raw_local_irq_enable(); + + return end - now; +} + +static int lpptest_open(struct inode *inode, struct file *file) +{ + return 0; +} + +static int lpptest_close(struct inode *inode, struct file *file) +{ + return 0; +} + +int lpptest_ioctl(struct inode *inode, struct file *file, unsigned int ioctl_num, unsigned long ioctl_param) +{ + int retval = 0; + + switch (ioctl_num) { + + case LPPTEST_DISABLE: + DISABLE_IRQ(); + break; + + case LPPTEST_ENABLE: + ENABLE_IRQ(); + break; + + case LPPTEST_TEST: { + + cycles_t diff = test_response(); + if (copy_to_user((void *)ioctl_param, (void*) &diff, sizeof(diff))) + goto errcpy; + break; + } + default: retval = -EINVAL; + } + + return retval; + + errcpy: + return -EFAULT; +} + +static struct file_operations 
lpptest_dev_fops = { + .ioctl = lpptest_ioctl, + .open = lpptest_open, + .release = lpptest_close, +}; + +static int __init lpptest_init (void) +{ + if (register_chrdev(LPPTEST_CHAR_MAJOR, LPPTEST_DEVICE_NAME, &lpptest_dev_fops)) + { + printk(KERN_NOTICE "Can't allocate major number %d for lpptest.\n", + LPPTEST_CHAR_MAJOR); + return -EAGAIN; + } + + if (request_irq (LPPTEST_IRQ, lpptest_irq, 0, "lpptest", dev_id)) { + printk (KERN_WARNING "lpptest: irq %d in use. Unload parport module!\n", LPPTEST_IRQ); + unregister_chrdev(LPPTEST_CHAR_MAJOR, LPPTEST_DEVICE_NAME); + return -EAGAIN; + } + irq_desc[LPPTEST_IRQ].status |= IRQ_NODELAY; + irq_desc[LPPTEST_IRQ].action->flags |= SA_NODELAY | SA_INTERRUPT; + + INIT_PORT(); + ENABLE_IRQ(); + + return 0; +} +module_init (lpptest_init); + +static void __exit lpptest_exit (void) +{ + DISABLE_IRQ(); + + free_irq(LPPTEST_IRQ, dev_id); + unregister_chrdev(LPPTEST_CHAR_MAJOR, LPPTEST_DEVICE_NAME); +} +module_exit (lpptest_exit); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("lpp test module"); + Index: linux/drivers/char/random.c =================================================================== --- linux.orig/drivers/char/random.c +++ linux/drivers/char/random.c @@ -417,7 +417,7 @@ static struct entropy_store input_pool = .poolinfo = &poolinfo_table[0], .name = "input", .limit = 1, - .lock = SPIN_LOCK_UNLOCKED, + .lock = SPIN_LOCK_UNLOCKED(input_pool.lock), .pool = input_pool_data }; @@ -426,7 +426,7 @@ static struct entropy_store blocking_poo .name = "blocking", .limit = 1, .pull = &input_pool, - .lock = SPIN_LOCK_UNLOCKED, + .lock = SPIN_LOCK_UNLOCKED(blocking_pool.lock), .pool = blocking_pool_data }; @@ -434,7 +434,7 @@ static struct entropy_store nonblocking_ .poolinfo = &poolinfo_table[1], .name = "nonblocking", .pull = &input_pool, - .lock = SPIN_LOCK_UNLOCKED, + .lock = SPIN_LOCK_UNLOCKED(nonblocking_pool.lock), .pool = nonblocking_pool_data }; @@ -581,8 +581,11 @@ static void add_timer_randomness(struct 
preempt_disable(); /* if over the trickle threshold, use only 1 in 4096 samples */ if (input_pool.entropy_count > trickle_thresh && - (__get_cpu_var(trickle_count)++ & 0xfff)) - goto out; + (__get_cpu_var(trickle_count)++ & 0xfff)) { + preempt_enable(); + return; + } + preempt_enable(); sample.jiffies = jiffies; sample.cycles = get_cycles(); @@ -627,9 +630,6 @@ static void add_timer_randomness(struct if(input_pool.entropy_count >= random_read_wakeup_thresh) wake_up_interruptible(&random_read_wait); - -out: - preempt_enable(); } extern void add_input_randomness(unsigned int type, unsigned int code, Index: linux/drivers/char/rtc.c =================================================================== --- linux.orig/drivers/char/rtc.c +++ linux/drivers/char/rtc.c @@ -84,10 +84,36 @@ #include #include +#ifdef CONFIG_MIPS +# include +#endif + #if defined(__i386__) #include #endif +#ifdef CONFIG_RTC_HISTOGRAM + +static cycles_t last_interrupt_time; + +#include + +#define CPU_MHZ (cpu_khz / 1000) + +#define HISTSIZE 10000 +static int histogram[HISTSIZE]; + +static int rtc_state; + +enum rtc_states { + S_STARTUP, /* First round - let the application start */ + S_IDLE, /* Waiting for an interrupt */ + S_WAITING_FOR_READ, /* Signal delivered. 
waiting for rtc_read() */ + S_READ_MISSED, /* Signal delivered, read() deadline missed */ +}; + +#endif + #ifdef __sparc__ #include #include @@ -205,7 +231,146 @@ static inline unsigned char rtc_is_updat return uip; } +#ifndef RTC_IRQ +# undef CONFIG_RTC_HISTOGRAM +#endif + +static inline void rtc_open_event(void) +{ +#ifdef CONFIG_RTC_HISTOGRAM + int i; + + last_interrupt_time = 0; + rtc_state = S_STARTUP; + rtc_irq_data = 0; + + for (i = 0; i < HISTSIZE; i++) + histogram[i] = 0; +#endif +} + +static inline void rtc_wake_event(void) +{ +#ifndef CONFIG_RTC_HISTOGRAM + kill_fasync (&rtc_async_queue, SIGIO, POLL_IN); +#else + if (!(rtc_status & RTC_IS_OPEN)) + return; + + switch (rtc_state) { + /* Startup */ + case S_STARTUP: + kill_fasync (&rtc_async_queue, SIGIO, POLL_IN); + break; + /* Waiting for an interrupt */ + case S_IDLE: + kill_fasync (&rtc_async_queue, SIGIO, POLL_IN); + last_interrupt_time = get_cycles(); + rtc_state = S_WAITING_FOR_READ; + break; + + /* Signal has been delivered. waiting for rtc_read() */ + case S_WAITING_FOR_READ: + /* + * Well foo. The usermode application didn't + * schedule and read in time. + */ + last_interrupt_time = get_cycles(); + rtc_state = S_READ_MISSED; + printk("Read missed before next interrupt\n"); + break; + /* Signal has been delivered, read() deadline was missed */ + case S_READ_MISSED: + /* + * Not much we can do here. We're waiting for the usermode + * application to read the rtc + */ + last_interrupt_time = get_cycles(); + break; + } +#endif +} + +static inline void rtc_read_event(void) +{ +#ifdef CONFIG_RTC_HISTOGRAM + cycles_t now = get_cycles(); + + switch (rtc_state) { + /* Startup */ + case S_STARTUP: + rtc_state = S_IDLE; + break; + + /* Waiting for an interrupt */ + case S_IDLE: + printk("bug in rtc_read(): called in state S_IDLE!\n"); + break; + case S_WAITING_FOR_READ: /* + * Signal has been delivered. 
+ * waiting for rtc_read() + */ + /* + * Well done + */ + case S_READ_MISSED: /* + * Signal has been delivered, read() + * deadline was missed + */ + /* + * So, you finally got here. + */ + if (!last_interrupt_time) + printk("bug in rtc_read(): last_interrupt_time = 0\n"); + rtc_state = S_IDLE; + { + cycles_t latency = now - last_interrupt_time; + unsigned long delta; /* Microseconds */ + + delta = latency; + delta /= CPU_MHZ; + + if (delta > 1000 * 1000) { + printk("rtc: eek\n"); + } else { + unsigned long slot = delta; + if (slot >= HISTSIZE) + slot = HISTSIZE - 1; + histogram[slot]++; + if (delta > 2000) + printk("wow! That was a " + "%ld millisec bump\n", + delta / 1000); + } + } + rtc_state = S_IDLE; + break; + } +#endif +} + +static inline void rtc_close_event(void) +{ +#ifdef CONFIG_RTC_HISTOGRAM + int i = 0; + unsigned long total = 0; + + for (i = 0; i < HISTSIZE; i++) + total += histogram[i]; + if (!total) + return; + + printk("\nrtc latency histogram of {%s/%d, %lu samples}:\n", + current->comm, current->pid, total); + for (i = 0; i < HISTSIZE; i++) { + if (histogram[i]) + printk("%d %d\n", i, histogram[i]); + } +#endif +} + #ifdef RTC_IRQ + /* * A very tiny interrupt handler. It runs with SA_INTERRUPT set, * but there is possibility of conflicting with the set_rtc_mmss() @@ -218,6 +383,8 @@ static inline unsigned char rtc_is_updat irqreturn_t rtc_interrupt(int irq, void *dev_id, struct pt_regs *regs) { + int mod; + /* * Can be an alarm interrupt, update complete interrupt, * or a periodic interrupt. 
We store the status in the @@ -239,19 +406,22 @@ irqreturn_t rtc_interrupt(int irq, void rtc_irq_data |= (CMOS_READ(RTC_INTR_FLAGS) & 0xF0); } + mod = 0; if (rtc_status & RTC_TIMER_ON) - mod_timer(&rtc_irq_timer, jiffies + HZ/rtc_freq + 2*HZ/100); + mod = 1; spin_unlock (&rtc_lock); + if (mod) + mod_timer(&rtc_irq_timer, jiffies + HZ/rtc_freq + 2*HZ/100); /* Now do the rest of the actions */ spin_lock(&rtc_task_lock); if (rtc_callback) rtc_callback->func(rtc_callback->private_data); spin_unlock(&rtc_task_lock); - wake_up_interruptible(&rtc_wait); - kill_fasync (&rtc_async_queue, SIGIO, POLL_IN); + rtc_wake_event(); + wake_up_interruptible(&rtc_wait); return IRQ_HANDLED; } @@ -355,6 +525,8 @@ static ssize_t rtc_read(struct file *fil schedule(); } while (1); + rtc_read_event(); + if (count < sizeof(unsigned long)) retval = put_user(data, (unsigned int __user *)buf) ?: sizeof(int); else @@ -405,8 +577,8 @@ static int rtc_do_ioctl(unsigned int cmd if (rtc_status & RTC_TIMER_ON) { spin_lock_irq (&rtc_lock); rtc_status &= ~RTC_TIMER_ON; - del_timer(&rtc_irq_timer); spin_unlock_irq (&rtc_lock); + del_timer(&rtc_irq_timer); } return 0; } @@ -424,9 +596,9 @@ static int rtc_do_ioctl(unsigned int cmd if (!(rtc_status & RTC_TIMER_ON)) { spin_lock_irq (&rtc_lock); rtc_irq_timer.expires = jiffies + HZ/rtc_freq + 2*HZ/100; - add_timer(&rtc_irq_timer); rtc_status |= RTC_TIMER_ON; spin_unlock_irq (&rtc_lock); + add_timer(&rtc_irq_timer); } set_rtc_irq_bit(RTC_PIE); return 0; @@ -584,6 +756,11 @@ static int rtc_do_ioctl(unsigned int cmd save_freq_select = CMOS_READ(RTC_FREQ_SELECT); CMOS_WRITE((save_freq_select|RTC_DIV_RESET2), RTC_FREQ_SELECT); + /* + * Make CMOS date writes nonpreemptible even on PREEMPT_RT. + * There's a limit to everything! 
=B-) + */ + preempt_disable(); #ifdef CONFIG_MACH_DECSTATION CMOS_WRITE(real_yrs, RTC_DEC_YEAR); #endif @@ -593,6 +770,7 @@ static int rtc_do_ioctl(unsigned int cmd CMOS_WRITE(hrs, RTC_HOURS); CMOS_WRITE(min, RTC_MINUTES); CMOS_WRITE(sec, RTC_SECONDS); + preempt_enable(); CMOS_WRITE(save_control, RTC_CONTROL); CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT); @@ -690,6 +868,7 @@ static int rtc_open(struct inode *inode, if(rtc_status & RTC_IS_OPEN) goto out_busy; + rtc_open_event(); rtc_status |= RTC_IS_OPEN; rtc_irq_data = 0; @@ -711,6 +890,7 @@ static int rtc_release(struct inode *ino { #ifdef RTC_IRQ unsigned char tmp; + int del; if (rtc_has_irq == 0) goto no_irq; @@ -729,11 +909,14 @@ static int rtc_release(struct inode *ino CMOS_WRITE(tmp, RTC_CONTROL); CMOS_READ(RTC_INTR_FLAGS); } + del = 0; if (rtc_status & RTC_TIMER_ON) { rtc_status &= ~RTC_TIMER_ON; - del_timer(&rtc_irq_timer); + del = 1; } spin_unlock_irq(&rtc_lock); + if (del) + del_timer(&rtc_irq_timer); if (file->f_flags & FASYNC) { rtc_fasync (-1, file, 0); @@ -745,6 +928,7 @@ no_irq: rtc_irq_data = 0; rtc_status &= ~RTC_IS_OPEN; spin_unlock_irq (&rtc_lock); + rtc_close_event(); return 0; } @@ -809,6 +993,7 @@ int rtc_unregister(rtc_task_t *task) return -EIO; #else unsigned char tmp; + int del; spin_lock_irq(&rtc_lock); spin_lock(&rtc_task_lock); @@ -828,12 +1013,15 @@ int rtc_unregister(rtc_task_t *task) CMOS_WRITE(tmp, RTC_CONTROL); CMOS_READ(RTC_INTR_FLAGS); } + del = 0; if (rtc_status & RTC_TIMER_ON) { rtc_status &= ~RTC_TIMER_ON; - del_timer(&rtc_irq_timer); + del = 1; } rtc_status &= ~RTC_IS_OPEN; spin_unlock(&rtc_task_lock); + if (del) + del_timer(&rtc_irq_timer); spin_unlock_irq(&rtc_lock); return 0; #endif @@ -1092,6 +1280,7 @@ module_exit(rtc_exit); static void rtc_dropped_irq(unsigned long data) { unsigned long freq; + int mod; spin_lock_irq (&rtc_lock); @@ -1101,8 +1290,9 @@ static void rtc_dropped_irq(unsigned lon } /* Just in case someone disabled the timer from behind our back... 
*/ + mod = 0; if (rtc_status & RTC_TIMER_ON) - mod_timer(&rtc_irq_timer, jiffies + HZ/rtc_freq + 2*HZ/100); + mod = 1; rtc_irq_data += ((rtc_freq/HZ)<<8); rtc_irq_data &= ~0xff; @@ -1111,6 +1301,8 @@ static void rtc_dropped_irq(unsigned lon freq = rtc_freq; spin_unlock_irq(&rtc_lock); + if (mod) + mod_timer(&rtc_irq_timer, jiffies + HZ/rtc_freq + 2*HZ/100); printk(KERN_WARNING "rtc: lost some interrupts at %ldHz.\n", freq); Index: linux/drivers/char/s3c2410-rtc.c =================================================================== --- linux.orig/drivers/char/s3c2410-rtc.c +++ linux/drivers/char/s3c2410-rtc.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include Index: linux/drivers/char/specialix.c =================================================================== --- linux.orig/drivers/char/specialix.c +++ linux/drivers/char/specialix.c @@ -2491,7 +2491,7 @@ static int __init specialix_init(void) #endif for (i = 0; i < SX_NBOARD; i++) - sx_board[i].lock = SPIN_LOCK_UNLOCKED; + spin_lock_init(&sx_board[i].lock); if (sx_init_drivers()) { func_exit(); Index: linux/drivers/char/sx.c =================================================================== --- linux.orig/drivers/char/sx.c +++ linux/drivers/char/sx.c @@ -2321,7 +2321,7 @@ static int sx_init_portstructs (int nboa #ifdef NEW_WRITE_LOCKING port->gs.port_write_sem = MUTEX; #endif - port->gs.driver_lock = SPIN_LOCK_UNLOCKED; + spin_lock_init(&port->gs.driver_lock); /* * Initializing wait queue */ Index: linux/drivers/char/sysrq.c =================================================================== --- linux.orig/drivers/char/sysrq.c +++ linux/drivers/char/sysrq.c @@ -114,7 +114,7 @@ static struct sysrq_key_op sysrq_crashdu static void sysrq_handle_reboot(int key, struct pt_regs *pt_regs, struct tty_struct *tty) { - local_irq_enable(); + raw_local_irq_enable(); emergency_restart(); } @@ -169,6 +169,38 @@ static struct sysrq_key_op sysrq_showreg .enable_mask = SYSRQ_ENABLE_DUMP, }; +#ifdef 
CONFIG_DEBUG_DEADLOCKS + +static void sysrq_handle_showlocks(int key, struct pt_regs *pt_regs, + struct tty_struct *tty) +{ + show_all_locks(); +} + +static struct sysrq_key_op sysrq_showlocks_op = { + .handler = sysrq_handle_showlocks, + .help_msg = "show-all-locks(D)", + .action_msg = "Show Locks Held", +}; + +#endif + +#if defined(__i386__) + +static void sysrq_handle_showallregs(int key, struct pt_regs *pt_regs, + struct tty_struct *tty) +{ + nmi_show_all_regs(); +} + +static struct sysrq_key_op sysrq_showallregs_op = { + .handler = sysrq_handle_showallregs, + .help_msg = "showalLcpupc", + .action_msg = "Show Regs On All CPUs", +}; + +#endif + static void sysrq_handle_showstate(int key, struct pt_regs *pt_regs, struct tty_struct *tty) @@ -294,7 +326,11 @@ static struct sysrq_key_op *sysrq_key_ta #else /* c */ NULL, #endif +#ifdef CONFIG_DEBUG_DEADLOCKS +/* d */ &sysrq_showlocks_op, +#else /* d */ NULL, +#endif /* e */ &sysrq_term_op, /* f */ &sysrq_moom_op, /* g */ NULL, @@ -306,7 +342,11 @@ static struct sysrq_key_op *sysrq_key_ta #else /* k */ NULL, #endif +#if defined(__i386__) +/* l */ &sysrq_showallregs_op, +#else /* l */ NULL, +#endif /* m */ &sysrq_showmem_op, /* n */ &sysrq_unrt_op, /* o */ NULL, /* This will often be registered Index: linux/drivers/char/tty_io.c =================================================================== --- linux.orig/drivers/char/tty_io.c +++ linux/drivers/char/tty_io.c @@ -224,6 +224,7 @@ static int check_tty_count(struct tty_st printk(KERN_WARNING "Warning: dev (%s) tty->count(%d) " "!= #fd's(%d) in %s\n", tty->name, tty->count, count, routine); + dump_stack(); return count; } #endif @@ -867,8 +868,8 @@ static void do_tty_hangup(void *data) p->signal->tty = NULL; if (!p->signal->leader) continue; - send_group_sig_info(SIGHUP, SEND_SIG_PRIV, p); - send_group_sig_info(SIGCONT, SEND_SIG_PRIV, p); + group_send_sig_info(SIGHUP, SEND_SIG_PRIV, p); + group_send_sig_info(SIGCONT, SEND_SIG_PRIV, p); if (tty->pgrp > 0) 
p->signal->tty_old_pgrp = tty->pgrp; } while_each_task_pid(tty->session, PIDTYPE_SID, p); Index: linux/drivers/char/watchdog/cpu5wdt.c =================================================================== --- linux.orig/drivers/char/watchdog/cpu5wdt.c +++ linux/drivers/char/watchdog/cpu5wdt.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include @@ -56,7 +57,7 @@ static int ticks = 10000; /* some device data */ static struct { - struct semaphore stop; + struct completion stop; volatile int running; struct timer_list timer; volatile int queue; @@ -84,7 +85,7 @@ static void cpu5wdt_trigger(unsigned lon } else { /* ticks doesn't matter anyway */ - up(&cpu5wdt_device.stop); + complete(&cpu5wdt_device.stop); } } @@ -238,7 +239,7 @@ static int __devinit cpu5wdt_init(void) if ( !val ) printk(KERN_INFO PFX "sorry, was my fault\n"); - init_MUTEX_LOCKED(&cpu5wdt_device.stop); + init_completion(&cpu5wdt_device.stop); cpu5wdt_device.queue = 0; clear_bit(0, &cpu5wdt_device.inuse); @@ -268,7 +269,7 @@ static void __devexit cpu5wdt_exit(void) { if ( cpu5wdt_device.queue ) { cpu5wdt_device.queue = 0; - down(&cpu5wdt_device.stop); + wait_for_completion(&cpu5wdt_device.stop); } misc_deregister(&cpu5wdt_misc); Index: linux/drivers/clocksource/Makefile =================================================================== --- /dev/null +++ linux/drivers/clocksource/Makefile @@ -0,0 +1,3 @@ +#XXX doesn't boot! obj-$(CONFIG_X86) += tsc-interp.o +obj-$(CONFIG_X86_CYCLONE_TIMER) += cyclone.o +obj-$(CONFIG_X86_PM_TIMER) += acpi_pm.o Index: linux/drivers/clocksource/acpi_pm.c =================================================================== --- /dev/null +++ linux/drivers/clocksource/acpi_pm.c @@ -0,0 +1,154 @@ +/* + * linux/drivers/clocksource/acpi_pm.c + * + * This file contains the ACPI PM based clocksource. 
+ * + * This code was largely moved from the i386 timer_pm.c file + * which was (C) Dominik Brodowski 2003 + * and contained the following comments: + * + * Driver to use the Power Management Timer (PMTMR) available in some + * southbridges as primary timing source for the Linux kernel. + * + * Based on parts of linux/drivers/acpi/hardware/hwtimer.c, timer_pit.c, + * timer_hpet.c, and on Arjan van de Ven's implementation for 2.4. + * + * This file is licensed under the GPL v2. + */ + +#include +#include +#include +#include + +/* PMTMR ticks per second; the tick count expected during a calibration run (PMTMR_EXPECTED_RATE) is derived from it below */ +#define PMTMR_TICKS_PER_SEC 3579545 + +#if (defined(CONFIG_X86) && (!defined(CONFIG_X86_64))) +# include "mach_timer.h" +# define PMTMR_EXPECTED_RATE ((PMTMR_TICKS_PER_SEC*CALIBRATE_TIME_MSEC)/1000) +#endif + +/* + * The I/O port the PMTMR resides at. + * The location is detected during setup_arch(), + * in arch/i386/acpi/boot.c + */ +extern u32 acpi_pmtmr_ioport; +extern int acpi_pmtmr_buggy; + +#define ACPI_PM_MASK 0xFFFFFF /* limit it to 24 bits */ + +static inline u32 read_pmtmr(void) +{ + /* mask the output to 24 bits */ + return inl(acpi_pmtmr_ioport) & ACPI_PM_MASK; +} + +static cycle_t acpi_pm_read_verified(void) +{ + u32 v1 = 0, v2 = 0, v3 = 0; + + /* + * It has been reported that on various broken chipsets (ICH4, + * PIIX4 and PIIX4E) the ACPI PM clock source is not latched, so + * it must be read multiple times to get a safe value. Retry while + * the three samples are in an order a wrapping up-counter cannot produce: + */ + do { + v1 = read_pmtmr(); + v2 = read_pmtmr(); + v3 = read_pmtmr(); + } while ((v1 > v2 && v1 < v3) || (v2 > v3 && v2 < v1) + || (v3 > v1 && v3 < v2)); + + return (cycle_t)v2; +} + +static cycle_t acpi_pm_read(void) +{ + return (cycle_t)read_pmtmr(); +} + +struct clocksource clocksource_acpi_pm = { + .name = "acpi_pm", + .rating = 200, + .read = acpi_pm_read, + .mask = (cycle_t)ACPI_PM_MASK, + .mult = 0, /* calculated in init_acpi_pm_clocksource() */ + .shift = 22, + .is_continuous = 1, +}; + +#if defined(CONFIG_X86) &&
!defined(CONFIG_X86_64) +/* + * Some boards have the PMTMR running way too fast. We check + * the PMTMR rate against PIT channel 2 to catch these cases. + */ +static int __init verify_pmtmr_rate(void) +{ + unsigned long count, delta; + u32 value1, value2; + + mach_prepare_counter(); + value1 = read_pmtmr(); + mach_countup(&count); + value2 = read_pmtmr(); + delta = (value2 - value1) & ACPI_PM_MASK; + + /* check that the PMTMR delta is within 5% of what we expect: */ + if (delta < (PMTMR_EXPECTED_RATE * 19) / 20 || + delta > (PMTMR_EXPECTED_RATE * 21) / 20) { + printk(KERN_INFO "PM-Timer running at invalid rate: %lu%% of normal - aborting.\n", 100UL * delta / PMTMR_EXPECTED_RATE); + return -1; + } + + return 0; +} +#else +# define verify_pmtmr_rate() (0) +#endif + +static int __init init_acpi_pm_clocksource(void) +{ + u32 value1, value2; + unsigned int i; + + if (!acpi_pmtmr_ioport) + return -ENODEV; + + clocksource_acpi_pm.mult = clocksource_hz2mult(PMTMR_TICKS_PER_SEC, + clocksource_acpi_pm.shift); + + /* "verify" this timing source: poll (up to 10000 reads) until the counter changes; an increase, or a wrap to a small value (< 0xFFF), is accepted. The "%#x" conversions below already emit the "0x" prefix: */ + value1 = read_pmtmr(); + for (i = 0; i < 10000; i++) { + value2 = read_pmtmr(); + if (value2 == value1) + continue; + if (value2 > value1) + goto pm_good; + if ((value2 < value1) && ((value2) < 0xFFF)) + goto pm_good; + printk(KERN_INFO "PM-Timer had inconsistent results: %#x, %#x - aborting.\n", value1, value2); + return -EINVAL; + } + printk(KERN_INFO "PM-Timer had no reasonable result: %#x - aborting.\n", value1); + return -ENODEV; + +pm_good: + if (verify_pmtmr_rate() != 0) + return -ENODEV; + + /* check to see if pmtmr is known buggy; if so, switch to the triple-read variant and lower the rating: */ + if (acpi_pmtmr_buggy) { + clocksource_acpi_pm.read = acpi_pm_read_verified; + clocksource_acpi_pm.rating = 110; + } + + register_clocksource(&clocksource_acpi_pm); + + return 0; +} + +module_init(init_acpi_pm_clocksource); Index: linux/drivers/clocksource/cyclone.c =================================================================== --- /dev/null +++
linux/drivers/clocksource/cyclone.c @@ -0,0 +1,121 @@ +#include +#include +#include +#include +#include + +#include +#include + +#include "mach_timer.h" + +#define CYCLONE_CBAR_ADDR 0xFEB00CD0 /* base address ptr */ +#define CYCLONE_PMCC_OFFSET 0x51A0 /* offset to control register */ +#define CYCLONE_MPCS_OFFSET 0x51A8 /* offset to select register */ +#define CYCLONE_MPMC_OFFSET 0x51D0 /* offset to count register */ +#define CYCLONE_TIMER_FREQ 99780000 /* 100Mhz, but not really */ +#define CYCLONE_TIMER_MASK 0xFFFFFFFF /* 32 bit mask */ + +int use_cyclone = 0; +static void __iomem *cyclone_ptr; + +static cycle_t read_cyclone(void) +{ + return (cycle_t)readl(cyclone_ptr); +} + +struct clocksource clocksource_cyclone = { + .name = "cyclone", + .rating = 250, + .read = read_cyclone, + .mask = (cycle_t)CYCLONE_TIMER_MASK, + .mult = 10, + .shift = 0, + .is_continuous = 1, +}; + +static int __init init_cyclone_clocksource(void) +{ + unsigned long base; /* saved value from CBAR */ + unsigned long offset; + u32 __iomem* volatile cyclone_timer; /* Cyclone MPMC0 register */ + u32 __iomem* reg; + int i; + + /* make sure we're on a summit box: */ + if (!use_cyclone) + return -ENODEV; + + printk(KERN_INFO "Summit chipset: Starting Cyclone Counter.\n"); + + /* find base address: */ + offset = CYCLONE_CBAR_ADDR; + reg = ioremap_nocache(offset, sizeof(reg)); + if (!reg) { + printk(KERN_ERR "Summit chipset: Could not find valid CBAR register.\n"); + return -ENODEV; + } + /* even on 64bit systems, this is only 32bits: */ + base = readl(reg); + if (!base) { + printk(KERN_ERR "Summit chipset: Could not find valid CBAR value.\n"); + return -ENODEV; + } + iounmap(reg); + + /* setup PMCC: */ + offset = base + CYCLONE_PMCC_OFFSET; + reg = ioremap_nocache(offset, sizeof(reg)); + if (!reg) { + printk(KERN_ERR "Summit chipset: Could not find valid PMCC register.\n"); + return -ENODEV; + } + writel(0x00000001,reg); + iounmap(reg); + + /* setup MPCS: */ + offset = base + CYCLONE_MPCS_OFFSET; + 
reg = ioremap_nocache(offset, sizeof(reg)); + if (!reg) { + printk(KERN_ERR "Summit chipset: Could not find valid MPCS register.\n"); + return -ENODEV; + } + writel(0x00000001,reg); + iounmap(reg); + + /* map in cyclone_timer: */ + offset = base + CYCLONE_MPMC_OFFSET; + cyclone_timer = ioremap_nocache(offset, sizeof(u64)); + if (!cyclone_timer) { + printk(KERN_ERR "Summit chipset: Could not find valid MPMC register.\n"); + return -ENODEV; + } + + /* quick test to make sure its ticking: */ + for (i = 0; i < 3; i++){ + u32 old = readl(cyclone_timer); + int stall = 100; + + while (stall--) + barrier(); + + if (readl(cyclone_timer) == old) { + printk(KERN_ERR "Summit chipset: Counter not counting! DISABLED\n"); + iounmap(cyclone_timer); + cyclone_timer = NULL; + return -ENODEV; + } + } + cyclone_ptr = cyclone_timer; + + /* sort out mult/shift values: */ + clocksource_cyclone.shift = 22; + clocksource_cyclone.mult = clocksource_hz2mult(CYCLONE_TIMER_FREQ, + clocksource_cyclone.shift); + + register_clocksource(&clocksource_cyclone); + + return 0; +} + +module_init(init_cyclone_clocksource); Index: linux/drivers/clocksource/tsc-interp.c =================================================================== --- /dev/null +++ linux/drivers/clocksource/tsc-interp.c @@ -0,0 +1,111 @@ +/* + * TSC-Jiffies Interpolation clocksource + * Example interpolation clocksource. 
+ * TODO: + * o per-cpu TSC offsets + */ +#include +#include +#include +#include +#include +#include +#include + +static unsigned long current_tsc_khz = 0; + +static DECLARE_RAW_SEQLOCK(tsc_interp_lock); +static unsigned long tsc_then; +static unsigned long jiffies_then; +struct timer_list tsc_interp_timer; + +static unsigned long mult, shift; + +#define NSEC_PER_JIFFY ((((unsigned long long)NSEC_PER_SEC)<<8)/ACTHZ) +#define SHIFT_VAL 22 + +static cycle_t read_tsc_interp(void); +static void tsc_interp_update_callback(void); + +static struct clocksource clocksource_tsc_interp = { + .name = "tsc-interp", + .rating = 150, + .type = CLOCKSOURCE_FUNCTION, + .read_fnct = read_tsc_interp, + .mask = (cycle_t)((1ULL<<32)-1), + .mult = 1<> shift); + else + ret += (cycle_t)(jiffs_now - jiffs_then)*NSEC_PER_JIFFY; + + return ret; +} + +static void tsc_interp_update_callback(void) +{ + /* only update if tsc_khz has changed: */ + if (current_tsc_khz != tsc_khz) { + current_tsc_khz = tsc_khz; + mult = clocksource_khz2mult(current_tsc_khz, shift); + } +} + +static int __init init_tsc_interp_clocksource(void) +{ + /* TSC initialization is done in arch/i386/kernel/tsc.c */ + if (cpu_has_tsc && tsc_khz) { + current_tsc_khz = tsc_khz; + shift = SHIFT_VAL; + mult = clocksource_khz2mult(current_tsc_khz, shift); + /* setup periodic soft-timer: */ + init_timer(&tsc_interp_timer); + tsc_interp_timer.function = tsc_interp_sync; + tsc_interp_timer.expires = jiffies; + add_timer(&tsc_interp_timer); + + register_clocksource(&clocksource_tsc_interp); + } + return 0; +} + +module_init(init_tsc_interp_clocksource); Index: linux/drivers/cpufreq/cpufreq.c =================================================================== --- linux.orig/drivers/cpufreq/cpufreq.c +++ linux/drivers/cpufreq/cpufreq.c @@ -605,7 +605,8 @@ static int cpufreq_add_dev (struct sys_d policy->cpu = cpu; policy->cpus = cpumask_of_cpu(cpu); - init_MUTEX_LOCKED(&policy->lock); + init_MUTEX(&policy->lock); + down(&policy->lock); 
init_completion(&policy->kobj_unregister); INIT_WORK(&policy->update, handle_update, (void *)(long)cpu); @@ -614,6 +615,7 @@ static int cpufreq_add_dev (struct sys_d */ ret = cpufreq_driver->init(policy); if (ret) { + up(&policy->lock); dprintk("initialization failed\n"); goto err_out; } @@ -626,8 +628,10 @@ static int cpufreq_add_dev (struct sys_d strlcpy(policy->kobj.name, "cpufreq", KOBJ_NAME_LEN); ret = kobject_register(&policy->kobj); - if (ret) + if (ret) { + up(&policy->lock); goto err_out_driver_exit; + } /* set up files for this cpu device */ drv_attr = cpufreq_driver->attr; Index: linux/drivers/i2c/busses/i2c-pxa.c =================================================================== --- linux.orig/drivers/i2c/busses/i2c-pxa.c +++ linux/drivers/i2c/busses/i2c-pxa.c @@ -925,7 +925,7 @@ static struct i2c_algorithm i2c_pxa_algo }; static struct pxa_i2c i2c_pxa = { - .lock = SPIN_LOCK_UNLOCKED, + .lock = SPIN_LOCK_UNLOCKED(i2c_pxa.lock), .wait = __WAIT_QUEUE_HEAD_INITIALIZER(i2c_pxa.wait), .adap = { .owner = THIS_MODULE, Index: linux/drivers/i2c/busses/i2c-s3c2410.c =================================================================== --- linux.orig/drivers/i2c/busses/i2c-s3c2410.c +++ linux/drivers/i2c/busses/i2c-s3c2410.c @@ -573,7 +573,7 @@ static struct i2c_algorithm s3c24xx_i2c_ }; static struct s3c24xx_i2c s3c24xx_i2c = { - .lock = SPIN_LOCK_UNLOCKED, + .lock = SPIN_LOCK_UNLOCKED(s3c24xx_i2c.lock), .wait = __WAIT_QUEUE_HEAD_INITIALIZER(s3c24xx_i2c.wait), .adap = { .name = "s3c2410-i2c", Index: linux/drivers/i2c/chips/tps65010.c =================================================================== --- linux.orig/drivers/i2c/chips/tps65010.c +++ linux/drivers/i2c/chips/tps65010.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -33,7 +34,6 @@ #include #include -#include #include #include Index: linux/drivers/ide/ide-floppy.c =================================================================== --- 
linux.orig/drivers/ide/ide-floppy.c +++ linux/drivers/ide/ide-floppy.c @@ -838,7 +838,7 @@ static ide_startstop_t idefloppy_pc_intr "transferred\n", pc->actually_transferred); clear_bit(PC_DMA_IN_PROGRESS, &pc->flags); - local_irq_enable(); + local_irq_enable_nort(); if (status.b.check || test_bit(PC_DMA_ERROR, &pc->flags)) { /* Error detected */ @@ -1670,9 +1670,9 @@ static int idefloppy_get_format_progress atapi_status_t status; unsigned long flags; - local_irq_save(flags); + local_irq_save_nort(flags); status.all = HWIF(drive)->INB(IDE_STATUS_REG); - local_irq_restore(flags); + local_irq_restore_nort(flags); progress_indication = !status.b.dsc ? 0 : 0x10000; } Index: linux/drivers/ide/ide-io.c =================================================================== --- linux.orig/drivers/ide/ide-io.c +++ linux/drivers/ide/ide-io.c @@ -636,7 +636,7 @@ static ide_startstop_t drive_cmd_intr (i u8 stat = hwif->INB(IDE_STATUS_REG); int retries = 10; - local_irq_enable(); + local_irq_enable_nort(); if ((stat & DRQ_STAT) && args && args[3]) { u8 io_32bit = drive->io_32bit; drive->io_32bit = 0; @@ -1107,7 +1107,7 @@ static void ide_do_request (ide_hwgroup_ ide_get_lock(ide_intr, hwgroup); /* caller must own ide_lock */ - BUG_ON(!irqs_disabled()); + BUG_ON_NONRT(!irqs_disabled()); while (!hwgroup->busy) { hwgroup->busy = 1; @@ -1219,8 +1219,7 @@ static void ide_do_request (ide_hwgroup_ */ if (masked_irq != IDE_NO_IRQ && hwif->irq != masked_irq) disable_irq_nosync(hwif->irq); - spin_unlock(&ide_lock); - local_irq_enable(); + spin_unlock_irq(&ide_lock); /* allow other IRQs while we start this request */ startstop = start_request(drive, rq); spin_lock_irq(&ide_lock); @@ -1368,7 +1367,7 @@ void ide_timer_expiry (unsigned long dat #endif /* DISABLE_IRQ_NOSYNC */ /* local CPU only, * as if we were handling an interrupt */ - local_irq_disable(); + local_irq_disable_nort(); if (hwgroup->polling) { startstop = handler(drive); } else if (drive_is_ready(drive)) { @@ -1565,7 +1564,7 @@ 
irqreturn_t ide_intr (int irq, void *dev spin_unlock(&ide_lock); if (drive->unmask) - local_irq_enable(); + local_irq_enable_nort(); /* service this interrupt, may set handler for next interrupt */ startstop = handler(drive); spin_lock_irq(&ide_lock); Index: linux/drivers/ide/ide-iops.c =================================================================== --- linux.orig/drivers/ide/ide-iops.c +++ linux/drivers/ide/ide-iops.c @@ -246,10 +246,10 @@ static void ata_input_data(ide_drive_t * if (io_32bit) { if (io_32bit & 2) { unsigned long flags; - local_irq_save(flags); + local_irq_save_nort(flags); ata_vlb_sync(drive, IDE_NSECTOR_REG); hwif->INSL(IDE_DATA_REG, buffer, wcount); - local_irq_restore(flags); + local_irq_restore_nort(flags); } else hwif->INSL(IDE_DATA_REG, buffer, wcount); } else { @@ -268,10 +268,10 @@ static void ata_output_data(ide_drive_t if (io_32bit) { if (io_32bit & 2) { unsigned long flags; - local_irq_save(flags); + local_irq_save_nort(flags); ata_vlb_sync(drive, IDE_NSECTOR_REG); hwif->OUTSL(IDE_DATA_REG, buffer, wcount); - local_irq_restore(flags); + local_irq_restore_nort(flags); } else hwif->OUTSL(IDE_DATA_REG, buffer, wcount); } else { @@ -570,12 +570,12 @@ int ide_wait_stat (ide_startstop_t *star if (!(stat & BUSY_STAT)) break; - local_irq_restore(flags); + local_irq_restore_nort(flags); *startstop = ide_error(drive, "status timeout", stat); return 1; } } - local_irq_restore(flags); + local_irq_restore_nort(flags); } /* * Allow status to settle, then read it again. 
@@ -733,17 +733,15 @@ int ide_driveid_update (ide_drive_t *dri printk("%s: CHECK for good STATUS\n", drive->name); return 0; } - local_irq_save(flags); - SELECT_MASK(drive, 0); id = kmalloc(SECTOR_WORDS*4, GFP_ATOMIC); - if (!id) { - local_irq_restore(flags); + if (!id) return 0; - } + local_irq_save_nort(flags); + SELECT_MASK(drive, 0); ata_input_data(drive, id, SECTOR_WORDS); (void) hwif->INB(IDE_STATUS_REG); /* clear drive IRQ */ - local_irq_enable(); - local_irq_restore(flags); + local_irq_enable_nort(); + local_irq_restore_nort(flags); ide_fix_driveid(id); if (id) { drive->id->dma_ultra = id->dma_ultra; @@ -823,7 +821,7 @@ int ide_config_drive_speed (ide_drive_t if (time_after(jiffies, timeout)) break; } - local_irq_restore(flags); + local_irq_restore_nort(flags); } /* @@ -1249,6 +1247,7 @@ int ide_wait_not_busy(ide_hwif_t *hwif, */ if (stat == 0xff) return -ENODEV; + touch_softlockup_watchdog(); } return -EBUSY; } Index: linux/drivers/ide/ide-lib.c =================================================================== --- linux.orig/drivers/ide/ide-lib.c +++ linux/drivers/ide/ide-lib.c @@ -447,15 +447,16 @@ EXPORT_SYMBOL_GPL(ide_set_xfer_rate); static void ide_dump_opcode(ide_drive_t *drive) { + unsigned long flags; struct request *rq; u8 opcode = 0; int found = 0; - spin_lock(&ide_lock); + spin_lock_irqsave(&ide_lock, flags); rq = NULL; if (HWGROUP(drive)) rq = HWGROUP(drive)->rq; - spin_unlock(&ide_lock); + spin_unlock_irqrestore(&ide_lock, flags); if (!rq) return; if (rq->flags & (REQ_DRIVE_CMD | REQ_DRIVE_TASK)) { @@ -483,10 +484,8 @@ static void ide_dump_opcode(ide_drive_t static u8 ide_dump_ata_status(ide_drive_t *drive, const char *msg, u8 stat) { ide_hwif_t *hwif = HWIF(drive); - unsigned long flags; u8 err = 0; - local_irq_set(flags); printk("%s: %s: status=0x%02x { ", drive->name, msg, stat); if (stat & BUSY_STAT) printk("Busy "); @@ -546,7 +545,7 @@ static u8 ide_dump_ata_status(ide_drive_ printk("\n"); } ide_dump_opcode(drive); - 
local_irq_restore(flags); + return err; } @@ -561,14 +560,12 @@ static u8 ide_dump_ata_status(ide_drive_ static u8 ide_dump_atapi_status(ide_drive_t *drive, const char *msg, u8 stat) { - unsigned long flags; - atapi_status_t status; atapi_error_t error; status.all = stat; error.all = 0; - local_irq_set(flags); + printk("%s: %s: status=0x%02x { ", drive->name, msg, stat); if (status.b.bsy) printk("Busy "); @@ -594,7 +591,7 @@ static u8 ide_dump_atapi_status(ide_driv printk("}\n"); } ide_dump_opcode(drive); - local_irq_restore(flags); + return error.all; } Index: linux/drivers/ide/ide-probe.c =================================================================== --- linux.orig/drivers/ide/ide-probe.c +++ linux/drivers/ide/ide-probe.c @@ -184,7 +184,7 @@ static inline void do_identify (ide_driv hwif->ata_input_data(drive, id, SECTOR_WORDS); drive->id_read = 1; - local_irq_enable(); + local_irq_enable_nort(); ide_fix_driveid(id); #if defined (CONFIG_SCSI_EATA_DMA) || defined (CONFIG_SCSI_EATA_PIO) || defined (CONFIG_SCSI_EATA) @@ -362,14 +362,14 @@ static int actual_try_to_identify (ide_d unsigned long flags; /* local CPU only; some systems need this */ - local_irq_save(flags); + local_irq_save_nort(flags); /* drive returned ID */ do_identify(drive, cmd); /* drive responded with ID */ rc = 0; /* clear drive IRQ */ (void) hwif->INB(IDE_STATUS_REG); - local_irq_restore(flags); + local_irq_restore_nort(flags); } else { /* drive refused ID */ rc = 2; @@ -656,7 +656,7 @@ static void hwif_release_dev (struct dev { ide_hwif_t *hwif = container_of(dev, ide_hwif_t, gendev); - up(&hwif->gendev_rel_sem); + complete(&hwif->gendev_rel_comp); } static void hwif_register (ide_hwif_t *hwif) @@ -842,7 +842,7 @@ static void probe_hwif(ide_hwif_t *hwif) } while ((stat & BUSY_STAT) && time_after(timeout, jiffies)); } - local_irq_restore(flags); + local_irq_restore_nort(flags); /* * Use cached IRQ number. It might be (and is...) 
changed by probe * code above @@ -1328,7 +1328,7 @@ static void drive_release_dev (struct de drive->queue = NULL; spin_unlock_irq(&ide_lock); - up(&drive->gendev_rel_sem); + complete(&drive->gendev_rel_comp); } /* Index: linux/drivers/ide/ide-taskfile.c =================================================================== --- linux.orig/drivers/ide/ide-taskfile.c +++ linux/drivers/ide/ide-taskfile.c @@ -227,7 +227,7 @@ ide_startstop_t task_no_data_intr (ide_d ide_hwif_t *hwif = HWIF(drive); u8 stat; - local_irq_enable(); + local_irq_enable_nort(); if (!OK_STAT(stat = hwif->INB(IDE_STATUS_REG),READY_STAT,BAD_STAT)) { return ide_error(drive, "task_no_data_intr", stat); /* calls ide_end_drive_cmd */ @@ -279,7 +279,7 @@ static void ide_pio_sector(ide_drive_t * offset %= PAGE_SIZE; #ifdef CONFIG_HIGHMEM - local_irq_save(flags); + local_irq_save_nort(flags); #endif buf = kmap_atomic(page, KM_BIO_SRC_IRQ) + offset; @@ -299,7 +299,7 @@ static void ide_pio_sector(ide_drive_t * kunmap_atomic(buf, KM_BIO_SRC_IRQ); #ifdef CONFIG_HIGHMEM - local_irq_restore(flags); + local_irq_restore_nort(flags); #endif } @@ -457,7 +457,7 @@ ide_startstop_t pre_task_out_intr (ide_d } if (!drive->unmask) - local_irq_disable(); + local_irq_disable_nort(); ide_set_handler(drive, &task_out_intr, WAIT_WORSTCASE, NULL); ide_pio_datablock(drive, rq, 1); Index: linux/drivers/ide/ide.c =================================================================== --- linux.orig/drivers/ide/ide.c +++ linux/drivers/ide/ide.c @@ -222,7 +222,7 @@ static void init_hwif_data(ide_hwif_t *h hwif->mwdma_mask = 0x80; /* disable all mwdma */ hwif->swdma_mask = 0x80; /* disable all swdma */ - sema_init(&hwif->gendev_rel_sem, 0); + init_completion(&hwif->gendev_rel_comp); default_hwif_iops(hwif); default_hwif_transport(hwif); @@ -245,7 +245,7 @@ static void init_hwif_data(ide_hwif_t *h drive->is_flash = 0; drive->vdma = 0; INIT_LIST_HEAD(&drive->list); - sema_init(&drive->gendev_rel_sem, 0); + 
init_completion(&drive->gendev_rel_comp); } } @@ -602,7 +602,7 @@ void ide_unregister(unsigned int index) } spin_unlock_irq(&ide_lock); device_unregister(&drive->gendev); - down(&drive->gendev_rel_sem); + wait_for_completion(&drive->gendev_rel_comp); spin_lock_irq(&ide_lock); } hwif->present = 0; @@ -662,7 +662,7 @@ void ide_unregister(unsigned int index) /* More messed up locking ... */ spin_unlock_irq(&ide_lock); device_unregister(&hwif->gendev); - down(&hwif->gendev_rel_sem); + wait_for_completion(&hwif->gendev_rel_comp); /* * Remove us from the kernel's knowledge @@ -1049,15 +1049,13 @@ int ide_spin_wait_hwgroup (ide_drive_t * spin_lock_irq(&ide_lock); while (hwgroup->busy) { - unsigned long lflags; spin_unlock_irq(&ide_lock); - local_irq_set(lflags); + if (time_after(jiffies, timeout)) { - local_irq_restore(lflags); printk(KERN_ERR "%s: channel busy\n", drive->name); return -EBUSY; } - local_irq_restore(lflags); + spin_lock_irq(&ide_lock); } return 0; Index: linux/drivers/ide/pci/alim15x3.c =================================================================== --- linux.orig/drivers/ide/pci/alim15x3.c +++ linux/drivers/ide/pci/alim15x3.c @@ -296,7 +296,6 @@ static void ali15x3_tune_drive (ide_driv struct pci_dev *dev = hwif->pci_dev; int s_time, a_time, c_time; u8 s_clc, a_clc, r_clc; - unsigned long flags; int bus_speed = system_bus_clock(); int port = hwif->channel ? 0x5c : 0x58; int portFIFO = hwif->channel ? 
0x55 : 0x54; @@ -323,7 +322,6 @@ static void ali15x3_tune_drive (ide_driv if (r_clc >= 16) r_clc = 0; } - local_irq_save(flags); /* * PIO mode => ATA FIFO on, ATAPI FIFO off @@ -345,7 +343,6 @@ static void ali15x3_tune_drive (ide_driv pci_write_config_byte(dev, port, s_clc); pci_write_config_byte(dev, port+drive->select.b.unit+2, (a_clc << 4) | r_clc); - local_irq_restore(flags); /* * setup active rec @@ -585,7 +582,6 @@ static int ali15x3_dma_setup(ide_drive_t static unsigned int __devinit init_chipset_ali15x3 (struct pci_dev *dev, const char *name) { - unsigned long flags; u8 tmpbyte; struct pci_dev *north = pci_find_slot(0, PCI_DEVFN(0,0)); @@ -601,7 +597,6 @@ static unsigned int __devinit init_chips } #endif /* defined(DISPLAY_ALI_TIMINGS) && defined(CONFIG_PROC_FS) */ - local_irq_save(flags); if (m5229_revision < 0xC2) { /* @@ -614,7 +609,6 @@ static unsigned int __devinit init_chips * clear bit 7 */ pci_write_config_byte(dev, 0x4b, tmpbyte & 0x7F); - local_irq_restore(flags); return 0; } @@ -639,7 +633,6 @@ static unsigned int __devinit init_chips * 0:0.0 so if we didn't find one we know what is cooking. 
*/ if (north && north->vendor != PCI_VENDOR_ID_AL) { - local_irq_restore(flags); return 0; } @@ -662,7 +655,6 @@ static unsigned int __devinit init_chips pci_write_config_byte(isa_dev, 0x79, tmpbyte | 0x02); } } - local_irq_restore(flags); return 0; } @@ -683,10 +675,8 @@ static unsigned int __devinit ata66_ali1 unsigned int ata66 = 0; u8 cable_80_pin[2] = { 0, 0 }; - unsigned long flags; u8 tmpbyte; - local_irq_save(flags); if (m5229_revision >= 0xC2) { /* @@ -736,7 +726,6 @@ static unsigned int __devinit ata66_ali1 pci_write_config_byte(dev, 0x53, tmpbyte); - local_irq_restore(flags); return(ata66); } Index: linux/drivers/ide/pci/hpt366.c =================================================================== --- linux.orig/drivers/ide/pci/hpt366.c +++ linux/drivers/ide/pci/hpt366.c @@ -1481,7 +1481,6 @@ static void __devinit init_dma_hpt366(id u8 dma_new = 0, dma_old = 0; u8 primary = hwif->channel ? 0x4b : 0x43; u8 secondary = hwif->channel ? 0x4f : 0x47; - unsigned long flags; if (!dmabase) return; @@ -1493,8 +1492,6 @@ static void __devinit init_dma_hpt366(id dma_old = hwif->INB(dmabase+2); - local_irq_save(flags); - dma_new = dma_old; pci_read_config_byte(hwif->pci_dev, primary, &masterdma); pci_read_config_byte(hwif->pci_dev, secondary, &slavedma); @@ -1504,8 +1501,6 @@ static void __devinit init_dma_hpt366(id if (dma_new != dma_old) hwif->OUTB(dma_new, dmabase+2); - local_irq_restore(flags); - ide_setup_dma(hwif, dmabase, 8); } Index: linux/drivers/ide/setup-pci.c =================================================================== --- linux.orig/drivers/ide/setup-pci.c +++ linux/drivers/ide/setup-pci.c @@ -665,8 +665,11 @@ static int do_ide_setup_pci_device(struc { static ata_index_t ata_index = { .b = { .low = 0xff, .high = 0xff } }; int tried_config = 0; + unsigned long flags; int pciirq, ret; + spin_lock_irqsave(&ide_lock, flags); + ret = ide_setup_pci_controller(dev, d, noisy, &tried_config); if (ret < 0) goto out; @@ -721,6 +724,8 @@ static int 
do_ide_setup_pci_device(struc *index = ata_index; ide_pci_setup_ports(dev, d, pciirq, index); out: + spin_unlock_irqrestore(&ide_lock, flags); + return ret; } Index: linux/drivers/ieee1394/ieee1394_types.h =================================================================== --- linux.orig/drivers/ieee1394/ieee1394_types.h +++ linux/drivers/ieee1394/ieee1394_types.h @@ -19,7 +19,7 @@ struct hpsb_tlabel_pool { spinlock_t lock; u8 next; u32 allocations; - struct semaphore count; + struct compat_semaphore count; }; #define HPSB_TPOOL_INIT(_tp) \ Index: linux/drivers/ieee1394/nodemgr.c =================================================================== --- linux.orig/drivers/ieee1394/nodemgr.c +++ linux/drivers/ieee1394/nodemgr.c @@ -114,7 +114,7 @@ struct host_info { struct hpsb_host *host; struct list_head list; struct completion exited; - struct semaphore reset_sem; + struct compat_semaphore reset_sem; int pid; char daemon_name[15]; int kill_me; Index: linux/drivers/ieee1394/raw1394-private.h =================================================================== --- linux.orig/drivers/ieee1394/raw1394-private.h +++ linux/drivers/ieee1394/raw1394-private.h @@ -29,7 +29,7 @@ struct file_info { struct list_head req_pending; struct list_head req_complete; - struct semaphore complete_sem; + struct compat_semaphore complete_sem; spinlock_t reqlists_lock; wait_queue_head_t poll_wait_complete; Index: linux/drivers/input/gameport/gameport.c =================================================================== --- linux.orig/drivers/input/gameport/gameport.c +++ linux/drivers/input/gameport/gameport.c @@ -21,6 +21,7 @@ #include #include #include +#include /*#include */ @@ -100,12 +101,12 @@ static int gameport_measure_speed(struct tx = 1 << 30; for(i = 0; i < 50; i++) { - local_irq_save(flags); + local_irq_save_nort(flags); GET_TIME(t1); for (t = 0; t < 50; t++) gameport_read(gameport); GET_TIME(t2); GET_TIME(t3); - local_irq_restore(flags); + local_irq_restore_nort(flags); udelay(i 
* 10); if ((t = DELTA(t2,t1) - DELTA(t3,t2)) < tx) tx = t; } @@ -124,11 +125,11 @@ static int gameport_measure_speed(struct tx = 1 << 30; for(i = 0; i < 50; i++) { - local_irq_save(flags); + local_irq_save_nort(flags); rdtscl(t1); for (t = 0; t < 50; t++) gameport_read(gameport); rdtscl(t2); - local_irq_restore(flags); + local_irq_restore_nort(flags); udelay(i * 10); if (t2 - t1 < tx) tx = t2 - t1; } Index: linux/drivers/input/serio/sa1111ps2.c =================================================================== --- linux.orig/drivers/input/serio/sa1111ps2.c +++ linux/drivers/input/serio/sa1111ps2.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include Index: linux/drivers/media/dvb/dvb-core/dvb_frontend.c =================================================================== --- linux.orig/drivers/media/dvb/dvb-core/dvb_frontend.c +++ linux/drivers/media/dvb/dvb-core/dvb_frontend.c @@ -97,7 +97,7 @@ struct dvb_frontend_private { struct dvb_device *dvbdev; struct dvb_frontend_parameters parameters; struct dvb_fe_events events; - struct semaphore sem; + struct compat_semaphore sem; struct list_head list_head; wait_queue_head_t wait_queue; pid_t thread_pid; Index: linux/drivers/media/dvb/dvb-core/dvb_frontend.h =================================================================== --- linux.orig/drivers/media/dvb/dvb-core/dvb_frontend.h +++ linux/drivers/media/dvb/dvb-core/dvb_frontend.h @@ -86,7 +86,7 @@ struct dvb_fe_events { int eventr; int overflow; wait_queue_head_t wait_queue; - struct semaphore sem; + struct compat_semaphore sem; }; struct dvb_frontend { Index: linux/drivers/media/video/zr36120_i2c.c =================================================================== --- linux.orig/drivers/media/video/zr36120_i2c.c +++ linux/drivers/media/video/zr36120_i2c.c @@ -120,7 +120,7 @@ struct i2c_bus zoran_i2c_bus_template = I2C_BUSID_ZORAN, NULL, - SPIN_LOCK_UNLOCKED, + SPIN_LOCK_UNLOCKED(zoran_i2c_bus_template.lock), attach_inform, 
detach_inform, Index: linux/drivers/message/i2o/exec-osm.c =================================================================== --- linux.orig/drivers/message/i2o/exec-osm.c +++ linux/drivers/message/i2o/exec-osm.c @@ -204,7 +204,7 @@ static int i2o_msg_post_wait_complete(st { struct i2o_exec_wait *wait, *tmp; unsigned long flags; - static spinlock_t lock = SPIN_LOCK_UNLOCKED; + static DEFINE_SPINLOCK(lock); int rc = 1; /* Index: linux/drivers/misc/ibmasm/module.c =================================================================== --- linux.orig/drivers/misc/ibmasm/module.c +++ linux/drivers/misc/ibmasm/module.c @@ -85,7 +85,7 @@ static int __devinit ibmasm_init_one(str } memset(sp, 0, sizeof(struct service_processor)); - sp->lock = SPIN_LOCK_UNLOCKED; + spin_lock_init(&sp->lock); INIT_LIST_HEAD(&sp->command_queue); pci_set_drvdata(pdev, (void *)sp); Index: linux/drivers/net/3c527.c =================================================================== --- linux.orig/drivers/net/3c527.c +++ linux/drivers/net/3c527.c @@ -182,7 +182,7 @@ struct mc32_local u16 rx_ring_tail; /* index to rx de-queue end */ - struct semaphore cmd_mutex; /* Serialises issuing of execute commands */ + struct compat_semaphore cmd_mutex; /* Serialises issuing of execute commands */ struct completion execution_cmd; /* Card has completed an execute command */ struct completion xceiver_cmd; /* Card has completed a tx or rx command */ }; Index: linux/drivers/net/3c59x.c =================================================================== --- linux.orig/drivers/net/3c59x.c +++ linux/drivers/net/3c59x.c @@ -956,9 +956,9 @@ static void poll_vortex(struct net_devic struct vortex_private *vp = netdev_priv(dev); unsigned long flags; local_save_flags(flags); - local_irq_disable(); + local_irq_disable_nort(); (vp->full_bus_master_rx ? 
boomerang_interrupt:vortex_interrupt)(dev->irq,dev,NULL); - local_irq_restore(flags); + local_irq_restore_nort(flags); } #endif @@ -2004,13 +2004,17 @@ static void vortex_tx_timeout(struct net /* * Block interrupts because vortex_interrupt does a bare spin_lock() */ +#ifndef CONFIG_PREEMPT_RT unsigned long flags; local_irq_save(flags); +#endif if (vp->full_bus_master_tx) boomerang_interrupt(dev->irq, dev, NULL); else vortex_interrupt(dev->irq, dev, NULL); +#ifndef CONFIG_PREEMPT_RT local_irq_restore(flags); +#endif } } Index: linux/drivers/net/8139too.c =================================================================== --- linux.orig/drivers/net/8139too.c +++ linux/drivers/net/8139too.c @@ -2128,10 +2128,10 @@ static int rtl8139_poll(struct net_devic * Order is important since data can get interrupted * again when we think we are done. */ - local_irq_disable(); + raw_local_irq_disable(); RTL_W16_F(IntrMask, rtl8139_intr_mask); __netif_rx_complete(dev); - local_irq_enable(); + raw_local_irq_enable(); } spin_unlock(&tp->rx_lock); Index: linux/drivers/net/e1000/e1000_main.c =================================================================== --- linux.orig/drivers/net/e1000/e1000_main.c +++ linux/drivers/net/e1000/e1000_main.c @@ -2262,10 +2262,10 @@ e1000_xmit_frame(struct sk_buff *skb, st if(adapter->pcix_82544) count += nr_frags; - local_irq_save(flags); + local_irq_save_nort(flags); if (!spin_trylock(&adapter->tx_lock)) { /* Collision - tell upper layer to requeue */ - local_irq_restore(flags); + local_irq_restore_nort(flags); return NETDEV_TX_LOCKED; } if(adapter->hw.tx_pkt_filtering && (adapter->hw.mac_type == e1000_82573) ) Index: linux/drivers/net/hamradio/6pack.c =================================================================== --- linux.orig/drivers/net/hamradio/6pack.c +++ linux/drivers/net/hamradio/6pack.c @@ -124,7 +124,7 @@ struct sixpack { struct timer_list tx_t; struct timer_list resync_t; atomic_t refcnt; - struct semaphore dead_sem; + struct 
compat_semaphore dead_sem; spinlock_t lock; }; Index: linux/drivers/net/hamradio/mkiss.c =================================================================== --- linux.orig/drivers/net/hamradio/mkiss.c +++ linux/drivers/net/hamradio/mkiss.c @@ -85,7 +85,7 @@ struct mkiss { #define CRC_MODE_SMACK 2 atomic_t refcnt; - struct semaphore dead_sem; + struct compat_semaphore dead_sem; }; /*---------------------------------------------------------------------------*/ @@ -622,7 +622,7 @@ static void ax_setup(struct net_device * * best way to fix this is to use a rwlock in the tty struct, but for now we * use a single global rwlock for all ttys in ppp line discipline. */ -static rwlock_t disc_data_lock = RW_LOCK_UNLOCKED; +static DEFINE_RWLOCK(disc_data_lock); static struct mkiss *mkiss_get(struct tty_struct *tty) { Index: linux/drivers/net/netconsole.c =================================================================== --- linux.orig/drivers/net/netconsole.c +++ linux/drivers/net/netconsole.c @@ -75,10 +75,19 @@ static void write_msg(struct console *co return; local_irq_save(flags); +#ifdef CONFIG_PREEMPT_RT + /* + * A bit hairy. 
Netconsole uses mutexes (indirectly) and + * thus must have interrupts enabled: + */ + local_irq_enable(); +#endif for(left = len; left; ) { frag = min(left, MAX_PRINT_CHUNK); + WARN_ON_RT(irqs_disabled()); netpoll_send_udp(&np, msg, frag); + WARN_ON_RT(irqs_disabled()); msg += frag; left -= frag; } Index: linux/drivers/net/ns83820.c =================================================================== --- linux.orig/drivers/net/ns83820.c +++ linux/drivers/net/ns83820.c @@ -1014,8 +1014,6 @@ static void do_tx_done(struct net_device struct ns83820 *dev = PRIV(ndev); u32 cmdsts, tx_done_idx, *desc; - spin_lock_irq(&dev->tx_lock); - dprintk("do_tx_done(%p)\n", ndev); tx_done_idx = dev->tx_done_idx; desc = dev->tx_descs + (tx_done_idx * DESC_SIZE); @@ -1071,7 +1069,6 @@ static void do_tx_done(struct net_device netif_start_queue(ndev); netif_wake_queue(ndev); } - spin_unlock_irq(&dev->tx_lock); } static void ns83820_cleanup_tx(struct ns83820 *dev) @@ -1372,7 +1369,9 @@ static void ns83820_do_isr(struct net_de * work has accumulated */ if ((ISR_TXDESC | ISR_TXIDLE | ISR_TXOK | ISR_TXERR) & isr) { + spin_lock_irq(&dev->tx_lock); do_tx_done(ndev); + spin_unlock_irq(&dev->tx_lock); /* Disable TxOk if there are no outstanding tx packets. 
*/ @@ -1457,7 +1456,7 @@ static void ns83820_tx_timeout(struct ne u32 tx_done_idx, *desc; unsigned long flags; - local_irq_save(flags); + spin_lock_irqsave(&dev->tx_lock, flags); tx_done_idx = dev->tx_done_idx; desc = dev->tx_descs + (tx_done_idx * DESC_SIZE); @@ -1484,7 +1483,7 @@ static void ns83820_tx_timeout(struct ne ndev->name, tx_done_idx, dev->tx_free_idx, le32_to_cpu(desc[DESC_CMDSTS])); - local_irq_restore(flags); + spin_unlock_irqrestore(&dev->tx_lock, flags); } static void ns83820_tx_watch(unsigned long data) Index: linux/drivers/net/plip.c =================================================================== --- linux.orig/drivers/net/plip.c +++ linux/drivers/net/plip.c @@ -229,7 +229,10 @@ struct net_local { struct hh_cache *hh); spinlock_t lock; atomic_t kill_timer; - struct semaphore killed_timer_sem; + /* + * PREEMPT_RT: this isnt a mutex, it should be struct completion. + */ + struct compat_semaphore killed_timer_sem; }; static inline void enable_parport_interrupts (struct net_device *dev) Index: linux/drivers/net/ppp_async.c =================================================================== --- linux.orig/drivers/net/ppp_async.c +++ linux/drivers/net/ppp_async.c @@ -65,7 +65,7 @@ struct asyncppp { struct tasklet_struct tsk; atomic_t refcnt; - struct semaphore dead_sem; + struct compat_semaphore dead_sem; struct ppp_channel chan; /* interface to generic ppp layer */ unsigned char obuf[OBUFSIZE]; }; Index: linux/drivers/net/ppp_synctty.c =================================================================== --- linux.orig/drivers/net/ppp_synctty.c +++ linux/drivers/net/ppp_synctty.c @@ -70,7 +70,7 @@ struct syncppp { struct tasklet_struct tsk; atomic_t refcnt; - struct semaphore dead_sem; + struct compat_semaphore dead_sem; struct ppp_channel chan; /* interface to generic ppp layer */ }; Index: linux/drivers/net/smc91x.c =================================================================== --- linux.orig/drivers/net/smc91x.c +++ 
linux/drivers/net/smc91x.c @@ -74,6 +74,7 @@ static const char version[] = #include #include #include +#include #include #include #include @@ -1998,7 +1999,7 @@ static int __init smc_probe(struct net_d if (retval) goto err_out; - set_irq_type(dev->irq, SMC_IRQ_TRIGGER_TYPE); + SMC_SET_IRQ_TYPE(dev->irq, SMC_IRQ_TRIGGER_TYPE); #ifdef SMC_USE_PXA_DMA { Index: linux/drivers/net/smc91x.h =================================================================== --- linux.orig/drivers/net/smc91x.h +++ linux/drivers/net/smc91x.h @@ -90,7 +90,7 @@ __l--; \ } \ } while (0) -#define set_irq_type(irq, type) +#define SMC_SET_IRQ_TYPE(irq, type) #elif defined(CONFIG_SA1100_PLEB) /* We can only do 16-bit reads and writes in the static memory space. */ @@ -109,7 +109,7 @@ #define SMC_outw(v, a, r) outw(v, (a) + (r)) #define SMC_outsw(a, r, p, l) outsw((a) + (r), p, l) -#define set_irq_type(irq, type) do {} while (0) +#define SMC_SET_IRQ_TYPE(irq, type) do {} while (0) #elif defined(CONFIG_SA1100_ASSABET) @@ -209,7 +209,7 @@ SMC_outw(u16 val, void __iomem *ioaddr, #define SMC_insw(a, r, p, l) insw((a) + (r) - 0xa0000000, p, l) #define SMC_outsw(a, r, p, l) outsw((a) + (r) - 0xa0000000, p, l) -#define set_irq_type(irq, type) do {} while(0) +#define SMC_SET_IRQ_TYPE(irq, type) do {} while(0) #elif defined(CONFIG_ISA) @@ -237,7 +237,7 @@ SMC_outw(u16 val, void __iomem *ioaddr, #define SMC_insw(a, r, p, l) insw((a) + (r) - 0xa0000000, p, l) #define SMC_outsw(a, r, p, l) outsw((a) + (r) - 0xa0000000, p, l) -#define set_irq_type(irq, type) do {} while(0) +#define SMC_SET_IRQ_TYPE(irq, type) do {} while(0) #define RPC_LSA_DEFAULT RPC_LED_TX_RX #define RPC_LSB_DEFAULT RPC_LED_100_10 @@ -310,6 +310,10 @@ static inline void SMC_outsw (unsigned l #endif +#ifndef SMC_SET_IRQ_TYPE +#define SMC_SET_IRQ_TYPE set_irq_type +#endif + #ifndef SMC_IRQ_TRIGGER_TYPE #define SMC_IRQ_TRIGGER_TYPE IRQT_RISING #endif Index: linux/drivers/net/tulip/tulip_core.c 
=================================================================== --- linux.orig/drivers/net/tulip/tulip_core.c +++ linux/drivers/net/tulip/tulip_core.c @@ -1811,6 +1811,7 @@ static void __devexit tulip_remove_one ( pci_iounmap(pdev, tp->base_addr); free_netdev (dev); pci_release_regions (pdev); + pci_disable_device (pdev); pci_set_drvdata (pdev, NULL); /* pci_power_off (pdev, -1); */ Index: linux/drivers/oprofile/buffer_sync.c =================================================================== --- linux.orig/drivers/oprofile/buffer_sync.c +++ linux/drivers/oprofile/buffer_sync.c @@ -43,13 +43,16 @@ static void process_task_mortuary(void); * list for processing. Only after two full buffer syncs * does the task eventually get freed, because by then * we are sure we will not reference it again. + * Can be invoked from softirq via RCU callback due to + * call_rcu() of the task struct, hence the _irqsave. */ static int task_free_notify(struct notifier_block * self, unsigned long val, void * data) { + unsigned long flags; struct task_struct * task = data; - spin_lock(&task_mortuary); + spin_lock_irqsave(&task_mortuary, flags); list_add(&task->tasks, &dying_tasks); - spin_unlock(&task_mortuary); + spin_unlock_irqrestore(&task_mortuary, flags); return NOTIFY_OK; } @@ -431,25 +434,22 @@ static void increment_tail(struct oprofi */ static void process_task_mortuary(void) { - struct list_head * pos; - struct list_head * pos2; + unsigned long flags; + LIST_HEAD(local_dead_tasks); struct task_struct * task; + struct task_struct * ttask; - spin_lock(&task_mortuary); + spin_lock_irqsave(&task_mortuary, flags); - list_for_each_safe(pos, pos2, &dead_tasks) { - task = list_entry(pos, struct task_struct, tasks); - list_del(&task->tasks); - free_task(task); - } + list_splice_init(&dead_tasks, &local_dead_tasks); + list_splice_init(&dying_tasks, &dead_tasks); - list_for_each_safe(pos, pos2, &dying_tasks) { - task = list_entry(pos, struct task_struct, tasks); + 
spin_unlock_irqrestore(&task_mortuary, flags); + + list_for_each_entry_safe(task, ttask, &local_dead_tasks, tasks) { list_del(&task->tasks); - list_add_tail(&task->tasks, &dead_tasks); + free_task(task); } - - spin_unlock(&task_mortuary); } Index: linux/drivers/oprofile/oprofilefs.c =================================================================== --- linux.orig/drivers/oprofile/oprofilefs.c +++ linux/drivers/oprofile/oprofilefs.c @@ -21,7 +21,7 @@ #define OPROFILEFS_MAGIC 0x6f70726f -DEFINE_SPINLOCK(oprofilefs_lock); +DEFINE_RAW_SPINLOCK(oprofilefs_lock); static struct inode * oprofilefs_get_inode(struct super_block * sb, int mode) { Index: linux/drivers/pci/hotplug/cpci_hotplug_core.c =================================================================== --- linux.orig/drivers/pci/hotplug/cpci_hotplug_core.c +++ linux/drivers/pci/hotplug/cpci_hotplug_core.c @@ -60,8 +60,8 @@ static int slots; static atomic_t extracting; int cpci_debug; static struct cpci_hp_controller *controller; -static struct semaphore event_semaphore; /* mutex for process loop (up if something to process) */ -static struct semaphore thread_exit; /* guard ensure thread has exited before calling it quits */ +static struct compat_semaphore event_semaphore; /* mutex for process loop (up if something to process) */ +static struct compat_semaphore thread_exit; /* guard ensure thread has exited before calling it quits */ static int thread_finished = 1; static int enable_slot(struct hotplug_slot *slot); Index: linux/drivers/pci/hotplug/cpqphp_ctrl.c =================================================================== --- linux.orig/drivers/pci/hotplug/cpqphp_ctrl.c +++ linux/drivers/pci/hotplug/cpqphp_ctrl.c @@ -45,8 +45,8 @@ static int configure_new_function(struct u8 behind_bridge, struct resource_lists *resources); static void interrupt_event_handler(struct controller *ctrl); -static struct semaphore event_semaphore; /* mutex for process loop (up if something to process) */ -static struct semaphore 
event_exit; /* guard ensure thread has exited before calling it quits */ +static struct compat_semaphore event_semaphore; /* mutex for process loop (up if something to process) */ +static struct compat_semaphore event_exit; /* guard ensure thread has exited before calling it quits */ static int event_finished; static unsigned long pushbutton_pending; /* = 0 */ Index: linux/drivers/pci/hotplug/ibmphp_hpc.c =================================================================== --- linux.orig/drivers/pci/hotplug/ibmphp_hpc.c +++ linux/drivers/pci/hotplug/ibmphp_hpc.c @@ -104,7 +104,7 @@ static int tid_poll; static struct semaphore sem_hpcaccess; // lock access to HPC static struct semaphore semOperations; // lock all operations and // access to data structures -static struct semaphore sem_exit; // make sure polling thread goes away +static struct compat_semaphore sem_exit; // make sure polling thread goes away //---------------------------------------------------------------------------- // local function prototypes //---------------------------------------------------------------------------- Index: linux/drivers/pci/hotplug/pciehp_ctrl.c =================================================================== --- linux.orig/drivers/pci/hotplug/pciehp_ctrl.c +++ linux/drivers/pci/hotplug/pciehp_ctrl.c @@ -48,8 +48,8 @@ static int configure_new_function( struc u8 behind_bridge, struct resource_lists *resources, u8 bridge_bus, u8 bridge_dev); static void interrupt_event_handler(struct controller *ctrl); -static struct semaphore event_semaphore; /* mutex for process loop (up if something to process) */ -static struct semaphore event_exit; /* guard ensure thread has exited before calling it quits */ +static struct compat_semaphore event_semaphore; /* mutex for process loop (up if something to process) */ +static struct compat_semaphore event_exit; /* guard ensure thread has exited before calling it quits */ static int event_finished; static unsigned long pushbutton_pending; /* = 
0 */ static unsigned long surprise_rm_pending; /* = 0 */ Index: linux/drivers/pci/hotplug/shpchp_ctrl.c =================================================================== --- linux.orig/drivers/pci/hotplug/shpchp_ctrl.c +++ linux/drivers/pci/hotplug/shpchp_ctrl.c @@ -47,8 +47,8 @@ static int configure_new_function( struc u8 behind_bridge, struct resource_lists *resources, u8 bridge_bus, u8 bridge_dev); static void interrupt_event_handler(struct controller *ctrl); -static struct semaphore event_semaphore; /* mutex for process loop (up if something to process) */ -static struct semaphore event_exit; /* guard ensure thread has exited before calling it quits */ +static struct compat_semaphore event_semaphore; /* mutex for process loop (up if something to process) */ +static struct compat_semaphore event_exit; /* guard ensure thread has exited before calling it quits */ static int event_finished; static unsigned long pushbutton_pending; /* = 0 */ Index: linux/drivers/pcmcia/soc_common.c =================================================================== --- linux.orig/drivers/pcmcia/soc_common.c +++ linux/drivers/pcmcia/soc_common.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include Index: linux/drivers/s390/char/vmlogrdr.c =================================================================== --- linux.orig/drivers/s390/char/vmlogrdr.c +++ linux/drivers/s390/char/vmlogrdr.c @@ -145,7 +145,7 @@ static struct vmlogrdr_priv_t sys_ser[] .recording_name = "EREP", .minor_num = 0, .buffer_free = 1, - .priv_lock = SPIN_LOCK_UNLOCKED, + .priv_lock = SPIN_LOCK_UNLOCKED(sys_ser[0].priv_lock), .autorecording = 1, .autopurge = 1, }, @@ -154,7 +154,7 @@ static struct vmlogrdr_priv_t sys_ser[] .recording_name = "ACCOUNT", .minor_num = 1, .buffer_free = 1, - .priv_lock = SPIN_LOCK_UNLOCKED, + .priv_lock = SPIN_LOCK_UNLOCKED(sys_ser[1].priv_lock), .autorecording = 1, .autopurge = 1, }, @@ -163,7 +163,7 @@ static struct vmlogrdr_priv_t sys_ser[] .recording_name = 
"SYMPTOM", .minor_num = 2, .buffer_free = 1, - .priv_lock = SPIN_LOCK_UNLOCKED, + .priv_lock = SPIN_LOCK_UNLOCKED(sys_ser[2].priv_lock), .autorecording = 1, .autopurge = 1, } Index: linux/drivers/s390/cio/cmf.c =================================================================== --- linux.orig/drivers/s390/cio/cmf.c +++ linux/drivers/s390/cio/cmf.c @@ -297,7 +297,7 @@ struct cmb_area { }; static struct cmb_area cmb_area = { - .lock = SPIN_LOCK_UNLOCKED, + .lock = SPIN_LOCK_UNLOCKED(cmb_area.lock), .list = LIST_HEAD_INIT(cmb_area.list), .num_channels = 1024, }; Index: linux/drivers/sbus/char/cpwatchdog.c =================================================================== --- linux.orig/drivers/sbus/char/cpwatchdog.c +++ linux/drivers/sbus/char/cpwatchdog.c @@ -155,7 +155,7 @@ struct wd_device { }; static struct wd_device wd_dev = { - 0, SPIN_LOCK_UNLOCKED, 0, 0, 0, 0, + 0, SPIN_LOCK_UNLOCKED(wd_dev.lock), 0, 0, 0, 0, }; static struct timer_list wd_timer; Index: linux/drivers/scsi/aacraid/aacraid.h =================================================================== --- linux.orig/drivers/scsi/aacraid/aacraid.h +++ linux/drivers/scsi/aacraid/aacraid.h @@ -731,7 +731,7 @@ struct aac_fib_context { u32 unique; // unique value representing this context ulong jiffies; // used for cleanup - dmb changed to ulong struct list_head next; // used to link context's into a linked list - struct semaphore wait_sem; // this is used to wait for the next fib to arrive. + struct compat_semaphore wait_sem; // this is used to wait for the next fib to arrive. int wait; // Set to true when thread is in WaitForSingleObject unsigned long count; // total number of FIBs on FibList struct list_head fib_list; // this holds fibs and their attachd hw_fibs @@ -800,7 +800,7 @@ struct fib { * This is the event the sendfib routine will wait on if the * caller did not pass one and this is synch io. 
*/ - struct semaphore event_wait; + struct compat_semaphore event_wait; spinlock_t event_lock; u32 done; /* gets set to 1 when fib is complete */ Index: linux/drivers/scsi/aic7xxx/aic79xx_osm.h =================================================================== --- linux.orig/drivers/scsi/aic7xxx/aic79xx_osm.h +++ linux/drivers/scsi/aic7xxx/aic79xx_osm.h @@ -391,7 +391,7 @@ struct ahd_platform_data { spinlock_t spin_lock; u_int qfrozen; struct timer_list reset_timer; - struct semaphore eh_sem; + struct compat_semaphore eh_sem; struct Scsi_Host *host; /* pointer to scsi host */ #define AHD_LINUX_NOIRQ ((uint32_t)~0) uint32_t irq; /* IRQ for this adapter */ Index: linux/drivers/scsi/aic7xxx/aic7xxx_osm.h =================================================================== --- linux.orig/drivers/scsi/aic7xxx/aic7xxx_osm.h +++ linux/drivers/scsi/aic7xxx/aic7xxx_osm.h @@ -395,7 +395,7 @@ struct ahc_platform_data { spinlock_t spin_lock; u_int qfrozen; struct timer_list reset_timer; - struct semaphore eh_sem; + struct compat_semaphore eh_sem; struct Scsi_Host *host; /* pointer to scsi host */ #define AHC_LINUX_NOIRQ ((uint32_t)~0) uint32_t irq; /* IRQ for this adapter */ Index: linux/drivers/scsi/ncr53c8xx.c =================================================================== --- linux.orig/drivers/scsi/ncr53c8xx.c +++ linux/drivers/scsi/ncr53c8xx.c @@ -3481,8 +3481,8 @@ static int ncr_queue_command (struct ncb **---------------------------------------------------- */ if (np->settle_time && cmd->timeout_per_command >= HZ) { - u_long tlimit = ktime_get(cmd->timeout_per_command - HZ); - if (ktime_dif(np->settle_time, tlimit) > 0) + u_long tlimit = jiffies + cmd->timeout_per_command - HZ; + if (time_after(np->settle_time, tlimit)) np->settle_time = tlimit; } @@ -3516,7 +3516,7 @@ static int ncr_queue_command (struct ncb ** Force ordered tag if necessary to avoid timeouts ** and to preserve interactivity. 
*/ - if (lp && ktime_exp(lp->tags_stime)) { + if (lp && time_after(jiffies, lp->tags_stime)) { if (lp->tags_smap) { order = M_ORDERED_TAG; if ((DEBUG_FLAGS & DEBUG_TAGS)||bootverbose>2){ @@ -3524,7 +3524,7 @@ static int ncr_queue_command (struct ncb "ordered tag forced.\n"); } } - lp->tags_stime = ktime_get(3*HZ); + lp->tags_stime = jiffies + 3*HZ; lp->tags_smap = lp->tags_umap; } @@ -3669,7 +3669,7 @@ static int ncr_queue_command (struct ncb /* ** select */ - cp->phys.select.sel_id = sdev->id; + cp->phys.select.sel_id = sdev_id(sdev); cp->phys.select.sel_scntl3 = tp->wval; cp->phys.select.sel_sxfer = tp->sval; /* @@ -3792,7 +3792,7 @@ static int ncr_reset_scsi_bus(struct ncb u32 term; int retv = 0; - np->settle_time = ktime_get(settle_delay * HZ); + np->settle_time = jiffies + settle_delay * HZ; if (bootverbose > 1) printk("%s: resetting, " @@ -4820,7 +4820,7 @@ static void ncr_set_sync_wide_status (st */ for (cp = np->ccb; cp; cp = cp->link_ccb) { if (!cp->cmd) continue; - if (cp->cmd->device->id != target) continue; + if (scmd_id(cp->cmd) != target) continue; #if 0 cp->sync_status = tp->sval; cp->wide_status = tp->wval; @@ -4844,7 +4844,7 @@ static void ncr_setsync (struct ncb *np, u_char target = INB (nc_sdid) & 0x0f; u_char idiv; - BUG_ON(target != (cmd->device->id & 0xf)); + BUG_ON(target != (scmd_id(cmd) & 0xf)); tp = &np->target[target]; @@ -4902,7 +4902,7 @@ static void ncr_setwide (struct ncb *np, u_char scntl3; u_char sxfer; - BUG_ON(target != (cmd->device->id & 0xf)); + BUG_ON(target != (scmd_id(cmd) & 0xf)); tp = &np->target[target]; tp->widedone = wide+1; @@ -5044,7 +5044,7 @@ static void ncr_setup_tags (struct ncb * static void ncr_timeout (struct ncb *np) { - u_long thistime = ktime_get(0); + u_long thistime = jiffies; /* ** If release process in progress, let's go @@ -5057,7 +5057,7 @@ static void ncr_timeout (struct ncb *np) return; } - np->timer.expires = ktime_get(SCSI_NCR_TIMER_INTERVAL); + np->timer.expires = jiffies + SCSI_NCR_TIMER_INTERVAL; 
add_timer(&np->timer); /* @@ -5336,8 +5336,8 @@ void ncr_exception (struct ncb *np) **========================================================= */ - if (ktime_exp(np->regtime)) { - np->regtime = ktime_get(10*HZ); + if (time_after(jiffies, np->regtime)) { + np->regtime = jiffies + 10*HZ; for (i = 0; iregdump); i++) ((char*)&np->regdump)[i] = INB_OFF(i); np->regdump.nc_dstat = dstat; @@ -5453,7 +5453,7 @@ static int ncr_int_sbmc (struct ncb *np) ** Suspend command processing for 1 second and ** reinitialize all except the chip. */ - np->settle_time = ktime_get(1*HZ); + np->settle_time = jiffies + HZ; ncr_init (np, 0, bootverbose ? "scsi mode change" : NULL, HS_RESET); return 1; } @@ -6923,7 +6923,7 @@ static struct lcb *ncr_setup_lcb (struct for (i = 0 ; i < MAX_TAGS ; i++) lp->cb_tags[i] = i; lp->maxnxs = MAX_TAGS; - lp->tags_stime = ktime_get(3*HZ); + lp->tags_stime = jiffies + 3*HZ; ncr_setup_tags (np, sdev); } Index: linux/drivers/scsi/qla2xxx/qla_def.h =================================================================== --- linux.orig/drivers/scsi/qla2xxx/qla_def.h +++ linux/drivers/scsi/qla2xxx/qla_def.h @@ -2416,7 +2416,7 @@ typedef struct scsi_qla_host { spinlock_t mbx_reg_lock; /* Mbx Cmd Register Lock */ struct semaphore mbx_cmd_sem; /* Serialialize mbx access */ - struct semaphore mbx_intr_sem; /* Used for completion notification */ + struct compat_semaphore mbx_intr_sem; /* Used for completion notification */ uint32_t mbx_flags; #define MBX_IN_PROGRESS BIT_0 Index: linux/drivers/scsi/qla2xxx/qla_os.c =================================================================== --- linux.orig/drivers/scsi/qla2xxx/qla_os.c +++ linux/drivers/scsi/qla2xxx/qla_os.c @@ -2123,12 +2123,13 @@ qla2x00_free_sp_pool( scsi_qla_host_t *h static int qla2x00_do_dpc(void *data) { - DECLARE_MUTEX_LOCKED(sem); + DECLARE_MUTEX(sem); scsi_qla_host_t *ha; fc_port_t *fcport; uint8_t status; uint16_t next_loopid; + down(&sem); ha = (scsi_qla_host_t *)data; lock_kernel(); Index: 
linux/drivers/scsi/scsi.c =================================================================== --- linux.orig/drivers/scsi/scsi.c +++ linux/drivers/scsi/scsi.c @@ -772,10 +772,10 @@ void __scsi_done(struct scsi_cmnd *cmd) * It is a per-CPU queue, so we just disable local interrupts * and need no spinlock. */ - local_irq_save(flags); + raw_local_irq_save(flags); list_add_tail(&cmd->eh_entry, &__get_cpu_var(scsi_done_q)); raise_softirq_irqoff(SCSI_SOFTIRQ); - local_irq_restore(flags); + raw_local_irq_restore(flags); } /** @@ -792,9 +792,9 @@ static void scsi_softirq(struct softirq_ int disposition; LIST_HEAD(local_q); - local_irq_disable(); + raw_local_irq_disable(); list_splice_init(&__get_cpu_var(scsi_done_q), &local_q); - local_irq_enable(); + raw_local_irq_enable(); while (!list_empty(&local_q)) { struct scsi_cmnd *cmd = list_entry(local_q.next, @@ -1286,11 +1286,11 @@ static int scsi_cpu_notify(struct notifi switch(action) { case CPU_DEAD: /* Drain scsi_done_q. */ - local_irq_disable(); + raw_local_irq_disable(); list_splice_init(&per_cpu(scsi_done_q, cpu), &__get_cpu_var(scsi_done_q)); raise_softirq_irqoff(SCSI_SOFTIRQ); - local_irq_enable(); + raw_local_irq_enable(); break; default: break; Index: linux/drivers/scsi/scsi_error.c =================================================================== --- linux.orig/drivers/scsi/scsi_error.c +++ linux/drivers/scsi/scsi_error.c @@ -1647,6 +1647,12 @@ int scsi_error_handler(void *data) __set_current_state(TASK_RUNNING); + /* + * There's a good chance that the loop will exit in the + * TASK_INTERRUPTIBLE state. 
+ */ + __set_current_state(TASK_RUNNING); + SCSI_LOG_ERROR_RECOVERY(1, printk("Error handler scsi_eh_%d" " exiting\n",shost->host_no)); Index: linux/drivers/scsi/sym53c8xx_defs.h =================================================================== --- linux.orig/drivers/scsi/sym53c8xx_defs.h +++ linux/drivers/scsi/sym53c8xx_defs.h @@ -281,19 +281,6 @@ #endif /* -** These simple macros limit expression involving -** kernel time values (jiffies) to some that have -** chance not to be too much incorrect. :-) -*/ -#define ktime_get(o) (jiffies + (u_long) o) -#define ktime_exp(b) ((long)(jiffies) - (long)(b) >= 0) -#define ktime_dif(a, b) ((long)(a) - (long)(b)) -/* These ones are not used in this driver */ -#define ktime_add(a, o) ((a) + (u_long)(o)) -#define ktime_sub(a, o) ((a) - (u_long)(o)) - - -/* * IO functions definition for big/little endian CPU support. * For now, the NCR is only supported in little endian addressing mode, */ Index: linux/drivers/serial/cpm_uart/cpm_uart_core.c =================================================================== --- linux.orig/drivers/serial/cpm_uart/cpm_uart_core.c +++ linux/drivers/serial/cpm_uart/cpm_uart_core.c @@ -909,7 +909,7 @@ struct uart_cpm_port cpm_uart_ports[UART .irq = SMC1_IRQ, .ops = &cpm_uart_pops, .iotype = SERIAL_IO_MEM, - .lock = SPIN_LOCK_UNLOCKED, + .lock = SPIN_LOCK_UNLOCKED(cpm_uart_ports[UART_SMC1].port.lock), }, .flags = FLAG_SMC, .tx_nrfifos = TX_NUM_FIFO, @@ -923,7 +923,7 @@ struct uart_cpm_port cpm_uart_ports[UART .irq = SMC2_IRQ, .ops = &cpm_uart_pops, .iotype = SERIAL_IO_MEM, - .lock = SPIN_LOCK_UNLOCKED, + .lock = SPIN_LOCK_UNLOCKED(cpm_uart_ports[UART_SMC2].port.lock), }, .flags = FLAG_SMC, .tx_nrfifos = TX_NUM_FIFO, @@ -940,7 +940,7 @@ struct uart_cpm_port cpm_uart_ports[UART .irq = SCC1_IRQ, .ops = &cpm_uart_pops, .iotype = SERIAL_IO_MEM, - .lock = SPIN_LOCK_UNLOCKED, + .lock = SPIN_LOCK_UNLOCKED(cpm_uart_ports[UART_SCC1].port.lock), }, .tx_nrfifos = TX_NUM_FIFO, .tx_fifosize = TX_BUF_SIZE, @@ 
-954,7 +954,7 @@ struct uart_cpm_port cpm_uart_ports[UART .irq = SCC2_IRQ, .ops = &cpm_uart_pops, .iotype = SERIAL_IO_MEM, - .lock = SPIN_LOCK_UNLOCKED, + .lock = SPIN_LOCK_UNLOCKED(cpm_uart_ports[UART_SCC2].port.lock), }, .tx_nrfifos = TX_NUM_FIFO, .tx_fifosize = TX_BUF_SIZE, @@ -968,7 +968,7 @@ struct uart_cpm_port cpm_uart_ports[UART .irq = SCC3_IRQ, .ops = &cpm_uart_pops, .iotype = SERIAL_IO_MEM, - .lock = SPIN_LOCK_UNLOCKED, + .lock = SPIN_LOCK_UNLOCKED(cpm_uart_ports[UART_SCC3].port.lock), }, .tx_nrfifos = TX_NUM_FIFO, .tx_fifosize = TX_BUF_SIZE, @@ -982,7 +982,7 @@ struct uart_cpm_port cpm_uart_ports[UART .irq = SCC4_IRQ, .ops = &cpm_uart_pops, .iotype = SERIAL_IO_MEM, - .lock = SPIN_LOCK_UNLOCKED, + .lock = SPIN_LOCK_UNLOCKED(cpm_uart_ports[UART_SCC4].port.lock), }, .tx_nrfifos = TX_NUM_FIFO, .tx_fifosize = TX_BUF_SIZE, Index: linux/drivers/serial/s3c2410.c =================================================================== --- linux.orig/drivers/serial/s3c2410.c +++ linux/drivers/serial/s3c2410.c @@ -966,7 +966,7 @@ static struct uart_driver s3c24xx_uart_d static struct s3c24xx_uart_port s3c24xx_serial_ports[NR_PORTS] = { [0] = { .port = { - .lock = SPIN_LOCK_UNLOCKED, + .lock = SPIN_LOCK_UNLOCKED(s3c24xx_serial_ports[0].port.lock), .iotype = UPIO_MEM, .irq = IRQ_S3CUART_RX0, .uartclk = 0, @@ -978,7 +978,7 @@ static struct s3c24xx_uart_port s3c24xx_ }, [1] = { .port = { - .lock = SPIN_LOCK_UNLOCKED, + .lock = SPIN_LOCK_UNLOCKED(s3c24xx_serial_ports[1].port.lock), .iotype = UPIO_MEM, .irq = IRQ_S3CUART_RX1, .uartclk = 0, @@ -992,7 +992,7 @@ static struct s3c24xx_uart_port s3c24xx_ [2] = { .port = { - .lock = SPIN_LOCK_UNLOCKED, + .lock = SPIN_LOCK_UNLOCKED(s3c24xx_serial_ports[2].port.lock), .iotype = UPIO_MEM, .irq = IRQ_S3CUART_RX2, .uartclk = 0, Index: linux/drivers/usb/core/devio.c =================================================================== --- linux.orig/drivers/usb/core/devio.c +++ linux/drivers/usb/core/devio.c @@ -284,10 +284,11 @@ static 
void async_completed(struct urb * struct async *as = (struct async *)urb->context; struct dev_state *ps = as->ps; struct siginfo sinfo; + unsigned long flags; - spin_lock(&ps->lock); - list_move_tail(&as->asynclist, &ps->async_completed); - spin_unlock(&ps->lock); + spin_lock_irqsave(&ps->lock, flags); + list_move_tail(&as->asynclist, &ps->async_completed); + spin_unlock_irqrestore(&ps->lock, flags); if (as->signr) { sinfo.si_signo = as->signr; sinfo.si_errno = as->urb->status; Index: linux/drivers/usb/core/hcd.c =================================================================== --- linux.orig/drivers/usb/core/hcd.c +++ linux/drivers/usb/core/hcd.c @@ -506,13 +506,11 @@ error: } /* any errors get returned through the urb completion */ - local_irq_save (flags); - spin_lock (&urb->lock); + spin_lock_irqsave(&urb->lock, flags); if (urb->status == -EINPROGRESS) urb->status = status; - spin_unlock (&urb->lock); + spin_unlock_irqrestore(&urb->lock, flags); usb_hcd_giveback_urb (hcd, urb, NULL); - local_irq_restore (flags); return 0; } @@ -540,8 +538,7 @@ void usb_hcd_poll_rh_status(struct usb_h if (length > 0) { /* try to complete the status urb */ - local_irq_save (flags); - spin_lock(&hcd_root_hub_lock); + spin_lock_irqsave(&hcd_root_hub_lock, flags); urb = hcd->status_urb; if (urb) { spin_lock(&urb->lock); @@ -557,14 +554,13 @@ void usb_hcd_poll_rh_status(struct usb_h spin_unlock(&urb->lock); } else length = 0; - spin_unlock(&hcd_root_hub_lock); + spin_unlock_irqrestore(&hcd_root_hub_lock, flags); /* local irqs are always blocked in completions */ if (length > 0) usb_hcd_giveback_urb (hcd, urb, NULL); else hcd->poll_pending = 1; - local_irq_restore (flags); } /* The USB 2.0 spec says 256 ms. 
This is close enough and won't @@ -647,17 +643,15 @@ static int usb_rh_urb_dequeue (struct us } else { /* Status URB */ if (!hcd->uses_new_polling) del_timer_sync (&hcd->rh_timer); - local_irq_disable (); - spin_lock (&hcd_root_hub_lock); + spin_lock_irq(&hcd_root_hub_lock); if (urb == hcd->status_urb) { hcd->status_urb = NULL; urb->hcpriv = NULL; } else urb = NULL; /* wasn't fully queued */ - spin_unlock (&hcd_root_hub_lock); + spin_unlock_irq(&hcd_root_hub_lock); if (urb) usb_hcd_giveback_urb (hcd, urb, NULL); - local_irq_enable (); } return 0; @@ -1367,15 +1361,13 @@ hcd_endpoint_disable (struct usb_device WARN_ON (!HC_IS_RUNNING (hcd->state) && hcd->state != HC_STATE_HALT && udev->state != USB_STATE_NOTATTACHED); - local_irq_disable (); - /* FIXME move most of this into message.c as part of its * endpoint disable logic */ /* ep is already gone from udev->ep_{in,out}[]; no more submits */ rescan: - spin_lock (&hcd_data_lock); + spin_lock_irq(&hcd_data_lock); list_for_each_entry (urb, &ep->urb_list, urb_list) { int tmp; @@ -1388,13 +1380,13 @@ rescan: if (urb->status != -EINPROGRESS) continue; usb_get_urb (urb); - spin_unlock (&hcd_data_lock); + spin_unlock_irq(&hcd_data_lock); - spin_lock (&urb->lock); + spin_lock_irq(&urb->lock); tmp = urb->status; if (tmp == -EINPROGRESS) urb->status = -ESHUTDOWN; - spin_unlock (&urb->lock); + spin_unlock_irq(&urb->lock); /* kick hcd unless it's already returning this */ if (tmp == -EINPROGRESS) { @@ -1417,8 +1409,7 @@ rescan: /* list contents may have changed */ goto rescan; } - spin_unlock (&hcd_data_lock); - local_irq_enable (); + spin_unlock_irq(&hcd_data_lock); /* synchronize with the hardware, so old configuration state * clears out immediately (and will be freed). 
Index: linux/drivers/usb/core/message.c =================================================================== --- linux.orig/drivers/usb/core/message.c +++ linux/drivers/usb/core/message.c @@ -224,8 +224,9 @@ static void sg_clean (struct usb_sg_requ static void sg_complete (struct urb *urb, struct pt_regs *regs) { struct usb_sg_request *io = (struct usb_sg_request *) urb->context; + unsigned long flags; - spin_lock (&io->lock); + spin_lock_irqsave (&io->lock, flags); /* In 2.5 we require hcds' endpoint queues not to progress after fault * reports, until the completion callback (this!) returns. That lets @@ -259,7 +260,7 @@ static void sg_complete (struct urb *urb * unlink pending urbs so they won't rx/tx bad data. * careful: unlink can sometimes be synchronous... */ - spin_unlock (&io->lock); + spin_unlock_irqrestore (&io->lock, flags); for (i = 0, found = 0; i < io->entries; i++) { if (!io->urbs [i] || !io->urbs [i]->dev) continue; @@ -274,7 +275,7 @@ static void sg_complete (struct urb *urb } else if (urb == io->urbs [i]) found = 1; } - spin_lock (&io->lock); + spin_lock_irqsave (&io->lock, flags); } urb->dev = NULL; @@ -284,7 +285,7 @@ static void sg_complete (struct urb *urb if (!io->count) complete (&io->complete); - spin_unlock (&io->lock); + spin_unlock_irqrestore (&io->lock, flags); } Index: linux/drivers/usb/net/usbnet.c =================================================================== --- linux.orig/drivers/usb/net/usbnet.c +++ linux/drivers/usb/net/usbnet.c @@ -822,6 +822,8 @@ static void tx_complete (struct urb *urb urb->dev = NULL; entry->state = tx_done; + spin_lock_rt(&dev->txq.lock); + spin_unlock_rt(&dev->txq.lock); defer_bh(dev, skb, &dev->txq); } Index: linux/drivers/usb/storage/usb.c =================================================================== --- linux.orig/drivers/usb/storage/usb.c +++ linux/drivers/usb/storage/usb.c @@ -319,6 +319,7 @@ static int usb_stor_control_thread(void if (test_bit(US_FLIDX_DISCONNECTING, &us->flags)) { 
US_DEBUGP("-- exiting\n"); up(&(us->dev_semaphore)); + up(&us->sema); break; } Index: linux/drivers/usb/storage/usb.h =================================================================== --- linux.orig/drivers/usb/storage/usb.h +++ linux/drivers/usb/storage/usb.h @@ -172,7 +172,7 @@ struct us_data { dma_addr_t iobuf_dma; /* mutual exclusion and synchronization structures */ - struct semaphore sema; /* to sleep thread on */ + struct compat_semaphore sema; /* to sleep thread on */ struct completion notify; /* thread begin/end */ wait_queue_head_t delay_wait; /* wait during scan, reset */ Index: linux/drivers/video/backlight/corgi_bl.c =================================================================== --- linux.orig/drivers/video/backlight/corgi_bl.c +++ linux/drivers/video/backlight/corgi_bl.c @@ -28,7 +28,7 @@ static int corgibl_powermode = FB_BLANK_ static int current_intensity = 0; static int corgibl_limit = 0; static void (*corgibl_mach_set_intensity)(int intensity); -static spinlock_t bl_lock = SPIN_LOCK_UNLOCKED; +static DEFINE_SPINLOCK(bl_lock); static struct backlight_properties corgibl_data; static void corgibl_send_intensity(int intensity) Index: linux/drivers/video/console/fbcon.c =================================================================== --- linux.orig/drivers/video/console/fbcon.c +++ linux/drivers/video/console/fbcon.c @@ -1067,7 +1067,6 @@ static void fbcon_clear(struct vc_data * { struct fb_info *info = registered_fb[con2fb_map[vc->vc_num]]; struct fbcon_ops *ops = info->fbcon_par; - struct display *p = &fb_display[vc->vc_num]; u_int y_break; @@ -1096,10 +1095,11 @@ static void fbcon_putcs(struct vc_data * struct display *p = &fb_display[vc->vc_num]; struct fbcon_ops *ops = info->fbcon_par; - if (!fbcon_is_inactive(vc, info)) + if (!fbcon_is_inactive(vc, info)) { ops->putcs(vc, info, s, count, real_y(p, ypos), xpos, get_color(vc, info, scr_readw(s), 1), get_color(vc, info, scr_readw(s), 0)); + } } static void fbcon_putc(struct vc_data *vc, 
int c, int ypos, int xpos) @@ -2846,6 +2846,7 @@ static const struct consw fb_con = { .con_screen_pos = fbcon_screen_pos, .con_getxy = fbcon_getxy, .con_resize = fbcon_resize, + .con_preemptible = 1, }; static struct notifier_block fbcon_event_notifier = { Index: linux/drivers/video/console/vgacon.c =================================================================== --- linux.orig/drivers/video/console/vgacon.c +++ linux/drivers/video/console/vgacon.c @@ -53,7 +53,7 @@ #include