tlds
Transactional Operations for Linked Data Structures
platform.hpp
/**
 *  Copyright (C) 2011
 *  University of Rochester Department of Computer Science
 *    and
 *  Lehigh University Department of Computer Science and Engineering
 *
 *  License: Modified BSD
 *           Please see the file LICENSE.RSTM for licensing information
 */

/**
 *  This file hides differences that are based on compiler, CPU, and OS.  In
 *  particular, we define:
 *
 *    1) atomic operations (cas, swap, etc, atomic 64-bit load/store)
 *    2) access to the tick counter
 *    3) clean definitions of custom compiler constructs (__builtin_expect,
 *       alignment attributes, etc)
 *    4) scheduler syscalls (sleep, yield)
 *    5) a high-resolution timer
 */

#ifndef PLATFORM_HPP__
#define PLATFORM_HPP__

#include <stm/config.h>
#include <stdint.h>
#include <limits.h>

/**
 *  We set up a bunch of macros that we use to insulate the rest of the code
 *  from potentially platform-dependent behavior.
 *
 *  NB: This is partially keyed off of __LP64__, which isn't universally
 *      defined for -m64 code, but it works on the platforms that we support.
 *
 *  NB2: We don't really support non-gcc-compatible compilers, so there isn't
 *       any compiler logic in these ifdefs.  If we begin to support the
 *       Windows platform, these will need to be more complicated.
 */

/**
 *  We begin by hard-coding some macros that may become platform-dependent in
 *  the future.
 */
#define CACHELINE_BYTES 64
#define NORETURN        __attribute__((noreturn))
#define NOINLINE        __attribute__((noinline))
#define ALWAYS_INLINE   __attribute__((always_inline))
#define USED            __attribute__((used))
#define REGPARM(N)      __attribute__((regparm(N)))

/**
 *  Pick up the BITS define from the __LP64__ token.
 */
#if defined(__LP64__)
#define STM_BITS_64
#else
#define STM_BITS_32
#endif

/**
 *  GCC's fastcall attribute causes a warning on x86_64, so we don't use it
 *  there (it's not necessary in any case, because of the native calling
 *  convention).
 */
#if defined(__LP64__) && defined(STM_CPU_X86)
#define GCC_FASTCALL
#else
#define GCC_FASTCALL __attribute__((fastcall))
#endif

/**
 *  We rely on the configured parameters here (no cross-platform building
 *  yet).
 */
#if !defined(STM_CC_GCC) || !defined(STM_CPU_SPARC)
#define TM_INLINE ALWAYS_INLINE
#else
#define TM_INLINE
#endif

#if defined(STM_CPU_X86) && !defined(STM_CC_SUN)
#define TM_FASTCALL REGPARM(3)
#else
#define TM_FASTCALL
#endif

#define TM_ALIGN(N) __attribute__((aligned(N)))

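/**
 *  A minimal usage sketch (the type name below is hypothetical, not part of
 *  the original API): CACHELINE_BYTES and TM_ALIGN are typically combined to
 *  pad and align hot shared words so that two of them never share a cache
 *  line.
 */
struct example_padded_word_t
{
    volatile uintptr_t val;                         // the shared word
    char pad[CACHELINE_BYTES - sizeof(uintptr_t)];  // fill the rest of the line
} TM_ALIGN(CACHELINE_BYTES);
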
/**
 *  The first task for this file is to declare atomic operations (cas, swap,
 *  etc) and custom assembly codes, such as compiler fences, memory barriers,
 *  and no-op instructions.  This code depends on the compiler and processor.
 */

/**
 *  icc is nominally an x86/x86-64 compiler that supports sync builtins;
 *  however, the stm prototype doesn't support operations on pointer types,
 *  which we perform all the time.  This header performs the fixes by
 *  #defining the __sync builtin symbols as partial templates.
 */
#if defined(STM_CPU_X86) && defined(__ICC)
# include "icc-sync.hpp"
#endif

/**
 *  Here is the declaration of atomic operations when we're on an x86 (32-bit
 *  or 64-bit) and using the GNU compiler collection.  This assumes that the
 *  compiler is recent enough to support the builtin __sync operations.
 */
#if defined(STM_CPU_X86) && !defined(STM_CC_SUN)

#define CFENCE              __asm__ volatile ("":::"memory")
#define WBR                 __sync_synchronize()

#define cas32(p, o, n)      __sync_val_compare_and_swap(p, o, n)
#define cas64(p, o, n)      __sync_val_compare_and_swap(p, o, n)
#define casptr(p, o, n)     __sync_val_compare_and_swap(p, o, n)
#define bcas32(p, o, n)     __sync_bool_compare_and_swap(p, o, n)
#define bcas64(p, o, n)     __sync_bool_compare_and_swap(p, o, n)
#define bcasptr(p, o, n)    __sync_bool_compare_and_swap(p, o, n)

#define tas(p)              __sync_lock_test_and_set(p, 1)

#define nop()               __asm__ volatile("nop")

// NB: GCC implements test_and_set via swap
#define atomicswap8(p, v)   __sync_lock_test_and_set(p, v)
#define atomicswap32(p, v)  __sync_lock_test_and_set(p, v)
#define atomicswap64(p, v)  __sync_lock_test_and_set(p, v)
#define atomicswapptr(p, v) __sync_lock_test_and_set(p, v)

#define fai32(p)            __sync_fetch_and_add(p, 1)
#define fai64(p)            __sync_fetch_and_add(p, 1)
#define faiptr(p)           __sync_fetch_and_add(p, 1)
#define faa32(p, a)         __sync_fetch_and_add(p, a)
#define faa64(p, a)         __sync_fetch_and_add(p, a)
#define faaptr(p, a)        __sync_fetch_and_add(p, a)

#endif

/**
 *  Here is the declaration of atomic operations when we're on a sparc (32-bit)
 *  and using the GNU compiler collection.  For some reason, gcc 4.3.1 __sync_*
 *  operations can sometimes cause odd compiler crashes, so we provide our own
 *  assembly and use it instead.
 *
 *  NB: gcc doesn't provide a builtin equivalent to the SPARC swap instruction,
 *      and thus we have to implement atomicswap ourselves.
 */
#if defined(STM_CPU_SPARC) && defined(STM_CC_GCC)
#define CFENCE          __asm__ volatile ("":::"memory")
#define WBR             __sync_synchronize()

/**
 *  32-bit CAS via the SPARC CAS instruction
 */
inline uint32_t internal_cas32(volatile uint32_t* ptr, uint32_t old,
                               uint32_t _new)
{
    __asm__ volatile("cas [%2], %3, %0"                 // instruction
                     : "=&r"(_new)                      // output
                     : "0"(_new), "r"(ptr), "r"(old)    // inputs
                     : "memory");                       // side effects
    return _new;
}

/**
 *  64-bit CAS via the SPARC CASX instruction.
 *
 *  NB: This code only works correctly with -m64 specified, as otherwise GCC
 *      refuses to use a 64-bit register to pass a value.
 */
inline uint64_t internal_cas64(volatile uint64_t* ptr, uint64_t old,
                               uint64_t _new)
{
    __asm__ volatile("casx [%2], %3, %0"                // instruction
                     : "=&r"(_new)                      // output
                     : "0"(_new), "r"(ptr), "r"(old)    // inputs
                     : "memory");                       // side effects
    return _new;
}

#define cas32(p, o, n)  internal_cas32((uint32_t*)(p), (uint32_t)(o), (uint32_t)(n))
#ifdef STM_BITS_64
#define cas64(p, o, n)  internal_cas64((uint64_t*)(p), (uint64_t)(o), (uint64_t)(n))
#define casptr(p, o, n) cas64(p, o, n)
#else
#define cas64(p, o, n)  __sync_val_compare_and_swap(p, o, n)
#define casptr(p, o, n) cas32(p, o, n)
#endif

#define bcas32(p, o, n)  ({ (o) == cas32(p, (o), (n)); })
#define bcas64(p, o, n)  ({ (o) == cas64(p, (o), (n)); })
#define bcasptr(p, o, n) ({ ((void*)(o)) == (void*)casptr(p, (o), (n)); })

#define tas(p)           __sync_lock_test_and_set(p, 1)

#define nop()            __asm__ volatile("nop")

// NB: the SPARC swap instruction is 32/64-bit only... there is no atomicswap8
#ifdef STM_BITS_32
#define atomicswapptr(p, v)                                     \
    ({                                                          \
        __typeof((v)) v1 = v;                                   \
        __typeof((p)) p1 = p;                                   \
        __asm__ volatile("swap [%2], %0;"                       \
                         :"=r"(v1) :"0"(v1), "r"(p1):"memory"); \
        v1;                                                     \
    })
#else
#define atomicswapptr(p, v)                     \
    ({                                          \
        __typeof((v)) tmp;                      \
        while (1) {                             \
            tmp = *(p);                         \
            if (bcasptr((p), tmp, (v))) break;  \
        }                                       \
        tmp;                                    \
    })
#endif

#define faa32(p, a)                                                  \
    ({ __typeof(*p) _f = *(p), _e;                                   \
        do { _e = _f; }                                              \
        while ((_f = (__typeof(*p))cas32(p, _e, (_e+a))) != _e);     \
        _f;                                                          \
    })
#define fai32(p)     faa32(p,1)
#define faiptr(p)    __sync_fetch_and_add(p, 1)
#define faa64(p, a)  __sync_fetch_and_add(p, a)
#define faaptr(p, a) __sync_fetch_and_add(p, a)
#endif

/**
 *  Here is the declaration of atomic operations when we're using Sun Studio
 *  12.1.  These work for x86 and SPARC, at 32-bit or 64-bit.
 */
#if (defined(STM_CPU_X86) || defined(STM_CPU_SPARC)) && defined(STM_CC_SUN)
#include <atomic.h>
#define CFENCE              __asm__ volatile("":::"memory")
#define WBR                 membar_enter()

#define cas32(p, o, n)      atomic_cas_32(p, (o), (n))
#define cas64(p, o, n)      atomic_cas_64(p, (o), (n))
#define casptr(p, o, n)     atomic_cas_ptr(p, (void*)(o), (void*)(n))
#define bcas32(p, o, n)     ({ (o) == cas32(p, (o), (n)); })
#define bcas64(p, o, n)     ({ (o) == cas64(p, (o), (n)); })
#define bcasptr(p, o, n)    ({ ((void*)(o)) == casptr(p, (o), (n)); })

#define tas(p)              atomic_set_long_excl((volatile unsigned long*)p, 0)

#define nop()               __asm__ volatile("nop")

#define atomicswap8(p, v)   atomic_swap_8(p, v)
#define atomicswap32(p, v)  atomic_swap_32(p, v)
#define atomicswap64(p, v)  atomic_swap_64(p, v)
#define atomicswapptr(p, v) atomic_swap_ptr(p, (void*)(v))

#define fai32(p)            (atomic_inc_32_nv(p)-1)
#define fai64(p)            __sync_fetch_and_add(p, 1)
#define faiptr(p)           (atomic_inc_ulong_nv((volatile unsigned long*)p)-1)
#define faa32(p, a)         atomic_add_32(p, a)
#define faa64(p, a)         atomic_add_64(p, a)
#define faaptr(p, a)        atomic_add_long((volatile unsigned long*)p, a)

// NB: must shut off 'builtin_expect' support
#define __builtin_expect(a, b) a
#endif

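/**
 *  A minimal usage sketch (hypothetical helpers, not part of the original
 *  API), showing how the macros above are typically combined: a test-and-set
 *  spinlock and a ticket counter.  This assumes tas() returns zero exactly
 *  when the lock was previously free, which holds for each branch above, and
 *  that a compiler fence plus a plain store suffices to release a lock on the
 *  TSO platforms (x86, SPARC) this header targets.
 */
inline void example_spin_acquire(volatile uintptr_t* lock)
{
    while (tas(lock))       // nonzero means another thread already holds it
        nop();              // spin
}

inline void example_spin_release(volatile uintptr_t* lock)
{
    CFENCE;                 // keep critical-section stores from sinking below
    *lock = 0;              // hand the lock back
}

inline uintptr_t example_next_ticket(volatile uintptr_t* counter)
{
    return faiptr(counter); // fetch-and-increment; returns the old value
}
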
/**
 *  Now we must deal with the ability to load/store 64-bit values safely.  In
 *  32-bit mode, this is potentially a problem, so we handle 64-bit atomic
 *  load/store via the mvx() function.  mvx() depends on the bit level and the
 *  CPU.
 */

#if defined(STM_BITS_64)
/**
 *  64-bit code is easy... 64-bit accesses are atomic.
 */
inline void mvx(const volatile uint64_t* src, volatile uint64_t* dest)
{
    *dest = *src;
}
#endif

#if defined(STM_BITS_32) && defined(STM_CPU_X86)
/**
 *  32-bit on x86... cast to double.
 */
inline void mvx(const volatile uint64_t* src, volatile uint64_t* dest)
{
    const volatile double* srcd = (const volatile double*)src;
    volatile double* destd      = (volatile double*)dest;
    *destd = *srcd;
}
#endif

#if defined(STM_BITS_32) && defined(STM_CPU_SPARC)
/**
 *  32-bit on SPARC... use ldx/stx.
 */
inline void mvx(const volatile uint64_t* from, volatile uint64_t* to)
{
    __asm__ volatile("ldx %[from], %%o4;"
                     "stx %%o4, %[to];"
                     :: [from] "m"(*from), [to] "m"(*to)
                     : "o4", "memory");
}
#endif

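/**
 *  A minimal usage sketch (hypothetical names, not part of the original API):
 *  mvx() gives a single atomic 64-bit copy even in 32-bit builds, so a reader
 *  never observes a half-updated value when a writer publishes a new one.
 */
inline void example_publish_timestamp(volatile uint64_t* shared, uint64_t value)
{
    mvx(&value, shared);    // atomic 64-bit store of the new value
}

inline uint64_t example_read_timestamp(const volatile uint64_t* shared)
{
    uint64_t local;
    mvx(shared, &local);    // atomic 64-bit load into a private copy
    return local;
}
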
/**
 *  The next task for this file is to establish access to a high-resolution CPU
 *  timer.  The code depends on the CPU and bit level.  It is identical for
 *  32/64-bit x86.  For SPARC, the code depends on whether we are 32-bit or
 *  64-bit.
 */
#if defined(STM_CPU_X86)
/**
 *  On x86, we use the rdtsc instruction.
 */
inline uint64_t tick()
{
    uint32_t tmp[2];
    __asm__ ("rdtsc" : "=a" (tmp[1]), "=d" (tmp[0]) : "c" (0x10) );
    return (((uint64_t)tmp[0]) << 32) | tmp[1];
}
#endif

#if defined(STM_CPU_SPARC) && defined(STM_BITS_64)
/**
 *  64-bit SPARC: read the tick register into a regular (64-bit) register.
 *
 *  This code is based on
 *  http://blogs.sun.com/d/entry/reading_the_tick_counter
 *  and
 *  http://sourceware.org/binutils/docs-2.20/as/Sparc_002dRegs.html
 */
inline uint64_t tick()
{
    uint64_t val;
    __asm__ volatile("rd %%tick, %[val]" : [val] "=r" (val) : :);
    return val;
}
#endif

#if defined(STM_CPU_SPARC) && defined(STM_BITS_32)
/**
 *  32-bit SPARC: read the tick register into two 32-bit registers, then
 *  manually combine the result.
 *
 *  This code is based on
 *  http://blogs.sun.com/d/entry/reading_the_tick_counter
 *  and
 *  http://sourceware.org/binutils/docs-2.20/as/Sparc_002dRegs.html
 */
inline uint64_t tick()
{
    uint32_t lo = 0, hi = 0;
    __asm__ volatile("rd   %%tick, %%o2;"
                     "srlx %%o2, 32, %[high];"
                     "sra  %%o2, 0,  %[low];"
                     : [high] "=r"(hi),
                       [low]  "=r"(lo)
                     :
                     : "%o2" );
    uint64_t ans = hi;
    ans = ans << 32;
    ans |= lo;
    return ans;
}
#endif

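/**
 *  A minimal usage sketch (hypothetical helper, not part of the original
 *  API): tick() is read before and after a region of interest, and the
 *  difference gives the elapsed time in CPU ticks, not nanoseconds.
 */
inline uint64_t example_ticks_elapsed(void (*work)())
{
    uint64_t start = tick();    // timestamp before the measured work
    work();                     // the code being measured
    return tick() - start;      // elapsed ticks, not wall-clock time
}
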
/**
 *  Next, we provide a platform-independent function for sleeping for a number
 *  of milliseconds.  This code depends on the OS.
 *
 *  NB: since we do not have Win32 support, this is now very easy... we just
 *      use the usleep() call.
 */
#include <unistd.h>
inline void sleep_ms(uint32_t ms) { usleep(ms*1000); }


/**
 *  Now we present a clock that operates in nanoseconds, instead of in ticks,
 *  and a function for yielding the CPU.  This code also depends on the OS.
 */
#if defined(STM_OS_LINUX)
#include <stdio.h>
#include <cstring>
#include <assert.h>
#include <pthread.h>
#include <time.h>

/**
 *  Yield the CPU
 */
inline void yield_cpu() { pthread_yield(); }

/**
 *  The Linux clock_gettime is reasonably fast, has good resolution, and is not
 *  affected by TurboBoost.  Using MONOTONIC_RAW also means that the timer is
 *  not subject to NTP adjustments, which is preferable since an adjustment in
 *  mid-experiment could produce some funky results.
 */
inline uint64_t getElapsedTime()
{
    struct timespec t;
    clock_gettime(CLOCK_MONOTONIC_RAW, &t);

    uint64_t tt = (((long long)t.tv_sec) * 1000000000L) + ((long long)t.tv_nsec);
    return tt;
}

#endif // STM_OS_LINUX

#if defined(STM_OS_SOLARIS)
#include <sys/time.h>

/**
 *  Yield the CPU
 */
inline void yield_cpu() { yield(); }

/**
 *  We'll just use gethrtime() as our nanosecond timer.
 */
inline uint64_t getElapsedTime()
{
    return gethrtime();
}

#endif // STM_OS_SOLARIS

#if defined(STM_OS_MACOS)
#include <mach/mach_time.h>
#include <sched.h>

/**
 *  Yield the CPU
 */
inline void yield_cpu()
{
    sched_yield();
}

/**
 *  We'll use the MACH timer as our nanosecond timer.
 *
 *  This code is based on code at
 *  http://developer.apple.com/qa/qa2004/qa1398.html
 */
inline uint64_t getElapsedTime()
{
    static mach_timebase_info_data_t sTimebaseInfo;
    if (sTimebaseInfo.denom == 0)
        (void)mach_timebase_info(&sTimebaseInfo);
    return mach_absolute_time() * sTimebaseInfo.numer / sTimebaseInfo.denom;
}

#endif // STM_OS_MACOS

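/**
 *  A minimal usage sketch (hypothetical helper, not part of the original
 *  API): getElapsedTime() returns nanoseconds on every supported OS, so a
 *  simple way to time a region is to subtract two readings; sleep_ms() and
 *  yield_cpu() are used the same way on all platforms.
 */
inline uint64_t example_time_sleep(uint32_t ms)
{
    uint64_t start = getElapsedTime();   // nanoseconds before sleeping
    sleep_ms(ms);                        // block for roughly ms milliseconds
    return getElapsedTime() - start;     // observed duration, in nanoseconds
}
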
#endif // PLATFORM_HPP__