tlds
Transactional Operations for Linked Data Structures
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
icc-sync.hpp
Go to the documentation of this file.
1 /**
2  * Copyright (C) 2011
3  * University of Rochester Department of Computer Science
4  * and
5  * Lehigh University Department of Computer Science and Engineering
6  *
7  * License: Modified BSD
8  * Please see the file LICENSE.RSTM for licensing information
9  */
10 
11 /**
12  * The icc stm compiler is based on ICC 11.0, which has fake support for
13  * __sync builtins. It knows what they are, and emits __sync_..._N symbols
14  * for them, but does not implement them, even when given a -march=i686 or
15  * greater.
16  *
17  * http://origin-software.intel.com/en-us/forums/showthread.php?t=71183
18  *
19  * The version 4 compiler that we used is still based on 11.0 (610), and has
20  * the same limitation. We provide our own support in this file.
21  */
22 
23 #ifndef STM_COMMON_PLATFORM_ICC_FIXES_HPP
24 #define STM_COMMON_PLATFORM_ICC_FIXES_HPP
25 
26 // only include this file when building with ICC
27 #ifndef __ICC
28 # error "icc-sync.hpp included incorrectly"
29 #endif
30 
31 #include <cstddef> // size_t
32 #include <stdint.h> // uintptr_t
33 
34 /**
35  * We're going to do some basic metaprogramming to emulate the interface
36  * that we expect from the __sync builtins, i.e., if there is a correct
37  * __sync for the type, then we'll use it.
38  */
39 
40 /**
41  * Ultimately, what this file provides is a set of 5 macros that look, feel,
42  * and smell just like the __sync builtins. Here are the macros, the
43  * implementations follow.
44  */
45 #define __sync_synchronize() \
46  asm volatile("mfence":::"memory")
47 
48 #define __sync_bool_compare_and_swap(p, f, t) \
49  stm::iccsync::bool_compare_and_swap(p, f, t)
50 
51 #define __sync_val_compare_and_swap(p, f, t) \
52  stm::iccsync::val_compare_and_swap(p, f, t)
53 
54 #define __sync_lock_test_and_set(p, v) \
55  stm::iccsync::lock_test_and_set(p, v)
56 
57 #define __sync_fetch_and_add(p, a) \
58  stm::iccsync::fetch_and_add(p, a)
59 
60 namespace stm
61 {
62  namespace iccsync
63  {
    /**
     * Our partial specialization helper is parameterized based on the type
     * (T), the number of bytes in the type (N), and if this is a subword,
     * word or double-word access (W). We assume that all addresses are
     * aligned.
     *
     * T is necessary so that the caller doesn't need to perform any casts.
     *
     * N is necessary because our implementation depends on the number of
     * bytes.
     *
     * W allows us to deduce the platform without ifdefing anything.
     *
     * NB: We've only implemented partial templates for what we actually
     *     use. Extending this is straightforward.
     */
    template <typename T,
              size_t W = sizeof(void*),   // platform word size (4 or 8 bytes)
              size_t N = sizeof(T)>       // operand size in bytes
    struct SYNC { };  // primary template is intentionally empty: only the
                      // specializations below provide operations
84 
    /**
     * The byte implementations.
     */
    template <typename T, size_t W>
    struct SYNC<T, W, 1>
    {
        // Atomically exchange *address with value; returns the previous
        // contents of *address.
        static T swap(volatile T* address, T value)
        {
            // "+q" forces a byte-addressable register for the operand;
            // xchg with a memory operand is implicitly locked, so the
            // explicit lock prefix is redundant but harmless.
            asm volatile("lock xchgb %[value], %[address]"
                         : [value]   "+q" (value),
                           [address] "+m" (*address)
                         :
                         : "memory");
            return value;
        }
    };
101 
    /**
     * The halfword sync implementations, independent of the platform
     * bitwidths. We don't need any of these at the moment.
     */
    template <typename T, size_t W>
    struct SYNC<T, W, 2> { };  // intentionally empty: using any 2-byte
                               // operation is a compile-time error
108 
    /**
     * The word implementations, independent of the platform bitwidth.
     */
    template <typename T, size_t W>
    struct SYNC<T, W, 4>
    {
        // Atomic exchange: returns the old contents of *address.
        static T swap(volatile T* address, T value)
        {
            asm volatile("lock xchgl %[value], %[address]"
                         : [value]   "+r" (value),
                           [address] "+m" (*address)
                         :
                         : "memory");
            return value;
        }

        // We can cas a word-sized value with a single x86 lock cmpxchgl.
        // cmpxchgl compares %eax (the "+a"-tied `from`) with *addr; on a
        // match it stores `to`, otherwise it loads *addr into %eax. Either
        // way, `from` comes back holding the memory's prior value.
        static T cas(volatile T* addr, T from, T to)
        {
            asm volatile("lock cmpxchgl %[to], %[addr]"
                         : "+a" (from),
                           [addr] "+m" (*addr)
                         : [to] "q" (to)
                         : "cc", "memory");
            return from;
        }

        // We exploit the fact that cmpxchgl sets the Z flag
        // (note: the original comment's "xmpxchgl" was a typo).
        static bool bcas(volatile T* addr, T from, T to)
        {
            bool result;
            asm volatile("lock cmpxchgl %[to], %[addr]\n\t"
                         "setz %[result]"   // Z == 1 iff the store happened
                         : [result] "=q" (result),
                           [addr]   "+m" (*addr),
                           "+a" (from)
                         : [to] "q" (to)
                         : "cc", "memory");
            return result;
        }
    };
150 
    /**
     * The doubleword implementations, for 4-byte platforms.
     */
    template <typename T>
    struct SYNC<T, 4, 8>
    {
        // implemented in terms of cas, because we don't have an 8 byte
        // xchg8b.
        static T swap(volatile T* address, T value)
        {
            // read memory, then update memory with value, making sure no one
            // wrote a new value---ABA is irrelevant. Can't use val cas
            // because I need to know if my memory write happened.
            T mem = *address;
            while (!bcas(address, mem, value))
                mem = *address;
            return mem;
        }

        // 64-bit CAS
        //
        // Our implementation depends on if we are compiling in a PIC or
        // non-PIC manner. PIC will not let us use %ebx in inline asm, so in
        // that case we need to save %ebx first, and then call the cmpxchg8b.
        //
        // * cmpxchg8b m64: Compare EDX:EAX with m64. If equal, set ZF and load
        //                  ECX:EBX into m64. Else, clear ZF and load m64 into
        //                  EDX:EAX.
        //
        // again, we exploit the Z-flag side effect here
        static bool bcas(volatile T* addr, T from, T to)
        {
            // View the 8-byte `to` as two 32-bit halves so they can be fed
            // to cmpxchg8b as ECX (high word) and EBX (low word).
            // NB: the field is named `from` but it is initialized with `to`;
            //     the name appears to be historical — only the to[2] view is
            //     read below.
            union {
                T from;
                uint32_t to[2];
            } cast = { to };

            bool result;
#if defined(__PIC__)
            // Under PIC, %ebx holds the GOT pointer and may not be listed as
            // a clobber, so stash it in the [to_low] operand around the
            // cmpxchg8b and restore it afterwards.
            asm volatile("xchgl %%ebx, %[to_low]\n\t" // Save %ebx
                         "lock cmpxchg8b\t%[addr]\n\t" // Perform the exchange
                         "movl %[to_low], %%ebx\n\t" // Restore %ebx
                         "setz %[result]"
                         : "+A" (from),               // EDX:EAX = expected
                           [result] "=q" (result),
                           [addr] "+m" (*addr)
                         : [to_high] "c" (cast.to[1]),
                           [to_low] "g" (cast.to[0])
                         : "cc", "memory");
#else
            asm volatile("lock cmpxchg8b %[addr]\n\t"
                         "setz %[result]"
                         : "+A" (from),               // EDX:EAX = expected
                           [result] "=q" (result),
                           [addr] "+m" (*addr)
                         : "c" (cast.to[1]),          // high word of `to`
                           "b" (cast.to[0])           // low word of `to`
                         : "cc", "memory");
#endif
            return result;
        }
    };
213 
    /**
     * The doubleword sync implementations, for 8-byte platforms.
     */
    template <typename T>
    struct SYNC<T, 8, 8>
    {
        // Atomic exchange of an 8-byte value; returns the previous contents
        // of *address.
        static T swap(volatile T* address, T value)
        {
            asm volatile("lock xchgq %[value], %[address]"
                         : [value]   "+r" (value),
                           [address] "+m" (*address)
                         :
                         : "memory");
            return value;
        }

        // We can cas a word-sized value with a single x86 lock cmpxchgq.
        // cmpxchgq compares %rax (tied to `from`) with *addr; whether or not
        // the store happens, `from` returns holding the old memory value.
        static T cas(volatile T* addr, T from, T to)
        {
            asm volatile("lock cmpxchgq %[to], %[addr]"
                         : "+a" (from),
                           [addr] "+m" (*addr)
                         : [to] "q" (to)
                         : "cc", "memory");
            return from;
        }

        // Boolean CAS: setz captures the Z flag cmpxchgq sets on success,
        // telling us whether our store actually took effect.
        static bool bcas(volatile T* addr, T from, T to)
        {
            bool result;
            asm volatile("lock cmpxchgq %[to], %[addr]\n\t"
                         "setz %[result]"
                         : [result] "=q" (result),
                           [addr]   "+m" (*addr),
                           "+a" (from)
                         : [to] "q" (to)
                         : "cc", "memory");
            return result;
        }
    };
254 
    /**
     * The quadword implementations, for 8-byte platforms. We don't ever
     * need this at the moment.
     */
    template <typename T>
    struct SYNC<T, 8, 16> { };  // intentionally empty: no 16-byte operations
                                // are required, so instantiation won't compile
261 
262  /**
263  * We're really liberal with the types of the parameters that we pass to
264  * the sync builtins. Apparently, gcc is fine with this, and "does the
265  * right thing" for them. Our SYNC interface, on the other hand, is
266  * really picky about type matching. This CAS_ADAPTER does the type
267  * matching that we need for our stm usage.
268  */
269 
270  /**
271  * This matches whenever T and S are different, and the value we're
272  * casting in may be smaller than the value we're casting to. This works
273  * because we've specialized for T == S, and for S > T.
274  */
275  template <typename T, typename S, size_t D = sizeof(T) / sizeof(S)>
276  struct CAS_ADAPTER
277  {
278  static T cas(volatile T* address, S from, S to)
279  {
280  return SYNC<T>::cas(address, static_cast<T>(from),
281  static_cast<T>(to));
282  }
283 
284  static bool bcas(volatile T* address, S from, S to)
285  {
286  return SYNC<T>::bcas(address, static_cast<T>(from),
287  static_cast<T>(to));
288  }
289  };
290 
291  /*** well-typed compare-and-swap */
292  template <typename T>
293  struct CAS_ADAPTER<T, T, 1>
294  {
295  static T cas(volatile T* address, T from, T to)
296  {
297  return SYNC<T>::cas(address, from, to);
298  }
299 
300  static bool bcas(volatile T* address, T from, T to)
301  {
302  return SYNC<T>::bcas(address, from, to);
303  }
304  };
305 
    /**
     * This is when the value is larger than the type we are casting to
     *
     * NB: We don't know how to do this right now. We'll get a compile time
     *     error if anyone instantiates it.
     */
    template <typename T, typename S>
    struct CAS_ADAPTER<T, S, 0> { };  // deliberately empty: narrowing S to T
                                      // could lose bits, so force an error
314 
315  /**
316  * An adapter class that matches types for swapping. This matches only
317  * when S is smaller than T, because equal sized and larger have their
318  * own specializations.
319  */
320  template <typename T, typename S, size_t D = sizeof(T) / sizeof(S)>
322  {
323  static T swap(volatile T* address, S value)
324  {
325  return SYNC<T>::swap(address, static_cast<T>(value));
326  }
327  };
328 
329  /*** the "well-typed" swap */
330  template <typename T>
331  struct SWAP_ADAPTER<T, T, 1>
332  {
333  static T swap(volatile T* address, T value)
334  {
335  return SYNC<T>::swap(address, value);
336  }
337  };
338 
    /**
     * swap when S is the same size as a T... we could probably do this more
     * intelligently than with a c-cast
     */
    template <typename T, typename S>
    struct SWAP_ADAPTER<T, S, 1>
    {
        // NB(review): the C-cast looks deliberate — for same-size types it
        //     can perform reinterpret-style conversions (e.g. integer <->
        //     pointer) that static_cast would reject; confirm before
        //     "modernizing" it.
        static T swap(volatile T* address, S value)
        {
            return SYNC<T>::swap(address, (T)value);
        }
    };
351 
    /**
     * swap when S is larger than a T, we have to intelligently cast.. just
     * c-casting for now
     */
    template <typename T, typename S>
    struct SWAP_ADAPTER<T, S, 0>
    {
        // NB: the C-cast narrows the wider S down to a T; high-order bits of
        //     value are discarded — callers are expected to know this is safe
        static T swap(volatile T* address, S value)
        {
            return SYNC<T>::swap(address, (T)value);
        }
    };
364 
365  /**
366  * The primary interface to the lock_test_and_set primitive. Uses the
367  * parameter types to pick the correct swap adapter.
368  */
369  template <typename T, typename S>
370  inline T lock_test_and_set(volatile T* address, S value)
371  {
372  return SWAP_ADAPTER<T, S>::swap(address, value);
373  }
374 
375  /**
376  * The primary interface to the bool compare-and-swap routine. Uses the
377  * parameter types to pick the correct implementation.
378  */
379  template <typename T, typename S>
380  inline bool bool_compare_and_swap(volatile T* address, S from, S to)
381  {
382  return CAS_ADAPTER<T, S>::bcas(address, from, to);
383  }
384 
385  /**
386  * The primary interface to the val compare-and-swap routine. Uses the
387  * parameter types to pick the correct implementation.
388  */
389  template <typename T, typename S>
390  inline T val_compare_and_swap(volatile T* address, S from, S to)
391  {
392  return CAS_ADAPTER<T, S>::cas(address, from, to);
393  }
394 
395  /**
396  * We implement fetch_and_add implemented in terms of bcas. We actually
397  * don't have a problem with the type of the value parameter, as long as
398  * the T + S operator returns a T, which it almost always will.
399  */
400  template <typename T, typename S>
401  inline T fetch_and_add(volatile T* address, S value)
402  {
403  T mem = *address;
404  // NB: mem + value must be a T
405  while (!SYNC<T>::bcas(address, mem, mem + value))
406  mem = *address;
407  return mem;
408  }
409  } // namespace stm::iccsync
410 } // namespace stm
411 
412 #endif // STM_COMMON_PLATFORM_ICC_FIXES_HPP
static bool bcas(volatile T *addr, T from, T to)
Definition: icc-sync.hpp:137
static T swap(volatile T *address, T value)
Definition: icc-sync.hpp:220
Definition: icc-sync.hpp:83
Definition: stm_fraser.c:61
static bool bcas(volatile T *address, S from, S to)
Definition: icc-sync.hpp:284
static T swap(volatile T *address, S value)
Definition: icc-sync.hpp:359
static T cas(volatile T *address, T from, T to)
Definition: icc-sync.hpp:295
static bool bcas(volatile T *addr, T from, T to)
Definition: icc-sync.hpp:181
static bool bcas(volatile T *addr, T from, T to)
Definition: icc-sync.hpp:241
static bool bcas(volatile T *address, T from, T to)
Definition: icc-sync.hpp:300
#define N
Definition: mt19937ar.h:135
static T swap(volatile T *address, T value)
Definition: icc-sync.hpp:159
static T swap(volatile T *address, T value)
Definition: icc-sync.hpp:115
static T cas(volatile T *addr, T from, T to)
Definition: icc-sync.hpp:126
Definition: icc-sync.hpp:321
static T swap(volatile T *address, S value)
Definition: icc-sync.hpp:323
static T cas(volatile T *address, S from, S to)
Definition: icc-sync.hpp:278
T lock_test_and_set(volatile T *address, S value)
Definition: icc-sync.hpp:370
static T cas(volatile T *addr, T from, T to)
Definition: icc-sync.hpp:231
T val_compare_and_swap(volatile T *address, S from, S to)
Definition: icc-sync.hpp:390
static T swap(volatile T *address, S value)
Definition: icc-sync.hpp:346
bool bool_compare_and_swap(volatile T *address, S from, S to)
Definition: icc-sync.hpp:380
static T swap(volatile T *address, T value)
Definition: icc-sync.hpp:333
static void swap(char *a, char *b, unsigned width)
Definition: sort.c:112
Definition: icc-sync.hpp:276
static T swap(volatile T *address, T value)
Definition: icc-sync.hpp:91
T fetch_and_add(volatile T *address, S value)
Definition: icc-sync.hpp:401