threads 1
play

threads 1 1 which scheduler should I choose? I care about CPU - PowerPoint PPT Presentation

threads 1 1 which scheduler should I choose? I care about CPU throughput: first-come first-serve; average response time: SRTF approximation; I/O throughput: SRTF approximation; fairness medium-term CPU usage: something like Linux CFS


  1. sum example (only globals) int sum_all() { partly because this pattern works when we don’t join (later)) (partly to illustrate that memory is shared, via global array instead of return value values returned from threads happen to be the same except for some numbers two different functions values, results: global variables — shared } return results[0] + results[1]; pthread_join(&sum_back_thread, NULL); pthread_join(&sum_front_thread, NULL); pthread_create(&sum_back_thread, NULL, sum_back, NULL); pthread_create(&sum_front_thread, NULL, sum_front, NULL); pthread_t sum_front_thread, sum_back_thread; } int values[1024]; return NULL; results[1] = sum; sum += values[i]; for ( int i = 512; i < 1024; ++i) int sum = 0; } return NULL; results[0] = sum; sum += values[i]; for ( int i = 0; i < 512; ++i) int sum = 0; int results[2]; 14 void *sum_front( void *ignored_argument) { void *sum_back( void *ignored_argument) {

  2. sum example (only globals) int sum_all() { partly because this pattern works when we don’t join (later)) (partly to illustrate that memory is shared, via global array instead of return value values returned from threads happen to be the same except for some numbers two different functions values, results: global variables — shared } return results[0] + results[1]; pthread_join(&sum_back_thread, NULL); pthread_join(&sum_front_thread, NULL); pthread_create(&sum_back_thread, NULL, sum_back, NULL); pthread_create(&sum_front_thread, NULL, sum_front, NULL); pthread_t sum_front_thread, sum_back_thread; } int values[1024]; return NULL; results[1] = sum; sum += values[i]; for ( int i = 512; i < 1024; ++i) int sum = 0; } return NULL; results[0] = sum; sum += values[i]; for ( int i = 0; i < 512; ++i) int sum = 0; int results[2]; 14 void *sum_front( void *ignored_argument) { void *sum_back( void *ignored_argument) {

  3. thread_sum memory layout PC sum_back sum_front TCB for sum_back thread … registers PC TCB for sum_front thread … registers values, results (global) 0xFFFF FFFF FFFF FFFF Code / Data Heap / other dynamic sum_back_thread stack sum_front_thread stack main thread stack Used by OS 0x0000 0000 0040 0000 0x7F… 0xFFFF 8000 0000 0000 15

  4. thread_sum memory layout PC sum_back sum_front TCB for sum_back thread … registers PC TCB for sum_front thread … registers values, results (global) 0xFFFF FFFF FFFF FFFF Code / Data Heap / other dynamic sum_back_thread stack sum_front_thread stack main thread stack Used by OS 0x0000 0000 0040 0000 0x7F… 0xFFFF 8000 0000 0000 15

  5. sum example (to global, with thread IDs) int sum_all() { values, results: global variables — shared } return results[0] + results[1]; pthread_join(threads[i], NULL); for ( int i = 0; i < 2; ++i) } for ( int i = 0; i < 2; ++i) { pthread_t thread[2]; } int values[1024]; return NULL; results[id] = sum; } sum += values[i]; int sum = 0; int id = ( int ) argument; int results[2]; 16 void *sum_thread( void *argument) { for ( int i = id * 512; i < (id + 1) * 512; ++i) { pthread_create(&threads[i], NULL, sum_thread, ( void *) i);

  6. sum example (to global, with thread IDs) int sum_all() { values, results: global variables — shared } return results[0] + results[1]; pthread_join(threads[i], NULL); for ( int i = 0; i < 2; ++i) } for ( int i = 0; i < 2; ++i) { pthread_t thread[2]; } int values[1024]; return NULL; results[id] = sum; } sum += values[i]; int sum = 0; int id = ( int ) argument; int results[2]; 16 void *sum_thread( void *argument) { for ( int i = id * 512; i < (id + 1) * 512; ++i) { pthread_create(&threads[i], NULL, sum_thread, ( void *) i);

  7. sum example (info struct) int sum_all() { only okay because sum_all waits! my_info: pointer to sum_all’s stack values: global variable — shared } return info[0].result + info[1].result; pthread_join(threads[i], NULL); for ( int i = 0; i < 2; ++i) } pthread_create(&threads[i], NULL, sum_thread, &info[i]); info[i].start = i*512; info[i].end = (i+1)*512; for ( int i = 0; i < 2; ++i) { pthread_t thread[2]; ThreadInfo info[2]; } int values[1024]; return NULL; my_info->result = sum; } sum += values[i]; for ( int i = my_info->start; i < my_info->end; ++i) { int sum = 0; }; int start, end, result; struct ThreadInfo { 17 void *sum_thread( void *argument) { ThreadInfo *my_info = (ThreadInfo *) argument;

  8. sum example (info struct) int sum_all() { only okay because sum_all waits! my_info: pointer to sum_all’s stack values: global variable — shared } return info[0].result + info[1].result; pthread_join(threads[i], NULL); for ( int i = 0; i < 2; ++i) } pthread_create(&threads[i], NULL, sum_thread, &info[i]); info[i].start = i*512; info[i].end = (i+1)*512; for ( int i = 0; i < 2; ++i) { pthread_t thread[2]; ThreadInfo info[2]; } int values[1024]; return NULL; my_info->result = sum; } sum += values[i]; for ( int i = my_info->start; i < my_info->end; ++i) { int sum = 0; }; int start, end, result; struct ThreadInfo { 17 void *sum_thread( void *argument) { ThreadInfo *my_info = (ThreadInfo *) argument;

  9. sum example (info struct) int sum_all() { only okay because sum_all waits! my_info: pointer to sum_all’s stack values: global variable — shared } return info[0].result + info[1].result; pthread_join(threads[i], NULL); for ( int i = 0; i < 2; ++i) } pthread_create(&threads[i], NULL, sum_thread, &info[i]); info[i].start = i*512; info[i].end = (i+1)*512; for ( int i = 0; i < 2; ++i) { } int values[1024]; return NULL; my_info->result = sum; } sum += values[i]; for ( int i = my_info->start; i < my_info->end; ++i) { int sum = 0; }; int start, end, result; struct ThreadInfo { 17 void *sum_thread( void *argument) { ThreadInfo *my_info = (ThreadInfo *) argument; pthread_t thread[2]; ThreadInfo info[2];

  10. sum example (info struct) int sum_all() { only okay because sum_all waits! my_info: pointer to sum_all’s stack values: global variable — shared } return info[0].result + info[1].result; pthread_join(threads[i], NULL); for ( int i = 0; i < 2; ++i) } pthread_create(&threads[i], NULL, sum_thread, &info[i]); info[i].start = i*512; info[i].end = (i+1)*512; for ( int i = 0; i < 2; ++i) { pthread_t thread[2]; ThreadInfo info[2]; } int values[1024]; return NULL; my_info->result = sum; } sum += values[i]; for ( int i = my_info->start; i < my_info->end; ++i) { int sum = 0; }; int start, end, result; struct ThreadInfo { 17 void *sum_thread( void *argument) { ThreadInfo *my_info = (ThreadInfo *) argument;

  11. thread_sum memory layout (info struct) 0xFFFF FFFF FFFF FFFF 0xFFFF 8000 0000 0000 0x7F… 0x0000 0000 0040 0000 Used by OS main thread stack threads[0] stack threads[1] stack Heap / other dynamic Code / Data values (global) info array my_info my_info 18

  12. sum example (to main stack) ThreadInfo info[2]; pthread_t thread[2]; } return info[0].result + info[1].result; pthread_join(threads[i], NULL); for ( int i = 0; i < 2; ++i) } info[i].values = values; info[i].start = i*512; info[i].end = (i+1)*512; for ( int i = 0; i < 2; ++i) { 19 } return NULL; my_info->result = sum; } sum += my_info->values[i]; for ( int i = my_info->start; i < my_info->end; ++i) { int sum = 0; struct ThreadInfo { int *values; int start; int end; int result }; void *sum_thread( void *argument) { ThreadInfo *my_info = (ThreadInfo *) argument; int sum_all( int *values) { pthread_create(&threads[i], NULL, sum_thread, ( void *) &info[i]);

  13. sum example (to main stack) ThreadInfo info[2]; pthread_t thread[2]; } return info[0].result + info[1].result; pthread_join(threads[i], NULL); for ( int i = 0; i < 2; ++i) } info[i].values = values; info[i].start = i*512; info[i].end = (i+1)*512; for ( int i = 0; i < 2; ++i) { 19 } return NULL; my_info->result = sum; } sum += my_info->values[i]; for ( int i = my_info->start; i < my_info->end; ++i) { int sum = 0; struct ThreadInfo { int *values; int start; int end; int result }; void *sum_thread( void *argument) { ThreadInfo *my_info = (ThreadInfo *) argument; int sum_all( int *values) { pthread_create(&threads[i], NULL, sum_thread, ( void *) &info[i]);

  14. sum example (to main stack) ThreadInfo info[2]; pthread_t thread[2]; } return info[0].result + info[1].result; pthread_join(threads[i], NULL); for ( int i = 0; i < 2; ++i) } info[i].values = values; info[i].start = i*512; info[i].end = (i+1)*512; for ( int i = 0; i < 2; ++i) { 19 } return NULL; my_info->result = sum; } sum += my_info->values[i]; for ( int i = my_info->start; i < my_info->end; ++i) { int sum = 0; struct ThreadInfo { int *values; int start; int end; int result }; void *sum_thread( void *argument) { ThreadInfo *my_info = (ThreadInfo *) argument; int sum_all( int *values) { pthread_create(&threads[i], NULL, sum_thread, ( void *) &info[i]);

  15. sum example (to main stack) ThreadInfo info[2]; pthread_t thread[2]; } return info[0].result + info[1].result; pthread_join(threads[i], NULL); for ( int i = 0; i < 2; ++i) } info[i].values = values; info[i].start = i*512; info[i].end = (i+1)*512; for ( int i = 0; i < 2; ++i) { 19 } return NULL; my_info->result = sum; } sum += my_info->values[i]; for ( int i = my_info->start; i < my_info->end; ++i) { int sum = 0; struct ThreadInfo { int *values; int start; int end; int result }; void *sum_thread( void *argument) { ThreadInfo *my_info = (ThreadInfo *) argument; int sum_all( int *values) { pthread_create(&threads[i], NULL, sum_thread, ( void *) &info[i]);

  16. program memory (to main stack) 0xFFFF FFFF FFFF FFFF 0xFFFF 8000 0000 0000 0x7F… 0x0000 0000 0040 0000 Used by OS main thread stack sum_front_thread stack sum_back_thread stack Heap / other dynamic Code / Data info array values (stack? heap?) my_info my_info 20

  17. sum example (on heap) return info; } return result; delete [] info; int result = info[0].result + info[1].result; pthread_join(info[i].thread, NULL); for ( int i = 0; i < 2; ++i) } } info[i].values = values; info[i].start = i*512; info[i].end = (i+1)*512; for ( int i = 0; i < 2; ++i) { } ... 21 struct ThreadInfo { pthread_t thread; int *values; int start; int end; int result }; void *sum_thread( void *argument) { ThreadInfo *start_sum_all( int *values) { ThreadInfo *info = new ThreadInfo[2]; pthread_create(&info[i].thread, NULL, sum_thread, ( void *) &info[i]); void finish_sum_all(ThreadInfo *info) {

  18. sum example (on heap) return info; } return result; delete [] info; int result = info[0].result + info[1].result; pthread_join(info[i].thread, NULL); for ( int i = 0; i < 2; ++i) } } info[i].values = values; info[i].start = i*512; info[i].end = (i+1)*512; for ( int i = 0; i < 2; ++i) { } ... 21 struct ThreadInfo { pthread_t thread; int *values; int start; int end; int result }; void *sum_thread( void *argument) { ThreadInfo *start_sum_all( int *values) { ThreadInfo *info = new ThreadInfo[2]; pthread_create(&info[i].thread, NULL, sum_thread, ( void *) &info[i]); void finish_sum_all(ThreadInfo *info) {

  19. sum example (on heap) return info; } return result; delete [] info; int result = info[0].result + info[1].result; pthread_join(info[i].thread, NULL); for ( int i = 0; i < 2; ++i) } } info[i].values = values; info[i].start = i*512; info[i].end = (i+1)*512; for ( int i = 0; i < 2; ++i) { } ... 21 struct ThreadInfo { pthread_t thread; int *values; int start; int end; int result }; void *sum_thread( void *argument) { ThreadInfo *start_sum_all( int *values) { ThreadInfo *info = new ThreadInfo[2]; pthread_create(&info[i].thread, NULL, sum_thread, ( void *) &info[i]); void finish_sum_all(ThreadInfo *info) {

  20. thread_sum memory (heap version) 0xFFFF FFFF FFFF FFFF 0xFFFF 8000 0000 0000 0x7F… 0x0000 0000 0040 0000 Used by OS main thread stack sum_front_thread stack sum_back_thread stack Heap / other dynamic Code / Data info array values (stack? heap?) my_info my_info 22

  21. what’s wrong with this? string result; result = ComputeString(); return &result; } int main() { pthread_t the_thread; pthread_create(&the_thread, NULL, create_string, NULL); pthread_join(the_thread, &string_ptr); } 23 /* omitted: headers, using statements */ void *create_string( void *ignored_argument) { string *string_ptr; cout << "string is " << *string_ptr;

  22. program memory Heap / other dynamic threads exit/are joined …stacks deallocated when string_ptr pointed to here string result allocated here dynamically allocated stacks Code / Data third thread stack 0xFFFF FFFF FFFF FFFF second thread stack main thread stack Used by OS 0x0000 0000 0040 0000 0x7F… 0xFFFF 8000 0000 0000 24

  23. program memory Heap / other dynamic threads exit/are joined …stacks deallocated when string_ptr pointed to here string result allocated here dynamically allocated stacks Code / Data third thread stack 0xFFFF FFFF FFFF FFFF second thread stack main thread stack Used by OS 0x0000 0000 0040 0000 0x7F… 0xFFFF 8000 0000 0000 24

  24. thread resources to create a thread, allocate: new stack (how big???) thread control block deallocated when … can deallocate stack when thread exits but need to allow collecting return value same problem as for processes and waitpid 25

  25. thread resources to create a thread, allocate: new stack (how big???) thread control block deallocated when … can deallocate stack when thread exits but need to allow collecting return value same problem as for processes and waitpid 25

  26. pthread_detach void spawn_show_progress_thread() { pthread_t show_progress_thread; pthread_create(&show_progress_thread, NULL, show_progress, NULL); pthread_detach(show_progress_thread); } int main() { spawn_show_progress_thread(); do_other_stuff(); ... } detach = don’t care about return value, etc. system will deallocate when thread terminates 26 void *show_progress( void * ...) { ... } /* instead of keeping pthread_t around to join thread later: */

  27. starting threads detached void spawn_show_progress_thread() { pthread_t show_progress_thread; pthread_attr_t attrs; pthread_attr_init(&attrs); pthread_attr_setdetachstate(&attrs, PTHREAD_CREATE_DETACHED); pthread_create(&show_progress_thread, attrs, show_progress, NULL); pthread_attr_destroy(&attrs); } 27 void *show_progress( void * ...) { ... }

  28. setting stack sizes void spawn_show_progress_thread() { pthread_t show_progress_thread; pthread_attr_t attrs; pthread_attr_init(&attrs); pthread_create(&show_progress_thread, attrs, show_progress, NULL); } 28 void *show_progress( void * ...) { ... } pthread_attr_setstacksize(&attrs, 32 * 1024 /* bytes */ );

  29. a note on error checking from pthread_create manpage: special constants for return value same pattern for many other pthreads functions will often omit error checking in slides for brevity 29

  30. error checking pthread_create int error = pthread_create(...); if (error != 0) { } 30 /* print some error message */

  31. the correctness problem schedulers introduce non-determinism scheduler can switch threads at any time worse with threads on multiple cores different cores happen in different order each time allows for “race condition” bugs outcome depends on whether one thread can ‘race’ ahead of another …to be avoided by synchronization constructs what we’ll talk about for a while… 31 scheduler might run threads in any order cores not precisely synchronized (stalling for caches, etc., etc.)

  32. example application: ATM server commands: withdraw, deposit one correctness goal: don’t lose money 32

  33. ATM server (pseudocode) ServerLoop() { while ( true ) { ReceiveRequest(&operation, &accountNumber, &amount); if (operation == DEPOSIT) { Deposit(accountNumber, amount); } else ... } } Deposit(accountNumber, amount) { account = GetAccount(accountId); SaveAccountUpdates(account); } 33 account->balance += amount;

  34. a threaded server? Deposit(accountNumber, amount) { account = GetAccount(accountId); SaveAccountUpdates(account); } maybe GetAccount/SaveAccountUpdates can be slow? read/write disk sometimes? contact another server sometimes? maybe lots of requests to process? maybe real logic has more checks than Deposit() … all reasons to handle multiple requests at once 34 account->balance += amount; → many threads all running the server loop

  35. multiple threads while ( true ) { } } Deposit(accountNumber, amount); if (operation == DEPOSIT) { ReceiveRequest(&operation, &accountNumber, &amount); ServerLoop() { main() { } ... } ServerLoop, NULL); pthread_create(&server_loop_threads[i], NULL, for ( int i = 0; i < NumberOfThreads; ++i) { 35 } else ...

  36. the lost write context switch lost track of thread A’s money “winner” of the race lost write to balance context switch context switch 36 add amount, %rax Thread B Thread A add amount, %rax account->balance += amount; (in two threads, same account) mov account->balance, %rax mov account->balance, %rax mov %rax, account->balance mov %rax, account->balance

  37. the lost write context switch lost track of thread A’s money “winner” of the race lost write to balance context switch context switch 36 add amount, %rax Thread B Thread A add amount, %rax account->balance += amount; (in two threads, same account) mov account->balance, %rax mov account->balance, %rax mov %rax, account->balance mov %rax, account->balance

  38. the lost write context switch lost track of thread A’s money “winner” of the race lost write to balance context switch context switch 36 add amount, %rax Thread B Thread A add amount, %rax account->balance += amount; (in two threads, same account) mov account->balance, %rax mov account->balance, %rax mov %rax, account->balance mov %rax, account->balance

  39. thinking about race conditions (1) Thread A Thread B must be 1. Thread B can’t do anything 37 what are the possible values of x ? (initially x = y = 0 ) x ← 1 y ← 2

  40. thinking about race conditions (1) Thread A Thread B must be 1. Thread B can’t do anything 37 what are the possible values of x ? (initially x = y = 0 ) x ← 1 y ← 2

  41. thinking about race conditions (2) B (start): y ← 2 = $0010_{\text{TWO}}$; then y bit 3 ← 0; y bit 2 ← 1; then A: x ← $110_{\text{TWO}}$ + 1 = 7; then B (finish): y bit 1 ← 0; y bit 0 ← 0 …and why not 7: if B line one, then A, then B line two: if B goes first, then A: if A goes first, then B: Thread A Thread B 38 what are some possible values of x ? (initially x = y = 0 ) x ← y + 1 y ← 2 y ← y × 2

  42. thinking about race conditions (2) B (start): y ← 2 = $0010_{\text{TWO}}$; then y bit 3 ← 0; y bit 2 ← 1; then A: x ← $110_{\text{TWO}}$ + 1 = 7; then B (finish): y bit 1 ← 0; y bit 0 ← 0 Thread A Thread B …and why not 7: 38 what are some possible values of x ? (initially x = y = 0 ) x ← y + 1 y ← 2 y ← y × 2 if A goes first, then B: 1 if B goes first, then A: 5 if B line one, then A, then B line two: 3

  43. thinking about race conditions (3) Thread A Thread B 1 or 2 …but why not 3? B: x bit 0 ← 0 A: x bit 0 ← 1 A: x bit 1 ← 0 B: x bit 1 ← 1 39 what are the possible values of x ? (initially x = y = 0 ) x ← 1 x ← 2

  44. thinking about race conditions (3) Thread A Thread B 1 or 2 …but why not 3? B: x bit 0 ← 0 A: x bit 0 ← 1 A: x bit 1 ← 0 B: x bit 1 ← 1 39 what are the possible values of x ? (initially x = y = 0 ) x ← 1 x ← 2

  45. thinking about race conditions (3) Thread A Thread B 1 or 2 …but why not 3? 39 what are the possible values of x ? (initially x = y = 0 ) x ← 1 x ← 2 B: x bit 0 ← 0 A: x bit 0 ← 1 A: x bit 1 ← 0 B: x bit 1 ← 1

  46. thinking about race conditions (2) Thread A Thread B …and why not 7: 40 what are some possible values of x ? (initially x = y = 0 ) x ← y + 1 y ← 2 y ← y × 2 if A goes first, then B: 1 if B goes first, then A: 5 if B line one, then A, then B line two: 3 B (start): y ← 2 = $0010_{\text{TWO}}$; then y bit 3 ← 0; y bit 2 ← 1; then A: x ← $110_{\text{TWO}}$ + 1 = 7; then B (finish): y bit 1 ← 0; y bit 0 ← 0

  47. atomic operation atomic operation = operation that runs to completion or not at all we will use these to let threads work together most machines: loading/storing (aligned) words is atomic but some instructions are not atomic; examples: x86: integer add constant to memory location many CPUs: loading/storing values that cross cache blocks e.g. if cache blocks are 0x40 bytes, load/store of 4 bytes from addr. 0x3E is not atomic 41 so can’t get 3 from x ← 1 and x ← 2 running in parallel aligned ≈ address of word is multiple of word size (typically done by compilers)

  48. lost adds (program) the_value = 0; } printf("the_value = %d\n", the_value); // expected result: 1000000 + 1000000 = 2000000 pthread_join(B, NULL); pthread_join(A, NULL); pthread_create(&B, NULL, update_loop, ( void *) 1000000); pthread_create(&A, NULL, update_loop, ( void *) 1000000); pthread_t A, B; int main( void ) { .global update_loop int the_value; ret // if argument 1 >= 0 repeat jg update_loop // argument 1 -= 1 dec %rdi update_loop: 42 addl $1, the_value // the_value (global variable) + = 1 extern void *update_loop( void *);

  49. lost adds (results) 43 the_value = ? 5000 4000 frequency 3000 2000 1000 0 800000 1000000 1200000 1400000 1600000 1800000 2000000

  50. but how? probably not possible on single core exceptions can’t occur in the middle of add instruction …but ‘add to memory’ implemented with multiple steps still needs to load, add, store internally can be interleaved with what other cores do (and actually it’s more complicated than that — we’ll talk later) 44

  51. but how? probably not possible on single core exceptions can’t occur in the middle of add instruction …but ‘add to memory’ implemented with multiple steps still needs to load, add, store internally can be interleaved with what other cores do (and actually it’s more complicated than that — we’ll talk later) 44

  52. so, what is actually atomic for now we’ll assume: load/stores of ‘words’ (64-bit machine = 64-bit words) their job to design caches, etc. to work as documented 45 in general: processor designer will tell you

  53. too much milk buy milk how can Alice and Bob coordinate better? return home, put milk in fridge 3:30 buy milk 3:25 arrive at store return home, put milk in fridge 3:20 leave for store 3:15 roommates Alice and Bob want to keep fridge stocked with milk: look in fridge. no milk arrive at store 3:10 leave for store 3:05 look in fridge. no milk 3:00 Bob Alice time 46

  54. too much milk “solution” 1 (algorithm) leave a note: “I am buying milk” place before buying remove after buying don’t try buying if there’s a note with atomic load/store of variable if (no milk) { if (no note) { leave note; buy milk; remove note; } } 47 ≈ setting/checking a variable (e.g. “ note = 1 ”)

  55. too much milk “solution” 1 (timeline) } } } remove note; buy milk; leave note; } remove note; if (no milk) { buy milk; leave note; if (no note) { if (no milk) { Bob Alice if (no note) { 48

  56. too much milk “solution” 2 (algorithm) intuition: leave note when buying or checking if need to buy leave note; if (no milk) { if (no note) { buy milk; } } remove note; 49

  57. too much milk: “solution” 2 (timeline) leave note; if (no milk) { if (no note) { Alice buy milk; } } remove note; but there’s always a note …will never buy milk (twice or once) 50

  58. too much milk: “solution” 2 (timeline) leave note; if (no milk) { if (no note) { Alice buy milk; } } remove note; but there’s always a note …will never buy milk (twice or once) 50

  59. too much milk: “solution” 2 (timeline) leave note; if (no milk) { if (no note) { Alice buy milk; } } remove note; but there’s always a note …will never buy milk (twice or once) 50

  60. “solution” 3: algorithm leave note from Bob; Bob remove note from Bob; } } buy milk if (no note from Alice) { if (no milk) { Alice intuition: label notes so Alice knows which is hers (and vice-versa) remove note from Alice; } } buy milk if (no note from Bob) { if (no milk) { leave note from Alice; computer equivalent: separate noteFromAlice and noteFromBob variables 51

  61. too much milk: “solution” 3 (timeline) if (no milk) { remove note from Alice remove note from Bob } } buy milk if (no note from Alice) { } leave note from Alice } buy milk if (no note from Bob) { leave note from Bob Bob Alice if (no milk) { 52

  62. too much milk: is it possible is there a solution with writing/reading notes? yes, but it’s not very elegant 53 ≈ loading/storing from shared memory

  63. too much milk: solution 4 (algorithm) if (no note from Alice) { exercise (hard): extend to three people exercise (hard): prove (in)correctness Bob remove note from Bob } } buy milk if (no milk) { leave note from Bob leave note from Alice Alice remove note from Alice } buy milk if (no milk) { } do nothing while (note from Bob) { 54

  64. too much milk: solution 4 (algorithm) if (no note from Alice) { exercise (hard): extend to three people exercise (hard): prove (in)correctness Bob remove note from Bob } } buy milk if (no milk) { leave note from Bob leave note from Alice Alice remove note from Alice } buy milk if (no milk) { } do nothing while (note from Bob) { 54

  65. too much milk: solution 4 (algorithm) if (no note from Alice) { exercise (hard): extend to three people exercise (hard): prove (in)correctness Bob remove note from Bob } } buy milk if (no milk) { leave note from Bob leave note from Alice Alice remove note from Alice } buy milk if (no milk) { } do nothing while (note from Bob) { 54

  66. too much milk: solution 4 (algorithm) if (no note from Alice) { exercise (hard): extend to three people exercise (hard): prove (in)correctness Bob remove note from Bob } } buy milk if (no milk) { leave note from Bob leave note from Alice Alice remove note from Alice } buy milk if (no milk) { } do nothing while (note from Bob) { 54

  67. Peterson’s algorithm general version of solution see, e.g., Wikipedia we’ll use special hardware support instead 55

  68. some definitions mutual exclusion : ensuring only one thread does a particular thing at a time like checking for and, if needed, buying milk critical section : code that exactly one thread can execute at a time result of critical section lock : object only one thread can hold at a time interface for creating critical sections 56

  69. some definitions mutual exclusion : ensuring only one thread does a particular thing at a time like checking for and, if needed, buying milk critical section : code that exactly one thread can execute at a time result of critical section lock : object only one thread can hold at a time interface for creating critical sections 56

  70. some definitions mutual exclusion : ensuring only one thread does a particular thing at a time like checking for and, if needed, buying milk critical section : code that exactly one thread can execute at a time result of critical section lock : object only one thread can hold at a time interface for creating critical sections 56

  71. the lock primitive locks: an object with (at least) two operations: acquire or lock — wait until lock is free, then “grab” it release or unlock — let others use lock, wakeup waiters typical usage: everyone acquires lock before using shared resource forget to acquire lock? weird things happen Lock(MilkLock); if (no milk) { buy milk } Unlock(MilkLock); 57

  72. pthread mutex #include <pthread.h> pthread_mutex_t MilkLock; pthread_mutex_init(&MilkLock, NULL); ... pthread_mutex_lock(&MilkLock); if (no milk) { buy milk } pthread_mutex_unlock(&MilkLock); 58

  73. xv6 spinlocks #include "spinlock.h" ... struct spinlock MilkLock; initlock(&MilkLock, "name for debugging"); ... acquire(&MilkLock); if (no milk) { buy milk } release(&MilkLock); 59

  74. 60

  75. backup slides 61

  76. lottery scheduler assignment track “ticks” process runs = number of times scheduled simplification: don’t care if process uses less than timeslice new system call: getprocessesinfo copy info from process table into user space new system call: settickets set number of tickets for current process should be inherited by fork scheduler: choose pseudorandom weighted by tickets caution! no floating point 62

  77. passing thread IDs (1) } } } thread_function, ( void *) i); pthread_create(&threads[i], NULL, for ( int i = 0; i < 2; ++i) { vector<pthread_t> threads(2); void run_threads() { ... DataType items[1000]; } DoSomethingWith(items[i]); for ( int i = start; i < end; ++i) { int end = start + 500; int thread_id = ( int ) argument; 63 void *thread_function( void *argument) { int start = 500 * thread_id;

  78. passing thread IDs (1) } } } pthread_create(&threads[i], NULL, for ( int i = 0; i < 2; ++i) { vector<pthread_t> threads(2); void run_threads() { ... DataType items[1000]; } DoSomethingWith(items[i]); for ( int i = start; i < end; ++i) { int end = start + 500; int thread_id = ( int ) argument; 63 void *thread_function( void *argument) { int start = 500 * thread_id; thread_function, ( void *) i);

  79. passing thread IDs (2) } } ... } thread_function, ( void *) i); pthread_create(&threads[i], NULL, for ( int i = 0; i < num_threads; ++i) { vector<pthread_t> threads(num_threads); void run_threads() { ... DataType items[1000]; } DoSomethingWith(items[i]); for ( int i = start; i < end; ++i) { int end = start + (1000 / num_threads); int thread_id = ( int ) argument; int num_threads; 64 void *thread_function( void *argument) { int start = thread_id * (1000 / num_threads); if (thread_id == num_threads − 1) end = 1000;

  80. passing thread IDs (2) DataType items[1000]; } ... } pthread_create(&threads[i], NULL, for ( int i = 0; i < num_threads; ++i) { vector<pthread_t> threads(num_threads); void run_threads() { } ... } DoSomethingWith(items[i]); for ( int i = start; i < end; ++i) { int end = start + (1000 / num_threads); int thread_id = ( int ) argument; int num_threads; 64 void *thread_function( void *argument) { int start = thread_id * (1000 / num_threads); if (thread_id == num_threads − 1) end = 1000; thread_function, ( void *) i);

Recommend


More recommend