source: mainline/kernel/arch/sparc64/src/smp/sun4v/smp.c@ 3666d386

Last change on this file since 3666d386 was 5ea37b1, checked in by Martin Decky <martin@…>, 14 years ago

sun4v: fix formatting strings

/*
 * Copyright (c) 2006 Jakub Jermar
 * Copyright (c) 2009 Pavel Rimsky
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * - Redistributions of source code must retain the above copyright
 *   notice, this list of conditions and the following disclaimer.
 * - Redistributions in binary form must reproduce the above copyright
 *   notice, this list of conditions and the following disclaimer in the
 *   documentation and/or other materials provided with the distribution.
 * - The name of the author may not be used to endorse or promote products
 *   derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/** @addtogroup sparc64
 * @{
 */
/** @file
 */

#include <smp/smp.h>
#include <smp/ipi.h>
#include <genarch/ofw/ofw_tree.h>
#include <cpu.h>
#include <arch/cpu.h>
#include <arch/boot/boot.h>
#include <arch.h>
#include <config.h>
#include <macros.h>
#include <func.h>
#include <typedefs.h>
#include <synch/synch.h>
#include <synch/waitq.h>
#include <print.h>
#include <arch/sun4v/hypercall.h>
#include <arch/sun4v/md.h>
#include <arch/sun4v/ipi.h>
#include <time/delay.h>
#include <arch/smp/sun4v/smp.h>
#include <str.h>
#include <errno.h>
/** hypervisor code of the "running" state of the CPU */
#define CPU_STATE_RUNNING 2

/** maximum possible number of processor cores */
#define MAX_NUM_CORES 8

/** needed in the CPU_START hypercall */
extern void kernel_image_start(void);

/** needed in the CPU_START hypercall */
extern void *trap_table;

/** number of execution units detected */
uint8_t exec_unit_count = 0;

/** execution units (processor cores) */
exec_unit_t exec_units[MAX_NUM_CORES];

/** CPU structures */
extern cpu_t *cpus;
/** maximum number of strands per physical core detected */
unsigned int max_core_strands = 0;

#ifdef CONFIG_SIMICS_SMP_HACK
/**
 * Copies a piece of HelenOS code over the location where OBP keeps its IPI
 * handler. When the BSP then sends an IPI to an AP, the AP executes this
 * code, which jumps to the first instruction of the kernel. This is a
 * workaround to make APs execute HelenOS code on Simics.
 */
static void simics_smp_hack_init(void)
{
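    /*
     * The unrolled copy below moves nine 8-byte words of
     * temp_cpu_mondo_handler to physical address 0x80200f80 using stxa
     * to ASI 0x14 (assumed here to be the physical-address bypass ASI),
     * issuing membar #Sync after every store.
     */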
    asm volatile (
        "setx temp_cpu_mondo_handler, %g4, %g6 \n"
        "setx 0x80200f80, %g4, %g7 \n"

        "ldx [%g6], %g4 \n"
        "stxa %g4, [%g7] 0x14 \n"
        "membar #Sync \n"

        "add %g7, 0x8, %g7 \n"
        "ldx [%g6 + 0x8], %g4 \n"
        "stxa %g4, [%g7] 0x14 \n"
        "membar #Sync \n"

        "add %g7, 0x8, %g7 \n"
        "ldx [%g6 + 0x10], %g4 \n"
        "stxa %g4, [%g7] 0x14 \n"
        "membar #Sync \n"

        "add %g7, 0x8, %g7 \n"
        "ldx [%g6 + 0x18], %g4 \n"
        "stxa %g4, [%g7] 0x14 \n"
        "membar #Sync \n"

        "add %g7, 0x8, %g7 \n"
        "ldx [%g6 + 0x20], %g4 \n"
        "stxa %g4, [%g7] 0x14 \n"
        "membar #Sync \n"

        "add %g7, 0x8, %g7 \n"
        "ldx [%g6 + 0x28], %g4 \n"
        "stxa %g4, [%g7] 0x14 \n"
        "membar #Sync \n"

        "add %g7, 0x8, %g7 \n"
        "ldx [%g6 + 0x30], %g4 \n"
        "stxa %g4, [%g7] 0x14 \n"
        "membar #Sync \n"

        "add %g7, 0x8, %g7 \n"
        "ldx [%g6 + 0x38], %g4 \n"
        "stxa %g4, [%g7] 0x14 \n"
        "membar #Sync \n"

        "add %g7, 0x8, %g7 \n"
        "ldx [%g6 + 0x40], %g4 \n"
        "stxa %g4, [%g7] 0x14 \n"
        "membar #Sync \n"

        "flush %i7"

    );
}
#endif

#if 0
/**
 * Proposes the optimal number of ready threads for each virtual processor
 * in the given processor core so that the processor core is as busy as the
 * average processor core. The proposed number of ready threads will be
 * stored to the proposed_nrdy variable of the cpu_arch_t structure.
 */
bool calculate_optimal_nrdy(exec_unit_t *exec_unit)
{
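    /*
     * Worked example (hypothetical numbers): with 12 ready threads
     * system-wide and exec_unit_count = 3, the average core load avg is 4.
     * A core whose own nrdy is 2 should steal to_steal = 2 threads; each
     * stolen thread is assigned to the strand with the lowest
     * proposed_nrdy at that moment.
     */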
    /* calculate the number of threads the core will steal */
    int avg = atomic_get(&nrdy) / exec_unit_count;
    int to_steal = avg - atomic_get(&(exec_unit->nrdy));
    if (to_steal < 0) {
        return true;
    } else if (to_steal == 0) {
        return false;
    }

    /* initialize the proposals with the real numbers of ready threads */
    unsigned int k;
    for (k = 0; k < exec_unit->strand_count; k++) {
        exec_unit->cpus[k]->arch.proposed_nrdy =
            atomic_get(&(exec_unit->cpus[k]->nrdy));
    }

    /* distribute the threads to be stolen to the core's CPUs */
    int j;
    for (j = to_steal; j > 0; j--) {
        unsigned int k;
        unsigned int least_busy = 0;
        unsigned int least_busy_nrdy =
            exec_unit->cpus[0]->arch.proposed_nrdy;

        /* for each stolen thread, give it to the least busy CPU */
        for (k = 0; k < exec_unit->strand_count; k++) {
            if (exec_unit->cpus[k]->arch.proposed_nrdy
                < least_busy_nrdy) {
                least_busy = k;
                least_busy_nrdy =
                    exec_unit->cpus[k]->arch.proposed_nrdy;
            }
        }
        exec_unit->cpus[least_busy]->arch.proposed_nrdy++;
    }

    return false;
}
#endif

/**
 * Finds out which execution units belong to particular CPUs. By execution unit
 * we mean the physical core the logical processor is backed by. Since each
 * Niagara physical core has just one integer execution unit and we ignore
 * execution units other than the integer ones, we use the terms "integer
 * execution unit", "execution unit" and "physical core" interchangeably.
 *
 * The physical cores are detected by browsing the children of the CPU node
 * in the machine description and looking for a node representing an integer
 * execution unit. Once the integer execution unit of a particular CPU is
 * known, the ID of the CPU is added to the list of cpuids of the corresponding
 * execution unit structure (exec_unit_t). If an execution unit is encountered
 * for the first time, a new execution unit structure (exec_unit_t) must be
 * created first and added to the execution units array (exec_units).
 *
 * If the function fails to find an execution unit for a CPU (this may happen
 * on machines with older firmware or on Simics), it falls back to pretending
 * that there exists just one execution unit and that all CPUs belong to it.
 *
 * Finally, the array of all execution units is reordered such that the element
 * which represents the physical core of the bootstrap CPU is at index 0.
 * Moreover, the array of CPU IDs within the BSP's physical core structure is
 * reordered such that the element which represents the ID of the BSP is at
 * index 0. This is done because we would like the CPUs to be woken up
 * such that the 0-index CPU of the 0-index execution unit is woken up first.
 * And since the BSP is already woken up, we would like it to be at the 0-th
 * position of the 0-th execution unit structure.
 *
 * Apart from that, the code also counts the total number of CPUs and stores
 * it to the global config.cpu_count variable.
 */
static void detect_execution_units(void)
{
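    /*
     * Example (hypothetical machine description): four CPU nodes with
     * IDs 0..3, where CPUs 0 and 1 share the integer unit of core A and
     * CPUs 2 and 3 share that of core B, result in exec_unit_count = 2,
     * exec_units[0].cpuids = {0, 1}, exec_units[1].cpuids = {2, 3} and
     * config.cpu_count = 4.
     */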
    /* ID of the bootstrap processor */
    uint64_t myid;

    /* total number of CPUs detected */
    size_t cpu_count = 0;

    /* will be set to true if detecting the physical cores fails */
    bool exec_unit_assign_error = false;

    /* index of the bootstrap physical core in the array of cores */
    unsigned int bsp_exec_unit_index = 0;

    /* index of the BSP ID inside the array of bootstrap core's cpuids */
    unsigned int bsp_core_strand_index = 0;

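    /* ask the hypervisor for the ID of the CPU we are booting on (the BSP) */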
    __hypercall_fast_ret1(0, 0, 0, 0, 0, CPU_MYID, &myid);
    md_node_t node = md_get_root();

    /* walk through all the CPU nodes in the MD */
    while (md_next_node(&node, "cpu")) {

        uint64_t cpuid;
        md_get_integer_property(node, "id", &cpuid);
        cpu_count++;

        /*
         * If detection failed for previous CPUs, don't try
         * to detect physical cores any more.
         */
        if (exec_unit_assign_error)
            continue;

        /* detect exec. unit for the CPU represented by current node */
        uint64_t exec_unit_id = 0;
        md_child_iter_t it = md_get_child_iterator(node);

        while (md_next_child(&it)) {
            md_node_t child = md_get_child_node(it);
            const char *exec_unit_type;
            md_get_string_property(child, "type", &exec_unit_type);

            /* each physical core has just 1 integer exec. unit */
            if (str_cmp(exec_unit_type, "integer") == 0) {
                exec_unit_id = child;
                break;
            }
        }

        /* execution unit detected successfully */
        if (exec_unit_id != 0) {

            /* find the exec. unit in array of existing units */
            unsigned int i = 0;
            for (i = 0; i < exec_unit_count; i++) {
                if (exec_units[i].exec_unit_id == exec_unit_id)
                    break;
            }

            /*
             * The execution unit just met has not been met before, so
             * create a new entry in the array of all execution units.
             */
            if (i == exec_unit_count) {
                exec_units[i].exec_unit_id = exec_unit_id;
                exec_units[i].strand_count = 0;
                atomic_set(&(exec_units[i].nrdy), 0);
                spinlock_initialize(&(exec_units[i].proposed_nrdy_lock),
                    "exec_units[].proposed_nrdy_lock");
                exec_unit_count++;
            }

            /* remember the exec. unit and strand of the BSP */
            if (cpuid == myid) {
                bsp_exec_unit_index = i;
                bsp_core_strand_index = exec_units[i].strand_count;
            }

            /* add the CPU just met to the exec. unit's list */
            exec_units[i].cpuids[exec_units[i].strand_count] = cpuid;
            exec_units[i].strand_count++;
            max_core_strands =
                exec_units[i].strand_count > max_core_strands ?
                exec_units[i].strand_count : max_core_strands;

            /* detecting the execution unit failed */
        } else {
            exec_unit_assign_error = true;
        }
    }

    /* save the number of CPUs to a globally accessible variable */
    config.cpu_count = cpu_count;

    /*
     * Fallback code which will be executed if finding out which
     * execution units belong to particular CPUs fails. Pretend there
     * exists just one execution unit and all CPUs belong to it.
     */
    if (exec_unit_assign_error) {
        bsp_exec_unit_index = 0;
        exec_unit_count = 1;
        exec_units[0].strand_count = cpu_count;
        exec_units[0].exec_unit_id = 1;
        spinlock_initialize(&(exec_units[0].proposed_nrdy_lock),
            "exec_units[0].proposed_nrdy_lock");
        atomic_set(&(exec_units[0].nrdy), 0);
        max_core_strands = cpu_count;

        /* browse CPUs again, assign them the fictional exec. unit */
        node = md_get_root();
        unsigned int i = 0;

        while (md_next_node(&node, "cpu")) {
            uint64_t cpuid;
            md_get_integer_property(node, "id", &cpuid);
            if (cpuid == myid) {
                bsp_core_strand_index = i;
            }
            exec_units[0].cpuids[i++] = cpuid;
        }
    }

    /*
     * Reorder the execution units array elements and the cpuid array
     * elements so that the BSP will always be the very first CPU of
     * the very first execution unit.
     */
    exec_unit_t temp_exec_unit = exec_units[0];
    exec_units[0] = exec_units[bsp_exec_unit_index];
    exec_units[bsp_exec_unit_index] = temp_exec_unit;

    uint64_t temp_cpuid = exec_units[0].cpuids[0];
    exec_units[0].cpuids[0] = exec_units[0].cpuids[bsp_core_strand_index];
    exec_units[0].cpuids[bsp_core_strand_index] = temp_cpuid;
}

/**
 * Determines the number of processors and detects the physical cores. On
 * Simics, also copies the code which the AP will execute when the BSP sends
 * it an IPI, in order to make the AP execute HelenOS code.
 */
void smp_init(void)
{
    detect_execution_units();
#ifdef CONFIG_SIMICS_SMP_HACK
    simics_smp_hack_init();
#endif
}

/**
 * For each CPU, sets the value of cpus[i].arch.id, where i is the index of
 * the CPU in the cpus variable, to the cpuid of the i-th processor to be
 * run. The CPUs are run such that the CPU represented by cpus[0] is run
 * first, cpus[1] is run after it, and cpus[cpu_count - 1] is run as the
 * last one.
 *
 * The CPU IDs are set such that the processor cores are alternated while
 * the CPUs are woken up: first one CPU from the first core is run, after
 * that one CPU from the second core, ..., then one CPU from the last core;
 * after that another CPU from the first core, then another CPU from the
 * second core, ..., then another CPU from the last core, and so on.
 */
static void init_cpuids(void)
{
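    /*
     * Example (hypothetical): two cores with two strands each, core 0
     * holding cpuids {10, 11} and core 1 holding cpuids {20, 21}, yield
     * the wake-up order 10, 20, 11, 21: strand 0 of every core first,
     * then strand 1 of every core.
     */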
    unsigned int cur_core_strand;
    unsigned int cur_core;
    unsigned int cur_cpu = 0;

    for (cur_core_strand = 0; cur_core_strand < max_core_strands; cur_core_strand++) {
        for (cur_core = 0; cur_core < exec_unit_count; cur_core++) {
            if (cur_core_strand >= exec_units[cur_core].strand_count)
                continue;

            cpus[cur_cpu].arch.exec_unit = &(exec_units[cur_core]);
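            /*
             * Fold this strand's current ready-thread count into the
             * core-wide counter (assumed to feed the per-core load
             * balancing proposals).
             */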
            atomic_add(&(exec_units[cur_core].nrdy),
                atomic_get(&(cpus[cur_cpu].nrdy)));
            cpus[cur_cpu].arch.id = exec_units[cur_core].cpuids[cur_core_strand];
            exec_units[cur_core].cpus[cur_core_strand] = &(cpus[cur_cpu]);
            cur_cpu++;
        }
    }
}

/**
 * Wakes up a single CPU.
 *
 * @param cpuid ID of the CPU to be woken up
 */
static bool wake_cpu(uint64_t cpuid)
{
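    /*
     * Note: returns true once the stop/start sequence has been issued; a
     * timeout while waiting for the AP to announce itself is only reported
     * via printf, not treated as a failure.
     */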
#ifdef CONFIG_SIMICS_SMP_HACK
    ipi_unicast_to((void (*)(void)) 1234, cpuid);
#else
    /* stop the CPU before making it execute our code */
    if (__hypercall_fast1(CPU_STOP, cpuid) != EOK)
        return false;

    /* wait for the CPU to stop */
    uint64_t state;
    __hypercall_fast_ret1(cpuid, 0, 0, 0, 0, CPU_STATE, &state);
    while (state == CPU_STATE_RUNNING)
        __hypercall_fast_ret1(cpuid, 0, 0, 0, 0, CPU_STATE, &state);

    /* make the CPU run again and execute HelenOS code */
    if (__hypercall_fast4(CPU_START, cpuid,
        (uint64_t) KA2PA(kernel_image_start), KA2PA(trap_table),
        physmem_base) != EOK)
        return false;
#endif

    if (waitq_sleep_timeout(&ap_completion_wq, 10000000, SYNCH_FLAGS_NONE) ==
        ESYNCH_TIMEOUT)
        printf("%s: waiting for processor (cpuid = %" PRIu64 ") timed out\n",
            __func__, cpuid);

    return true;
}

/** Wake application processors up. */
void kmp(void *arg)
{
    init_cpuids();

    unsigned int i;

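    /* cpus[0] is the BSP, which is already running, so start from 1 */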
    for (i = 1; i < config.cpu_count; i++) {
        wake_cpu(cpus[i].arch.id);
    }
}

/** @}
 */