source: mainline/kernel/arch/sparc64/src/smp/sun4v/smp.c

Last change on this file was 7c5320c, checked in by Jiří Zárevúcky <zarevucky.jiri@…>, 2 years ago

Use the semaphore interface instead of waitq in some places

Since we already have an underused semaphore API in the kernel,
it would be worthwhile to use it in places where the baseline
semaphore semantics are needed. It makes the function of the
calls obvious even to people unfamiliar with the details of
the waitq API.

/*
 * Copyright (c) 2006 Jakub Jermar
 * Copyright (c) 2009 Pavel Rimsky
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * - Redistributions of source code must retain the above copyright
 *   notice, this list of conditions and the following disclaimer.
 * - Redistributions in binary form must reproduce the above copyright
 *   notice, this list of conditions and the following disclaimer in the
 *   documentation and/or other materials provided with the distribution.
 * - The name of the author may not be used to endorse or promote products
 *   derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/** @addtogroup kernel_sparc64
 * @{
 */
/** @file
 */

#include <smp/smp.h>
#include <smp/ipi.h>
#include <genarch/ofw/ofw_tree.h>
#include <cpu.h>
#include <arch/cpu.h>
#include <arch/boot/boot.h>
#include <arch.h>
#include <config.h>
#include <macros.h>
#include <halt.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <synch/waitq.h>
#include <synch/semaphore.h>  /* semaphore_down_timeout(), used below */
#include <stdio.h>
#include <arch/sun4v/hypercall.h>
#include <arch/sun4v/md.h>
#include <arch/sun4v/ipi.h>
#include <time/delay.h>
#include <arch/smp/sun4v/smp.h>
#include <str.h>
#include <errno.h>

/** hypervisor code of the "running" state of the CPU */
#define CPU_STATE_RUNNING  2

/** maximum possible number of processor cores */
#define MAX_NUM_CORES  8

/** needed in the CPU_START hypercall */
extern void kernel_image_start(void);

/** needed in the CPU_START hypercall */
extern void *trap_table;

/** number of execution units detected */
uint8_t exec_unit_count = 0;

/** execution units (processor cores) */
exec_unit_t exec_units[MAX_NUM_CORES];

/** CPU structures */
extern cpu_t *cpus;

/** maximum number of strands per physical core detected */
unsigned int max_core_strands = 0;

#if 0
/**
 * Proposes the optimal number of ready threads for each virtual processor
 * in the given processor core so that the processor core is as busy as the
 * average processor core. The proposed number of ready threads is stored
 * to the proposed_nrdy variable of the cpu_arch_t structure.
 */
bool calculate_optimal_nrdy(exec_unit_t *exec_unit)
{
    /* calculate the number of threads the core will steal */
    int avg = atomic_load(&nrdy) / exec_unit_count;
    int to_steal = avg - atomic_load(&(exec_unit->nrdy));
    if (to_steal < 0) {
        return true;
    } else if (to_steal == 0) {
        return false;
    }

    /* initialize the proposals with the real numbers of ready threads */
    unsigned int k;
    for (k = 0; k < exec_unit->strand_count; k++) {
        exec_unit->cpus[k]->arch.proposed_nrdy =
            atomic_load(&(exec_unit->cpus[k]->nrdy));
    }

    /* distribute the threads to be stolen to the core's CPUs */
    int j;
    for (j = to_steal; j > 0; j--) {
        unsigned int k;
        unsigned int least_busy = 0;
        unsigned int least_busy_nrdy =
            exec_unit->cpus[0]->arch.proposed_nrdy;

        /* for each stolen thread, give it to the least busy CPU */
        for (k = 0; k < exec_unit->strand_count; k++) {
            if (exec_unit->cpus[k]->arch.proposed_nrdy < least_busy_nrdy) {
                least_busy = k;
                least_busy_nrdy =
                    exec_unit->cpus[k]->arch.proposed_nrdy;
            }
        }
        exec_unit->cpus[least_busy]->arch.proposed_nrdy++;
    }

    return false;
}
#endif
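/*
 * Worked example (illustrative, not from the original source): assume a
 * two-strand core whose strands report nrdy = 1 and nrdy = 0, and a
 * system-wide average of 3 ready threads per core. Then to_steal = 2:
 * the first stolen thread is proposed for the idle strand (proposals
 * become 1 and 1), the second for strand 0, since ties favor the lowest
 * index, ending at proposals of 2 and 1.
 */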

/**
 * Finds out which execution units belong to particular CPUs. By execution unit
 * we mean the physical core the logical processor is backed by. Since each
 * Niagara physical core has just one integer execution unit and we will
 * ignore execution units other than the integer ones, we will use the terms
 * "integer execution unit", "execution unit" and "physical core"
 * interchangeably.
 *
 * The physical cores are detected by browsing the children of the CPU node
 * in the machine description and looking for a node representing an integer
 * execution unit. Once the integer execution unit of a particular CPU is
 * known, the ID of the CPU is added to the list of cpuids of the corresponding
 * execution unit structure (exec_unit_t). If an execution unit is encountered
 * for the first time, a new execution unit structure (exec_unit_t) must be
 * created first and added to the execution units array (exec_units).
 *
 * If the function fails to find an execution unit for a CPU (this may happen
 * on machines with older firmware or on Simics), it falls back to pretending
 * that there exists just one execution unit and that all CPUs belong to it.
 *
 * Finally, the array of all execution units is reordered such that the element
 * which represents the physical core of the bootstrap CPU is at index 0.
 * Moreover, the array of CPU IDs within the BSP's physical core structure is
 * reordered such that the element which represents the ID of the BSP is at
 * index 0. This is done because we would like the CPUs to be woken up
 * such that the 0-index CPU of the 0-index execution unit is woken up first.
 * And since the BSP is already woken up, we would like it to be at the 0-th
 * position of the 0-th execution unit structure.
 *
 * Apart from that, the code also counts the total number of CPUs and stores
 * it to the global config.cpu_count variable.
 */
static void detect_execution_units(void)
{
    /* ID of the bootstrap processor */
    uint64_t myid;

    /* total number of CPUs detected */
    size_t cpu_count = 0;

    /* will be set to true if detecting the physical cores fails */
    bool exec_unit_assign_error = false;

    /* index of the bootstrap physical core in the array of cores */
    unsigned int bsp_exec_unit_index = 0;

    /* index of the BSP ID inside the array of the bootstrap core's cpuids */
    unsigned int bsp_core_strand_index = 0;

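    /* ask the hypervisor for the ID of the CPU we are running on (the BSP) */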
    __hypercall_fast_ret1(0, 0, 0, 0, 0, CPU_MYID, &myid);
    md_node_t node = md_get_root();

    /* walk through all the CPU nodes in the MD */
    while (md_next_node(&node, "cpu")) {

        uint64_t cpuid;
        md_get_integer_property(node, "id", &cpuid);
        cpu_count++;

        /*
         * if detection failed for a previous CPU, don't try
         * to detect physical cores anymore
         */
        if (exec_unit_assign_error)
            continue;

        /* detect exec. unit for the CPU represented by the current node */
        uint64_t exec_unit_id = 0;
        md_child_iter_t it = md_get_child_iterator(node);

        while (md_next_child(&it)) {
            md_node_t child = md_get_child_node(it);
            const char *exec_unit_type = "";
            md_get_string_property(child, "type", &exec_unit_type);

            /* each physical core has just 1 integer exec. unit */
            if (str_cmp(exec_unit_type, "integer") == 0) {
                exec_unit_id = child;
                break;
            }
        }
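
        /*
         * Note: the MD node handle of the integer unit doubles as its
         * unique ID here; 0 means no integer unit was found.
         */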

        /* execution unit detected successfully */
        if (exec_unit_id != 0) {

            /* find the exec. unit in the array of existing units */
            unsigned int i = 0;
            for (i = 0; i < exec_unit_count; i++) {
                if (exec_units[i].exec_unit_id == exec_unit_id)
                    break;
            }

            /*
             * this execution unit has not been encountered before,
             * so create a new entry in the array of all execution units
             */
            if (i == exec_unit_count) {
                exec_units[i].exec_unit_id = exec_unit_id;
                exec_units[i].strand_count = 0;
                atomic_store(&(exec_units[i].nrdy), 0);
                spinlock_initialize(&(exec_units[i].proposed_nrdy_lock),
                    "exec_units[].proposed_nrdy_lock");
                exec_unit_count++;
            }

            /*
             * remember the exec. unit and strand of the BSP
             */
            if (cpuid == myid) {
                bsp_exec_unit_index = i;
                bsp_core_strand_index = exec_units[i].strand_count;
            }

            /* add the CPU just met to the exec. unit's list */
            exec_units[i].cpuids[exec_units[i].strand_count] = cpuid;
            exec_units[i].strand_count++;
            max_core_strands =
                exec_units[i].strand_count > max_core_strands ?
                exec_units[i].strand_count : max_core_strands;

            /* detecting the execution unit failed */
        } else {
            exec_unit_assign_error = true;
        }
    }

    /* save the number of CPUs to a globally accessible variable */
    config.cpu_count = cpu_count;

    /*
     * Fallback code, executed if finding out which execution units
     * belong to particular CPUs fails. Pretend there exists just one
     * execution unit and that all CPUs belong to it.
     */
    if (exec_unit_assign_error) {
        bsp_exec_unit_index = 0;
        exec_unit_count = 1;
        exec_units[0].strand_count = cpu_count;
        exec_units[0].exec_unit_id = 1;
        spinlock_initialize(&(exec_units[0].proposed_nrdy_lock),
            "exec_units[0].proposed_nrdy_lock");
        atomic_store(&(exec_units[0].nrdy), 0);
        max_core_strands = cpu_count;

        /* browse the CPUs again, assign them the fictional exec. unit */
        node = md_get_root();
        unsigned int i = 0;

        while (md_next_node(&node, "cpu")) {
            uint64_t cpuid;
            md_get_integer_property(node, "id", &cpuid);
            if (cpuid == myid) {
                bsp_core_strand_index = i;
            }
            exec_units[0].cpuids[i++] = cpuid;
        }
    }

    /*
     * Reorder the execution units array elements and the cpuid array
     * elements so that the BSP will always be the very first CPU of
     * the very first execution unit.
     */
    exec_unit_t temp_exec_unit = exec_units[0];
    exec_units[0] = exec_units[bsp_exec_unit_index];
    exec_units[bsp_exec_unit_index] = temp_exec_unit;

    uint64_t temp_cpuid = exec_units[0].cpuids[0];
    exec_units[0].cpuids[0] = exec_units[0].cpuids[bsp_core_strand_index];
    exec_units[0].cpuids[bsp_core_strand_index] = temp_cpuid;
}
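
/*
 * For reference, a sketch of the machine description shape this code
 * expects (illustrative; only the property names used above are assumed):
 * each "cpu" node carries an integer "id" property and has child nodes
 * with a "type" string property, one of which is "integer" - the integer
 * execution unit shared by all strands of the same physical core.
 */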

/**
 * Determine the number of processors and detect physical cores. On Simics,
 * copy the code which will be executed by the AP when the BSP sends it an
 * IPI, in order to make it execute HelenOS code.
 */
void smp_init(void)
{
    detect_execution_units();
}

/**
 * For each CPU, sets the value of cpus[i].arch.id, where i is the
 * index of the CPU in the cpus variable, to the cpuid of the i-th processor
 * to be run. The CPUs are run such that the CPU represented by cpus[0]
 * is run first, cpus[1] is run after it, and cpus[cpu_count - 1] is run
 * last.
 *
 * The CPU IDs are set such that while waking the CPUs up the processor
 * cores are alternated: first one CPU from the first core is run, after
 * that one CPU from the second core, ..., then one CPU from the last core,
 * after that another CPU from the first core, then another CPU from the
 * second core, ..., then another CPU from the last core, and so on.
 */
static void init_cpuids(void)
{
    unsigned int cur_core_strand;
    unsigned int cur_core;
    unsigned int cur_cpu = 0;

    for (cur_core_strand = 0; cur_core_strand < max_core_strands; cur_core_strand++) {
        for (cur_core = 0; cur_core < exec_unit_count; cur_core++) {
            /* skip cores that have no strand with this index */
            if (cur_core_strand >= exec_units[cur_core].strand_count)
                continue;

            cpus[cur_cpu].arch.exec_unit = &(exec_units[cur_core]);
            atomic_add(&(exec_units[cur_core].nrdy),
                atomic_load(&(cpus[cur_cpu].nrdy)));
            cpus[cur_cpu].arch.id = exec_units[cur_core].cpuids[cur_core_strand];
            exec_units[cur_core].cpus[cur_core_strand] = &(cpus[cur_cpu]);
            cur_cpu++;
        }
    }
}
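
/*
 * Worked example (illustrative, not from the original source): with two
 * cores of two strands each, cpuids {10, 11} and {20, 21}, the loop above
 * assigns cpus[0].arch.id = 10, cpus[1].arch.id = 20, cpus[2].arch.id = 11
 * and cpus[3].arch.id = 21, so kmp() below wakes the cores alternately.
 */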

/**
 * Wakes up a single CPU.
 *
 * @param cpuid ID of the CPU to be woken up
 *
 * @return false if the CPU_STOP or CPU_START hypercall fails,
 *         true otherwise
 */
static bool wake_cpu(uint64_t cpuid)
{
#ifdef CONFIG_SIMICS_SMP_HACK
    ipi_unicast_to((void (*)(void)) 1234, cpuid);
#else
    /* stop the CPU before making it execute our code */
    if (__hypercall_fast1(CPU_STOP, cpuid) != EOK)
        return false;

    /* busy-wait until the hypervisor reports the CPU as stopped */
    uint64_t state;
    __hypercall_fast_ret1(cpuid, 0, 0, 0, 0, CPU_STATE, &state);
    while (state == CPU_STATE_RUNNING)
        __hypercall_fast_ret1(cpuid, 0, 0, 0, 0, CPU_STATE, &state);

    /* make the CPU run again and execute HelenOS code */
    if (__hypercall_fast4(CPU_START, cpuid,
        (uint64_t) KA2PA(kernel_image_start), KA2PA(trap_table),
        physmem_base) != EOK)
        return false;
#endif

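    /*
     * The timeout below is in microseconds, i.e. roughly 10 s. The woken
     * AP is expected to post ap_completion_semaphore (via semaphore_up)
     * once it has initialized; that happens outside this file.
     */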
    if (semaphore_down_timeout(&ap_completion_semaphore, 10000000) != EOK)
        printf("%s: waiting for processor (cpuid = %" PRIu64 ") timed out\n",
            __func__, cpuid);

    return true;
}

/** Wake application processors up. */
void kmp(void *arg)
{
    init_cpuids();

    unsigned int i;

    for (i = 1; i < config.cpu_count; i++) {
        wake_cpu(cpus[i].arch.id);
    }
}

/** @}
 */