source: mainline/kernel/arch/arm32/src/cpu/cpu.c@ 8bb0af7f

Last change on this file since 8bb0af7f was 8bb0af7f, checked in by Maurizio Lombardi <mlombard@…>, 4 years ago

arm32: performance boost on raspberry pi

  • enable the icache and branch prediction for ARMv6
  • flush the branch predictor after writing to the TTBR0 register

Signed-off-by: Maurizio Lombardi <mlombard@…>

  • Property mode set to 100644
File size: 10.5 KB
Line 
1/*
2 * Copyright (c) 2007 Michal Kebrt
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *
9 * - Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * - Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * - The name of the author may not be used to endorse or promote products
15 * derived from this software without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29/** @addtogroup kernel_arm32
30 * @{
31 */
32/** @file
33 * @brief CPU identification.
34 */
35
36#include <arch/cache.h>
37#include <arch/cpu.h>
38#include <arch/cp15.h>
39#include <cpu.h>
40#include <arch.h>
41#include <stdio.h>
42
43#ifdef CONFIG_FPU
44#include <arch/fpu_context.h>
45#endif
46
/** Computes the base-2 logarithm of a value, rounded up.
 *
 * The result is the number of significant bits in (val - 1), which equals
 * ceil(log2(val)) for val >= 1. For val == 0 the decrement wraps around, so
 * the result is the bit width of unsigned.
 *
 * @param val Value to take the logarithm of.
 *
 * @return Number of significant bits in val - 1.
 */
static inline unsigned log2(unsigned val)
{
	unsigned bits = 0;

	for (unsigned rest = val - 1; rest != 0; rest >>= 1)
		bits++;

	return bits;
}
57
58static unsigned dcache_ways(unsigned level);
59static unsigned dcache_sets(unsigned level);
60static unsigned dcache_linesize_log(unsigned level);
61
/** Translates an implementer (vendor) code into a vendor name.
 *
 * @param id Implementer code.
 *
 * @return Constant string with the vendor name, or "Unknown implementer"
 *         when the code is not in the table.
 */
static const char *implementer(unsigned id)
{
	static const struct {
		unsigned code;
		const char *name;
	} vendors[] = {
		{ 0x41, "ARM Limited" },
		{ 0x44, "Digital Equipment Corporation" },
		{ 0x4d, "Motorola, Freescale Semiconductor Inc." },
		{ 0x51, "Qualcomm Inc." },
		{ 0x56, "Marvell Semiconductor Inc." },
		{ 0x69, "Intel Corporation" }
	};

	for (unsigned i = 0; i < sizeof(vendors) / sizeof(vendors[0]); ++i) {
		if (vendors[i].code == id)
			return vendors[i].name;
	}

	return "Unknown implementer";
}
81
82/** Architecture names */
83static const char *architecture_string(cpu_arch_t *arch)
84{
85 static const char *arch_data[] = {
86 "ARM", /* 0x0 */
87 "ARMv4", /* 0x1 */
88 "ARMv4T", /* 0x2 */
89 "ARMv5", /* 0x3 */
90 "ARMv5T", /* 0x4 */
91 "ARMv5TE", /* 0x5 */
92 "ARMv5TEJ", /* 0x6 */
93 "ARMv6" /* 0x7 */
94 };
95 if (arch->arch_num < (sizeof(arch_data) / sizeof(arch_data[0])))
96 return arch_data[arch->arch_num];
97 else
98 return arch_data[0];
99}
100
101/** Retrieves processor identification from CP15 register 0.
102 *
103 * @param cpu Structure for storing CPU identification.
104 * See page B4-1630 of ARM Architecture Reference Manual.
105 */
106static void arch_cpu_identify(cpu_arch_t *cpu)
107{
108 const uint32_t ident = MIDR_read();
109
110 cpu->imp_num = (ident >> MIDR_IMPLEMENTER_SHIFT) & MIDR_IMPLEMENTER_MASK;
111 cpu->variant_num = (ident >> MIDR_VARIANT_SHIFT) & MIDR_VARIANT_MASK;
112 cpu->arch_num = (ident >> MIDR_ARCHITECTURE_SHIFT) & MIDR_ARCHITECTURE_MASK;
113 cpu->prim_part_num = (ident >> MIDR_PART_NUMBER_SHIFT) & MIDR_PART_NUMBER_MASK;
114 cpu->rev_num = (ident >> MIDR_REVISION_SHIFT) & MIDR_REVISION_MASK;
115
116 // TODO CPUs with arch_num == 0xf use CPUID scheme for identification
117 cpu->dcache_levels = dcache_levels();
118
119 for (unsigned i = 0; i < cpu->dcache_levels; ++i) {
120 cpu->dcache[i].ways = dcache_ways(i);
121 cpu->dcache[i].sets = dcache_sets(i);
122 cpu->dcache[i].way_shift = 31 - log2(cpu->dcache[i].ways);
123 cpu->dcache[i].set_shift = dcache_linesize_log(i);
124 cpu->dcache[i].line_size = 1 << dcache_linesize_log(i);
125 printf("Found DCache L%u: %u-way, %u sets, %u byte lines "
126 "(shifts: w%u, s%u)\n", i + 1, cpu->dcache[i].ways,
127 cpu->dcache[i].sets, cpu->dcache[i].line_size,
128 cpu->dcache[i].way_shift, cpu->dcache[i].set_shift);
129 }
130}
131
132/** Enables unaligned access and caching for armv6+ */
133void cpu_arch_init(void)
134{
135 uint32_t control_reg = SCTLR_read();
136
137 dcache_invalidate();
138 read_barrier();
139
140 /* Turn off tex remap, RAZ/WI prior to armv7 */
141 control_reg &= ~SCTLR_TEX_REMAP_EN_FLAG;
142 /* Turn off accessed flag, RAZ/WI prior to armv7 */
143 control_reg &= ~(SCTLR_ACCESS_FLAG_EN_FLAG | SCTLR_HW_ACCESS_FLAG_EN_FLAG);
144
145 /* Unaligned access is supported on armv6+ */
146#if defined(PROCESSOR_ARCH_armv7_a) | defined(PROCESSOR_ARCH_armv6)
147 /*
148 * Enable unaligned access, RAZ/WI prior to armv6
149 * switchable on armv6, RAO/WI writes on armv7,
150 * see ARM Architecture Reference Manual ARMv7-A and ARMv7-R edition
151 * L.3.1 (p. 2456)
152 */
153 control_reg |= SCTLR_UNALIGNED_EN_FLAG;
154 /*
155 * Disable alignment checks, this turns unaligned access to undefined,
156 * unless U bit is set.
157 */
158 control_reg &= ~SCTLR_ALIGN_CHECK_EN_FLAG;
159 /*
160 * Enable caching, On arm prior to armv7 there is only one level
161 * of caches. Data cache is coherent.
162 * "This means that the behavior of accesses from the same observer to
163 * different VAs, that are translated to the same PA
164 * with the same memory attributes, is fully coherent."
165 * ARM Architecture Reference Manual ARMv7-A and ARMv7-R Edition
166 * B3.11.1 (p. 1383)
167 * We are safe to turn this on. For arm v6 see ch L.6.2 (p. 2469)
168 * L2 Cache for armv7 is enabled by default (i.e. controlled by
169 * this flag).
170 */
171 control_reg |= SCTLR_CACHE_EN_FLAG;
172 /*
173 * ICache coherency is elaborated on in barrier.h.
174 * VIPT and PIPT caches need maintenance only on code modify,
175 * so it should be safe for general use.
176 * Enable branch predictors too as they follow the same rules
177 * as ICache and they can be flushed together
178 */
179 if ((CTR_read() & CTR_L1I_POLICY_MASK) != CTR_L1I_POLICY_AIVIVT) {
180 control_reg |=
181 SCTLR_INST_CACHE_EN_FLAG | SCTLR_BRANCH_PREDICT_EN_FLAG;
182 } else {
183 control_reg &=
184 ~(SCTLR_INST_CACHE_EN_FLAG | SCTLR_BRANCH_PREDICT_EN_FLAG);
185 }
186#endif
187 SCTLR_write(control_reg);
188
189#ifdef CONFIG_FPU
190 fpu_setup();
191#endif
192
193#ifdef PROCESSOR_ARCH_armv7_a
194 if ((ID_PFR1_read() & ID_PFR1_GEN_TIMER_EXT_MASK) !=
195 ID_PFR1_GEN_TIMER_EXT) {
196 PMCR_write(PMCR_read() | PMCR_E_FLAG | PMCR_D_FLAG);
197 PMCNTENSET_write(PMCNTENSET_CYCLE_COUNTER_EN_FLAG);
198 }
199#endif
200}
201
202/** Retrieves processor identification and stores it to #CPU.arch */
203void cpu_identify(void)
204{
205 arch_cpu_identify(&CPU->arch);
206}
207
208/** Prints CPU identification. */
209void cpu_print_report(cpu_t *m)
210{
211 printf("cpu%d: vendor=%s, architecture=%s, part number=%x, "
212 "variant=%x, revision=%x\n",
213 m->id, implementer(m->arch.imp_num),
214 architecture_string(&m->arch), m->arch.prim_part_num,
215 m->arch.variant_num, m->arch.rev_num);
216}
217
/** Reads the line size logarithm of the cache at the given level.
 *
 * See chapter B4.1.19 of ARM Architecture Reference Manual
 *
 * @param level Cache level to select in CSSELR (armv7-a only).
 *
 * @return CCSIDR_LINESIZE_LOG of the selected level's CCSIDR on armv7-a,
 *         0 on every other architecture.
 */
static unsigned dcache_linesize_log(unsigned level)
{
#if defined(PROCESSOR_ARCH_armv7_a)
	/* Select the level first, CCSIDR then describes that cache. */
	CSSELR_write((level & CCSELR_LEVEL_MASK) << CCSELR_LEVEL_SHIFT);
	const uint32_t size_id = CCSIDR_read();

	return CCSIDR_LINESIZE_LOG(size_id);
#else
	(void) level;
	return 0;
#endif
}
229
/** Reads the number of ways of the cache at the given level.
 *
 * See chapter B4.1.19 of ARM Architecture Reference Manual
 *
 * @param level Cache level to select in CSSELR (armv7-a only).
 *
 * @return CCSIDR_WAYS of the selected level's CCSIDR on armv7-a,
 *         0 on every other architecture.
 */
static unsigned dcache_ways(unsigned level)
{
#if defined(PROCESSOR_ARCH_armv7_a)
	/* Select the level first, CCSIDR then describes that cache. */
	CSSELR_write((level & CCSELR_LEVEL_MASK) << CCSELR_LEVEL_SHIFT);
	const uint32_t size_id = CCSIDR_read();

	return CCSIDR_WAYS(size_id);
#else
	(void) level;
	return 0;
#endif
}
240
/** Reads the number of sets of the cache at the given level.
 *
 * See chapter B4.1.19 of ARM Architecture Reference Manual
 *
 * @param level Cache level to select in CSSELR (armv7-a only).
 *
 * @return CCSIDR_SETS of the selected level's CCSIDR on armv7-a,
 *         0 on every other architecture.
 */
static unsigned dcache_sets(unsigned level)
{
#if defined(PROCESSOR_ARCH_armv7_a)
	/* Select the level first, CCSIDR then describes that cache. */
	CSSELR_write((level & CCSELR_LEVEL_MASK) << CCSELR_LEVEL_SHIFT);
	const uint32_t size_id = CCSIDR_read();

	return CCSIDR_SETS(size_id);
#else
	(void) level;
	return 0;
#endif
}
251
/** Counts the cache levels that contain a data cache.
 *
 * On armv7-a the cache type fields of CLIDR are examined and every level
 * reporting a data-only, separate or unified cache is counted.
 *
 * @return Number of such levels on armv7-a, 0 on every other architecture.
 */
unsigned dcache_levels(void)
{
	unsigned count = 0;
#ifdef PROCESSOR_ARCH_armv7_a
	const uint32_t clidr = CLIDR_read();

	for (unsigned idx = 0; idx < 8; ++idx) {
		const unsigned ctype = CLIDR_CACHE(idx, clidr);

		/* Every other cache type holds no data and is skipped. */
		if (ctype == CLIDR_DCACHE_ONLY || ctype == CLIDR_SEP_CACHE ||
		    ctype == CLIDR_UNI_CACHE)
			++count;
	}
#endif
	return count;
}
271
/** Cleans one data cache level line by line using set/way operations.
 *
 * Every (way, set) combination is visited and written either to DCCISW
 * (clean and invalidate) or DCCSW (clean only).
 *
 * @param level      Cache level; its low three bits are placed at bit 1
 *                   of the operand.
 * @param invalidate True to clean and invalidate, false to clean only.
 * @param ways       Number of ways to iterate over.
 * @param sets       Number of sets to iterate over.
 * @param way_shift  Bit position of the way index in the operand.
 * @param set_shift  Bit position of the set index in the operand.
 */
static void dcache_clean_manual(unsigned level, bool invalidate,
    unsigned ways, unsigned sets, unsigned way_shift, unsigned set_shift)
{
	const uint32_t level_bits = (level & 0x7) << 1;

	for (unsigned way = 0; way < ways; ++way) {
		/*
		 * Callers derive way_shift as 32 - log2(ways), which is 32
		 * for a single-way cache. Shifting a 32-bit value by 32 is
		 * undefined in C, and such a cache has no way field anyway,
		 * so the way contributes no bits in that case.
		 */
		const uint32_t way_bits =
		    (way_shift < 32) ? ((uint32_t) way << way_shift) : 0;

		for (unsigned set = 0; set < sets; ++set) {
			const uint32_t val =
			    level_bits | ((uint32_t) set << set_shift) | way_bits;

			if (invalidate)
				DCCISW_write(val);
			else
				DCCSW_write(val);
		}
	}
}
288
289void dcache_flush(void)
290{
291 /* See ARM Architecture Reference Manual ch. B4.2.1 p. B4-1724 */
292 const unsigned levels = dcache_levels();
293 for (unsigned i = 0; i < levels; ++i) {
294 const unsigned ways = dcache_ways(i);
295 const unsigned sets = dcache_sets(i);
296 const unsigned way_shift = 32 - log2(ways);
297 const unsigned set_shift = dcache_linesize_log(i);
298 dcache_clean_manual(i, false, ways, sets, way_shift, set_shift);
299 }
300}
301
302void dcache_flush_invalidate(void)
303{
304 /* See ARM Architecture Reference Manual ch. B4.2.1 p. B4-1724 */
305 const unsigned levels = dcache_levels();
306 for (unsigned i = 0; i < levels; ++i) {
307 const unsigned ways = dcache_ways(i);
308 const unsigned sets = dcache_sets(i);
309 const unsigned way_shift = 32 - log2(ways);
310 const unsigned set_shift = dcache_linesize_log(i);
311 dcache_clean_manual(i, true, ways, sets, way_shift, set_shift);
312 }
313}
314
315void cpu_dcache_flush(void)
316{
317 for (unsigned i = 0; i < CPU->arch.dcache_levels; ++i)
318 dcache_clean_manual(i, false,
319 CPU->arch.dcache[i].ways, CPU->arch.dcache[i].sets,
320 CPU->arch.dcache[i].way_shift, CPU->arch.dcache[i].set_shift);
321}
322
323void cpu_dcache_flush_invalidate(void)
324{
325 const unsigned levels = dcache_levels();
326 for (unsigned i = 0; i < levels; ++i)
327 dcache_clean_manual(i, true,
328 CPU->arch.dcache[i].ways, CPU->arch.dcache[i].sets,
329 CPU->arch.dcache[i].way_shift, CPU->arch.dcache[i].set_shift);
330}
331
/** Invalidates the whole instruction cache.
 *
 * Writes 0 to ICIALLU on armv7-a and to ICIALL on all other architectures.
 */
void icache_invalidate(void)
{
#if !defined(PROCESSOR_ARCH_armv7_a)
	ICIALL_write(0);
#else
	ICIALLU_write(0);
#endif
}
340
341#if !defined(PROCESSOR_ARCH_armv7_a)
342static bool cache_is_unified(void)
343{
344 if (MIDR_read() != CTR_read()) {
345 /* We have the CTR register */
346 return (CTR_read() & CTR_SEP_FLAG) != CTR_SEP_FLAG;
347 } else {
348 panic("Unknown cache type");
349 }
350}
351#endif
352
/** Invalidates the data cache.
 *
 * On armv7-a this is done by dcache_flush_invalidate(), i.e. the lines are
 * cleaned as well. On other architectures the whole unified cache (CIALL)
 * or the whole data cache (DCIALL) is invalidated, depending on
 * cache_is_unified().
 */
void dcache_invalidate(void)
{
#if !defined(PROCESSOR_ARCH_armv7_a)
	if (!cache_is_unified())
		DCIALL_write(0);
	else
		CIALL_write(0);
#else
	dcache_flush_invalidate();
#endif
}
364
/** Cleans the data cache line holding the given address.
 *
 * On armv7-a the address is written to DCCMVAU. On other architectures it
 * is written to CCMVA for a unified cache and to DCCMVA otherwise.
 *
 * @param mva Address passed to the cache maintenance operation.
 */
void dcache_clean_mva_pou(uintptr_t mva)
{
#if !defined(PROCESSOR_ARCH_armv7_a)
	if (!cache_is_unified())
		DCCMVA_write(mva);
	else
		CCMVA_write(mva);
#else
	DCCMVAU_write(mva);
#endif
}
376
377/** @}
378 */
Note: See TracBrowser for help on using the repository browser.