source: mainline/kernel/arch/arm32/src/cpu/cpu.c@ 8bb0af7f

Last change on this file since 8bb0af7f was 8bb0af7f, checked in by Maurizio Lombardi <mlombard@…>, 4 years ago

arm32: performance boost on raspberry pi

  • enable the icache and branch prediction for ARMv6
  • flush the branch predictor after writing to the TTBR0 register

Signed-off-by: Maurizio Lombardi <mlombard@…>

  • Property mode set to 100644
File size: 10.5 KB
RevLine 
[d630139]1/*
[6b781c0]2 * Copyright (c) 2007 Michal Kebrt
[d630139]3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *
9 * - Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * - Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * - The name of the author may not be used to endorse or promote products
15 * derived from this software without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
[c5429fe]29/** @addtogroup kernel_arm32
[d630139]30 * @{
31 */
32/** @file
[6b781c0]33 * @brief CPU identification.
[d630139]34 */
35
[bad1f53]36#include <arch/cache.h>
[6b781c0]37#include <arch/cpu.h>
[bad1f53]38#include <arch/cp15.h>
[d630139]39#include <cpu.h>
[6b781c0]40#include <arch.h>
[bab75df6]41#include <stdio.h>
[d630139]42
[9048147]43#ifdef CONFIG_FPU
44#include <arch/fpu_context.h>
45#endif
46
/** Integer base-2 logarithm, rounded up.
 *
 * Counts how many bits are needed to represent @a val - 1, which for
 * val >= 1 equals ceil(log2(val)). The subtraction is performed on an
 * unsigned value, so val == 0 wraps around and yields the full bit width
 * of unsigned.
 *
 * @param val Value to take the logarithm of.
 * @return Number of significant bits in (val - 1).
 */
static inline unsigned log2(unsigned val)
{
	unsigned bits = 0;

	for (unsigned rest = val - 1; rest != 0; rest >>= 1)
		bits++;

	return bits;
}
57
/*
 * Forward declarations of the per-level cache geometry readers; they are
 * defined further down but already needed by arch_cpu_identify().
 */
static unsigned dcache_linesize_log(unsigned level);
static unsigned dcache_ways(unsigned level);
static unsigned dcache_sets(unsigned level);
61
/** Translates an implementer (vendor) code to a human readable name.
 *
 * @param id Implementer code as stored in cpu_arch_t::imp_num.
 * @return Vendor name, or "Unknown implementer" for codes not listed here.
 */
static const char *implementer(unsigned id)
{
	static const struct {
		unsigned code;
		const char *name;
	} vendors[] = {
		{ 0x41, "ARM Limited" },
		{ 0x44, "Digital Equipment Corporation" },
		{ 0x4d, "Motorola, Freescale Semiconductor Inc." },
		{ 0x51, "Qualcomm Inc." },
		{ 0x56, "Marvell Semiconductor Inc." },
		{ 0x69, "Intel Corporation" },
	};

	for (unsigned i = 0; i < sizeof(vendors) / sizeof(vendors[0]); ++i) {
		if (vendors[i].code == id)
			return vendors[i].name;
	}

	return "Unknown implementer";
}
[6b781c0]81
82/** Architecture names */
[1433ecda]83static const char *architecture_string(cpu_arch_t *arch)
[8ff9484]84{
85 static const char *arch_data[] = {
86 "ARM", /* 0x0 */
87 "ARMv4", /* 0x1 */
88 "ARMv4T", /* 0x2 */
89 "ARMv5", /* 0x3 */
90 "ARMv5T", /* 0x4 */
91 "ARMv5TE", /* 0x5 */
92 "ARMv5TEJ", /* 0x6 */
93 "ARMv6" /* 0x7 */
94 };
95 if (arch->arch_num < (sizeof(arch_data) / sizeof(arch_data[0])))
96 return arch_data[arch->arch_num];
97 else
98 return arch_data[0];
99}
[6b781c0]100
101/** Retrieves processor identification from CP15 register 0.
[04cb6957]102 *
[6b781c0]103 * @param cpu Structure for storing CPU identification.
[8ff9484]104 * See page B4-1630 of ARM Architecture Reference Manual.
[6b781c0]105 */
106static void arch_cpu_identify(cpu_arch_t *cpu)
107{
[26e3db2]108 const uint32_t ident = MIDR_read();
109
110 cpu->imp_num = (ident >> MIDR_IMPLEMENTER_SHIFT) & MIDR_IMPLEMENTER_MASK;
111 cpu->variant_num = (ident >> MIDR_VARIANT_SHIFT) & MIDR_VARIANT_MASK;
112 cpu->arch_num = (ident >> MIDR_ARCHITECTURE_SHIFT) & MIDR_ARCHITECTURE_MASK;
113 cpu->prim_part_num = (ident >> MIDR_PART_NUMBER_SHIFT) & MIDR_PART_NUMBER_MASK;
114 cpu->rev_num = (ident >> MIDR_REVISION_SHIFT) & MIDR_REVISION_MASK;
115
[8ff9484]116 // TODO CPUs with arch_num == 0xf use CPUID scheme for identification
[bad1f53]117 cpu->dcache_levels = dcache_levels();
118
119 for (unsigned i = 0; i < cpu->dcache_levels; ++i) {
120 cpu->dcache[i].ways = dcache_ways(i);
121 cpu->dcache[i].sets = dcache_sets(i);
122 cpu->dcache[i].way_shift = 31 - log2(cpu->dcache[i].ways);
123 cpu->dcache[i].set_shift = dcache_linesize_log(i);
124 cpu->dcache[i].line_size = 1 << dcache_linesize_log(i);
125 printf("Found DCache L%u: %u-way, %u sets, %u byte lines "
126 "(shifts: w%u, s%u)\n", i + 1, cpu->dcache[i].ways,
127 cpu->dcache[i].sets, cpu->dcache[i].line_size,
128 cpu->dcache[i].way_shift, cpu->dcache[i].set_shift);
129 }
[6b781c0]130}
131
[8316547f]132/** Enables unaligned access and caching for armv6+ */
[d630139]133void cpu_arch_init(void)
134{
[a03b609]135 uint32_t control_reg = SCTLR_read();
[93d8022]136
137 dcache_invalidate();
138 read_barrier();
139
[2826998]140 /* Turn off tex remap, RAZ/WI prior to armv7 */
[a03b609]141 control_reg &= ~SCTLR_TEX_REMAP_EN_FLAG;
[2826998]142 /* Turn off accessed flag, RAZ/WI prior to armv7 */
[a03b609]143 control_reg &= ~(SCTLR_ACCESS_FLAG_EN_FLAG | SCTLR_HW_ACCESS_FLAG_EN_FLAG);
[46a6a5d]144
145 /* Unaligned access is supported on armv6+ */
146#if defined(PROCESSOR_ARCH_armv7_a) | defined(PROCESSOR_ARCH_armv6)
[7c3fb9b]147 /*
148 * Enable unaligned access, RAZ/WI prior to armv6
[2826998]149 * switchable on armv6, RAO/WI writes on armv7,
[8316547f]150 * see ARM Architecture Reference Manual ARMv7-A and ARMv7-R edition
[7c3fb9b]151 * L.3.1 (p. 2456)
152 */
[a03b609]153 control_reg |= SCTLR_UNALIGNED_EN_FLAG;
[7c3fb9b]154 /*
155 * Disable alignment checks, this turns unaligned access to undefined,
156 * unless U bit is set.
157 */
[a03b609]158 control_reg &= ~SCTLR_ALIGN_CHECK_EN_FLAG;
[7c3fb9b]159 /*
160 * Enable caching, On arm prior to armv7 there is only one level
[8316547f]161 * of caches. Data cache is coherent.
162 * "This means that the behavior of accesses from the same observer to
163 * different VAs, that are translated to the same PA
164 * with the same memory attributes, is fully coherent."
165 * ARM Architecture Reference Manual ARMv7-A and ARMv7-R Edition
166 * B3.11.1 (p. 1383)
[46a6a5d]167 * We are safe to turn this on. For arm v6 see ch L.6.2 (p. 2469)
[7a38962]168 * L2 Cache for armv7 is enabled by default (i.e. controlled by
169 * this flag).
[8316547f]170 */
[a03b609]171 control_reg |= SCTLR_CACHE_EN_FLAG;
[7c3fb9b]172 /*
173 * ICache coherency is elaborated on in barrier.h.
[ae7d03c]174 * VIPT and PIPT caches need maintenance only on code modify,
175 * so it should be safe for general use.
176 * Enable branch predictors too as they follow the same rules
177 * as ICache and they can be flushed together
178 */
[7a38962]179 if ((CTR_read() & CTR_L1I_POLICY_MASK) != CTR_L1I_POLICY_AIVIVT) {
180 control_reg |=
181 SCTLR_INST_CACHE_EN_FLAG | SCTLR_BRANCH_PREDICT_EN_FLAG;
[8ff767b]182 } else {
183 control_reg &=
184 ~(SCTLR_INST_CACHE_EN_FLAG | SCTLR_BRANCH_PREDICT_EN_FLAG);
[7a38962]185 }
[46a6a5d]186#endif
[a03b609]187 SCTLR_write(control_reg);
188
[65871bb]189#ifdef CONFIG_FPU
[36e5eb3]190 fpu_setup();
[65871bb]191#endif
[3fa509b]192
193#ifdef PROCESSOR_ARCH_armv7_a
[c8a5c8c]194 if ((ID_PFR1_read() & ID_PFR1_GEN_TIMER_EXT_MASK) !=
195 ID_PFR1_GEN_TIMER_EXT) {
196 PMCR_write(PMCR_read() | PMCR_E_FLAG | PMCR_D_FLAG);
197 PMCNTENSET_write(PMCNTENSET_CYCLE_COUNTER_EN_FLAG);
198 }
[3fa509b]199#endif
[d630139]200}
201
[6b781c0]202/** Retrieves processor identification and stores it to #CPU.arch */
[f94b95b1]203void cpu_identify(void)
[d630139]204{
[6b781c0]205 arch_cpu_identify(&CPU->arch);
[d630139]206}
207
[6b781c0]208/** Prints CPU identification. */
[d630139]209void cpu_print_report(cpu_t *m)
210{
[8ff9484]211 printf("cpu%d: vendor=%s, architecture=%s, part number=%x, "
[6b781c0]212 "variant=%x, revision=%x\n",
[8ff9484]213 m->id, implementer(m->arch.imp_num),
214 architecture_string(&m->arch), m->arch.prim_part_num,
215 m->arch.variant_num, m->arch.rev_num);
[d630139]216}
217
/** Reads the line size exponent of the cache at the given level.
 *
 * On armv7-a the level is selected through CSSELR and the value is
 * extracted from CCSIDR with CCSIDR_LINESIZE_LOG(). Other architectures
 * always get 0.
 * See chapter B4.1.19 of ARM Architecture Reference Manual.
 *
 * @param level Cache level, counted from 0.
 * @return Line size exponent; callers in this file use it both as the set
 *         shift and as the power of two giving the line size in bytes.
 */
static unsigned dcache_linesize_log(unsigned level)
{
#ifdef PROCESSOR_ARCH_armv7_a
	CSSELR_write((level & CCSELR_LEVEL_MASK) << CCSELR_LEVEL_SHIFT);
	const uint32_t size_id = CCSIDR_read();
	return CCSIDR_LINESIZE_LOG(size_id);
#else
	(void) level;
	return 0;
#endif
}
229
/** Reads the number of ways of the cache at the given level.
 *
 * On armv7-a the level is selected through CSSELR and the value is
 * extracted from CCSIDR with CCSIDR_WAYS(). Other architectures always
 * get 0.
 * See chapter B4.1.19 of ARM Architecture Reference Manual.
 *
 * @param level Cache level, counted from 0.
 * @return Value of CCSIDR_WAYS() for the level, 0 when not on armv7-a.
 */
static unsigned dcache_ways(unsigned level)
{
#ifdef PROCESSOR_ARCH_armv7_a
	CSSELR_write((level & CCSELR_LEVEL_MASK) << CCSELR_LEVEL_SHIFT);
	const uint32_t size_id = CCSIDR_read();
	return CCSIDR_WAYS(size_id);
#else
	(void) level;
	return 0;
#endif
}
240
/** Reads the number of sets of the cache at the given level.
 *
 * On armv7-a the level is selected through CSSELR and the value is
 * extracted from CCSIDR with CCSIDR_SETS(). Other architectures always
 * get 0.
 * See chapter B4.1.19 of ARM Architecture Reference Manual.
 *
 * @param level Cache level, counted from 0.
 * @return Value of CCSIDR_SETS() for the level, 0 when not on armv7-a.
 */
static unsigned dcache_sets(unsigned level)
{
#ifdef PROCESSOR_ARCH_armv7_a
	CSSELR_write((level & CCSELR_LEVEL_MASK) << CCSELR_LEVEL_SHIFT);
	const uint32_t size_id = CCSIDR_read();
	return CCSIDR_SETS(size_id);
#else
	(void) level;
	return 0;
#endif
}
251
/** Counts the cache levels that hold data.
 *
 * On armv7-a, walks the cache type fields of CLIDR and counts every
 * entry that is a data-only, separate or unified cache. Other
 * architectures always get 0.
 *
 * @return Number of data-holding cache levels found.
 */
unsigned dcache_levels(void)
{
	unsigned count = 0;
#ifdef PROCESSOR_ARCH_armv7_a
	const uint32_t clidr = CLIDR_read();

	/*
	 * NOTE(review): eight entries are inspected here; confirm against
	 * the CLIDR_CACHE() definition and the CLIDR layout that the last
	 * index still addresses a cache type field.
	 */
	for (unsigned idx = 0; idx < 8; ++idx) {
		const unsigned ctype = CLIDR_CACHE(idx, clidr);

		if (ctype == CLIDR_DCACHE_ONLY || ctype == CLIDR_SEP_CACHE ||
		    ctype == CLIDR_UNI_CACHE)
			++count;
	}
#endif
	return count;
}
271
/** Cleans (and optionally invalidates) one cache level by set/way.
 *
 * Issues one DCCSW (clean) or DCCISW (clean and invalidate) operation for
 * every way/set combination of the given level.
 *
 * @param level      Cache level, counted from 0 (only the low 3 bits are used).
 * @param invalidate true to use DCCISW, false to use DCCSW.
 * @param ways       Number of ways to iterate over.
 * @param sets       Number of sets to iterate over.
 * @param way_shift  Bit position of the way index in the operand. A value
 *                   of 32 or more means the operand has no way field.
 * @param set_shift  Bit position of the set index in the operand.
 */
static void dcache_clean_manual(unsigned level, bool invalidate,
    unsigned ways, unsigned sets, unsigned way_shift, unsigned set_shift)
{
	/* The level selector does not change inside the loops. */
	const uint32_t level_bits = (level & 0x7) << 1;

	for (unsigned way = 0; way < ways; ++way) {
		/*
		 * Callers compute way_shift as 32 - log2(ways), which is 32
		 * for a single-way cache. Shifting a 32-bit value by 32 is
		 * undefined behaviour in C, so the way field is left empty
		 * in that case.
		 */
		const uint32_t way_bits =
		    (way_shift < 32) ? ((uint32_t) way << way_shift) : 0;

		for (unsigned set = 0; set < sets; ++set) {
			const uint32_t val =
			    level_bits | (set << set_shift) | way_bits;

			if (invalidate)
				DCCISW_write(val);
			else
				DCCSW_write(val);
		}
	}
}
288
/** Cleans all data cache levels by set/way.
 *
 * The geometry of each level is read from the hardware on every call
 * rather than taken from the values cached in the CPU structure.
 * See ARM Architecture Reference Manual ch. B4.2.1 p. B4-1724.
 */
void dcache_flush(void)
{
	const unsigned level_count = dcache_levels();

	for (unsigned level = 0; level < level_count; ++level) {
		const unsigned n_ways = dcache_ways(level);
		const unsigned n_sets = dcache_sets(level);
		const unsigned line_shift = dcache_linesize_log(level);

		dcache_clean_manual(level, false, n_ways, n_sets,
		    32 - log2(n_ways), line_shift);
	}
}
301
/** Cleans and invalidates all data cache levels by set/way.
 *
 * The geometry of each level is read from the hardware on every call
 * rather than taken from the values cached in the CPU structure.
 * See ARM Architecture Reference Manual ch. B4.2.1 p. B4-1724.
 */
void dcache_flush_invalidate(void)
{
	const unsigned level_count = dcache_levels();

	for (unsigned level = 0; level < level_count; ++level) {
		const unsigned n_ways = dcache_ways(level);
		const unsigned n_sets = dcache_sets(level);
		const unsigned line_shift = dcache_linesize_log(level);

		dcache_clean_manual(level, true, n_ways, n_sets,
		    32 - log2(n_ways), line_shift);
	}
}
314
315void cpu_dcache_flush(void)
316{
317 for (unsigned i = 0; i < CPU->arch.dcache_levels; ++i)
318 dcache_clean_manual(i, false,
319 CPU->arch.dcache[i].ways, CPU->arch.dcache[i].sets,
320 CPU->arch.dcache[i].way_shift, CPU->arch.dcache[i].set_shift);
321}
322
323void cpu_dcache_flush_invalidate(void)
324{
325 const unsigned levels = dcache_levels();
326 for (unsigned i = 0; i < levels; ++i)
327 dcache_clean_manual(i, true,
328 CPU->arch.dcache[i].ways, CPU->arch.dcache[i].sets,
329 CPU->arch.dcache[i].way_shift, CPU->arch.dcache[i].set_shift);
330}
331
/** Invalidates the instruction cache.
 *
 * Writes 0 to ICIALLU on armv7-a and to ICIALL on all other architectures.
 */
void icache_invalidate(void)
{
#if !defined(PROCESSOR_ARCH_armv7_a)
	ICIALL_write(0);
#else
	ICIALLU_write(0);
#endif
}
340
341#if !defined(PROCESSOR_ARCH_armv7_a)
342static bool cache_is_unified(void)
343{
344 if (MIDR_read() != CTR_read()) {
345 /* We have the CTR register */
346 return (CTR_read() & CTR_SEP_FLAG) != CTR_SEP_FLAG;
347 } else {
348 panic("Unknown cache type");
349 }
350}
351#endif
352
/** Invalidates the data cache.
 *
 * On armv7-a this is done by dcache_flush_invalidate(). On other
 * architectures 0 is written to CIALL when the cache is unified and to
 * DCIALL otherwise.
 */
void dcache_invalidate(void)
{
#if !defined(PROCESSOR_ARCH_armv7_a)
	if (!cache_is_unified()) {
		DCIALL_write(0);
		return;
	}

	CIALL_write(0);
#else
	dcache_flush_invalidate();
#endif
}
364
/** Cleans the data cache line holding the given address.
 *
 * On armv7-a the address is written to DCCMVAU. On other architectures it
 * is written to CCMVA when the cache is unified and to DCCMVA otherwise.
 *
 * @param mva Address passed to the cache maintenance operation.
 */
void dcache_clean_mva_pou(uintptr_t mva)
{
#if !defined(PROCESSOR_ARCH_armv7_a)
	if (!cache_is_unified()) {
		DCCMVA_write(mva);
		return;
	}

	CCMVA_write(mva);
#else
	DCCMVAU_write(mva);
#endif
}
376
[d630139]377/** @}
378 */
Note: See TracBrowser for help on using the repository browser.