]>
Commit | Line | Data |
---|---|---|
e761ecdb SG |
1 | /* |
2 | * Copyright (c) 2012 The Chromium OS Authors. | |
3 | * | |
076bb44b BM |
4 | * TSC calibration codes are adapted from Linux kernel |
5 | * arch/x86/kernel/tsc_msr.c and arch/x86/kernel/tsc.c | |
6 | * | |
1a459660 | 7 | * SPDX-License-Identifier: GPL-2.0+ |
e761ecdb SG |
8 | */ |
9 | ||
10 | #include <common.h> | |
4e51fc23 | 11 | #include <dm.h> |
e761ecdb | 12 | #include <malloc.h> |
4e51fc23 | 13 | #include <timer.h> |
0b992e49 | 14 | #include <asm/cpu.h> |
e761ecdb SG |
15 | #include <asm/io.h> |
16 | #include <asm/i8254.h> | |
17 | #include <asm/ibmpc.h> | |
18 | #include <asm/msr.h> | |
19 | #include <asm/u-boot-x86.h> | |
20 | ||
076bb44b BM |
21 | #define MAX_NUM_FREQS 8 |
22 | ||
e761ecdb SG |
23 | DECLARE_GLOBAL_DATA_PTR; |
24 | ||
076bb44b BM |
25 | /* |
26 | * According to Intel 64 and IA-32 System Programming Guide, | |
27 | * if MSR_PERF_STAT[31] is set, the maximum resolved bus ratio can be | |
28 | * read in MSR_PLATFORM_ID[12:8], otherwise in MSR_PERF_STAT[44:40]. | |
29 | * Unfortunately some Intel Atom SoCs aren't quite compliant to this, | |
30 | * so we need manually differentiate SoC families. This is what the | |
31 | * field msr_plat does. | |
32 | */ | |
33 | struct freq_desc { | |
34 | u8 x86_family; /* CPU family */ | |
35 | u8 x86_model; /* model */ | |
5c1b685e SG |
36 | /* 2: use 100MHz, 1: use MSR_PLATFORM_INFO, 0: MSR_IA32_PERF_STATUS */ |
37 | u8 msr_plat; | |
076bb44b BM |
38 | u32 freqs[MAX_NUM_FREQS]; |
39 | }; | |
40 | ||
41 | static struct freq_desc freq_desc_tables[] = { | |
42 | /* PNW */ | |
c6367748 | 43 | { 6, 0x27, 0, { 0, 0, 0, 0, 0, 99840, 0, 83200 } }, |
076bb44b | 44 | /* CLV+ */ |
c6367748 BM |
45 | { 6, 0x35, 0, { 0, 133200, 0, 0, 0, 99840, 0, 83200 } }, |
46 | /* TNG - Intel Atom processor Z3400 series */ | |
47 | { 6, 0x4a, 1, { 0, 99840, 133200, 0, 0, 0, 0, 0 } }, | |
48 | /* VLV2 - Intel Atom processor E3000, Z3600, Z3700 series */ | |
49 | { 6, 0x37, 1, { 83200, 99840, 133200, 166400, 0, 0, 0, 0 } }, | |
50 | /* ANN - Intel Atom processor Z3500 series */ | |
51 | { 6, 0x5a, 1, { 83200, 99840, 133200, 99840, 0, 0, 0, 0 } }, | |
5c1b685e SG |
52 | /* Ivybridge */ |
53 | { 6, 0x3a, 2, { 0, 0, 0, 0, 0, 0, 0, 0 } }, | |
076bb44b BM |
54 | }; |
55 | ||
56 | static int match_cpu(u8 family, u8 model) | |
57 | { | |
58 | int i; | |
59 | ||
60 | for (i = 0; i < ARRAY_SIZE(freq_desc_tables); i++) { | |
61 | if ((family == freq_desc_tables[i].x86_family) && | |
62 | (model == freq_desc_tables[i].x86_model)) | |
63 | return i; | |
64 | } | |
65 | ||
66 | return -1; | |
67 | } | |
68 | ||
/*
 * Translate a CPU reference clock frequency ID (0-7) into the reference
 * clock frequency (KHz) listed for the given freq_desc_tables entry.
 */
#define id_to_freq(cpu_index, freq_id) \
	(freq_desc_tables[(cpu_index)].freqs[(freq_id)])
72 | ||
73 | /* | |
74 | * Do MSR calibration only for known/supported CPUs. | |
75 | * | |
76 | * Returns the calibration value or 0 if MSR calibration failed. | |
77 | */ | |
3ba6a0f4 | 78 | static unsigned long __maybe_unused try_msr_calibrate_tsc(void) |
076bb44b BM |
79 | { |
80 | u32 lo, hi, ratio, freq_id, freq; | |
81 | unsigned long res; | |
82 | int cpu_index; | |
83 | ||
0b992e49 BM |
84 | if (gd->arch.x86_vendor != X86_VENDOR_INTEL) |
85 | return 0; | |
86 | ||
076bb44b BM |
87 | cpu_index = match_cpu(gd->arch.x86, gd->arch.x86_model); |
88 | if (cpu_index < 0) | |
89 | return 0; | |
90 | ||
91 | if (freq_desc_tables[cpu_index].msr_plat) { | |
92 | rdmsr(MSR_PLATFORM_INFO, lo, hi); | |
d92e9c8d | 93 | ratio = (lo >> 8) & 0xff; |
076bb44b BM |
94 | } else { |
95 | rdmsr(MSR_IA32_PERF_STATUS, lo, hi); | |
96 | ratio = (hi >> 8) & 0x1f; | |
97 | } | |
98 | debug("Maximum core-clock to bus-clock ratio: 0x%x\n", ratio); | |
99 | ||
5c1b685e SG |
100 | if (freq_desc_tables[cpu_index].msr_plat == 2) { |
101 | /* TODO: Figure out how best to deal with this */ | |
c6367748 | 102 | freq = 99840; |
5c1b685e SG |
103 | debug("Using frequency: %u KHz\n", freq); |
104 | } else { | |
105 | /* Get FSB FREQ ID */ | |
106 | rdmsr(MSR_FSB_FREQ, lo, hi); | |
107 | freq_id = lo & 0x7; | |
108 | freq = id_to_freq(cpu_index, freq_id); | |
109 | debug("Resolved frequency ID: %u, frequency: %u KHz\n", | |
110 | freq_id, freq); | |
111 | } | |
076bb44b BM |
112 | |
113 | /* TSC frequency = maximum resolved freq * maximum resolved bus ratio */ | |
114 | res = freq * ratio / 1000; | |
115 | debug("TSC runs at %lu MHz\n", res); | |
116 | ||
117 | return res; | |
076bb44b BM |
118 | } |
119 | ||
80de0495 BM |
/*
 * Read the current MSB of the PIT counter and report whether it matches
 * the expected value. This is the building block used to check that we
 * are running on sufficiently fast and non-virtualized hardware.
 *
 * Our expectations are:
 *
 *  - the PIT is running at roughly 1.19MHz
 *
 *  - each IO is going to take about 1us on real hardware,
 *    but we allow it to be much faster (by a factor of 10) or
 *    _slightly_ slower (ie we allow up to a 2us read+counter
 *    update) - anything else implies an unacceptably slow CPU
 *    or PIT for the fast calibration to work.
 *
 *  - with 256 PIT ticks to read the value, we have 214us to
 *    see the same MSB (and overhead like doing a single TSC
 *    read per MSB value etc).
 *
 *  - We're doing 2 reads per loop (LSB, MSB), and we expect
 *    them each to take about a microsecond on real hardware.
 *    So we expect a count value of around 100. But we'll be
 *    generous, and accept anything over 50.
 *    NOTE(review): pit_expect_msb() actually accepts any count
 *    above 5, not 50.
 *
 *  - if the PIT is stuck, and we see *many* more reads, we
 *    return early (and the next caller of pit_expect_msb()
 *    then considers it a failure when they don't see the
 *    next expected value).
 *
 * These expectations mean that we know that we have seen the
 * transition from one expected value to another with a fairly
 * high accuracy, and we didn't miss any events. We can thus
 * use the TSC value at the transitions to calculate a pretty
 * good value for the TSC frequency.
 *
 * Returns non-zero if the MSB read from port 0x42 equals @val.
 */
static inline int pit_verify_msb(unsigned char val)
{
	/* The first read yields the LSB, which is of no interest here */
	(void)inb(0x42);

	/* The second read yields the MSB */
	return inb(0x42) == val;
}
161 | ||
162 | static inline int pit_expect_msb(unsigned char val, u64 *tscp, | |
163 | unsigned long *deltap) | |
164 | { | |
165 | int count; | |
166 | u64 tsc = 0, prev_tsc = 0; | |
167 | ||
168 | for (count = 0; count < 50000; count++) { | |
169 | if (!pit_verify_msb(val)) | |
170 | break; | |
171 | prev_tsc = tsc; | |
172 | tsc = rdtsc(); | |
173 | } | |
174 | *deltap = rdtsc() - prev_tsc; | |
175 | *tscp = tsc; | |
176 | ||
177 | /* | |
178 | * We require _some_ success, but the quality control | |
179 | * will be based on the error terms on the TSC values. | |
180 | */ | |
181 | return count > 5; | |
182 | } | |
183 | ||
/*
 * Upper bound on the number of PIT MSB values we are prepared to watch.
 * The goal is a maximum error rate of 500ppm (the real error is much
 * smaller in practice), without ever spending more than 50ms on it.
 */
#define MAX_QUICK_PIT_MS 50
#define MAX_QUICK_PIT_ITERATIONS (MAX_QUICK_PIT_MS * PIT_TICK_RATE / 1000 / 256)
192 | ||
3ba6a0f4 | 193 | static unsigned long __maybe_unused quick_pit_calibrate(void) |
80de0495 BM |
194 | { |
195 | int i; | |
196 | u64 tsc, delta; | |
197 | unsigned long d1, d2; | |
198 | ||
199 | /* Set the Gate high, disable speaker */ | |
200 | outb((inb(0x61) & ~0x02) | 0x01, 0x61); | |
201 | ||
202 | /* | |
203 | * Counter 2, mode 0 (one-shot), binary count | |
204 | * | |
205 | * NOTE! Mode 2 decrements by two (and then the | |
206 | * output is flipped each time, giving the same | |
207 | * final output frequency as a decrement-by-one), | |
208 | * so mode 0 is much better when looking at the | |
209 | * individual counts. | |
210 | */ | |
211 | outb(0xb0, 0x43); | |
212 | ||
213 | /* Start at 0xffff */ | |
214 | outb(0xff, 0x42); | |
215 | outb(0xff, 0x42); | |
216 | ||
217 | /* | |
218 | * The PIT starts counting at the next edge, so we | |
219 | * need to delay for a microsecond. The easiest way | |
220 | * to do that is to just read back the 16-bit counter | |
221 | * once from the PIT. | |
222 | */ | |
223 | pit_verify_msb(0); | |
224 | ||
225 | if (pit_expect_msb(0xff, &tsc, &d1)) { | |
226 | for (i = 1; i <= MAX_QUICK_PIT_ITERATIONS; i++) { | |
227 | if (!pit_expect_msb(0xff-i, &delta, &d2)) | |
228 | break; | |
229 | ||
230 | /* | |
231 | * Iterate until the error is less than 500 ppm | |
232 | */ | |
233 | delta -= tsc; | |
234 | if (d1+d2 >= delta >> 11) | |
235 | continue; | |
236 | ||
237 | /* | |
238 | * Check the PIT one more time to verify that | |
239 | * all TSC reads were stable wrt the PIT. | |
240 | * | |
241 | * This also guarantees serialization of the | |
242 | * last cycle read ('d2') in pit_expect_msb. | |
243 | */ | |
244 | if (!pit_verify_msb(0xfe - i)) | |
245 | break; | |
246 | goto success; | |
247 | } | |
248 | } | |
249 | debug("Fast TSC calibration failed\n"); | |
250 | return 0; | |
251 | ||
252 | success: | |
253 | /* | |
254 | * Ok, if we get here, then we've seen the | |
255 | * MSB of the PIT decrement 'i' times, and the | |
256 | * error has shrunk to less than 500 ppm. | |
257 | * | |
258 | * As a result, we can depend on there not being | |
259 | * any odd delays anywhere, and the TSC reads are | |
260 | * reliable (within the error). | |
261 | * | |
262 | * kHz = ticks / time-in-seconds / 1000; | |
263 | * kHz = (t2 - t1) / (I * 256 / PIT_TICK_RATE) / 1000 | |
264 | * kHz = ((t2 - t1) * PIT_TICK_RATE) / (I * 256 * 1000) | |
265 | */ | |
266 | delta *= PIT_TICK_RATE; | |
267 | delta /= (i*256*1000); | |
268 | debug("Fast TSC calibration using PIT\n"); | |
269 | return delta / 1000; | |
270 | } | |
271 | ||
e761ecdb | 272 | /* Get the speed of the TSC timer in MHz */ |
2f80fc50 | 273 | unsigned notrace long get_tbclk_mhz(void) |
e761ecdb | 274 | { |
4e51fc23 | 275 | return get_tbclk() / 1000000; |
e761ecdb SG |
276 | } |
277 | ||
e761ecdb SG |
278 | static ulong get_ms_timer(void) |
279 | { | |
280 | return (get_ticks() * 1000) / get_tbclk(); | |
281 | } | |
282 | ||
283 | ulong get_timer(ulong base) | |
284 | { | |
285 | return get_ms_timer() - base; | |
286 | } | |
287 | ||
2f80fc50 | 288 | ulong notrace timer_get_us(void) |
e761ecdb SG |
289 | { |
290 | return get_ticks() / get_tbclk_mhz(); | |
291 | } | |
292 | ||
293 | ulong timer_get_boot_us(void) | |
294 | { | |
295 | return timer_get_us(); | |
296 | } | |
297 | ||
298 | void __udelay(unsigned long usec) | |
299 | { | |
300 | u64 now = get_ticks(); | |
301 | u64 stop; | |
302 | ||
303 | stop = now + usec * get_tbclk_mhz(); | |
304 | ||
305 | while ((int64_t)(stop - get_ticks()) > 0) | |
417576c2 MY |
306 | #if defined(CONFIG_QEMU) && defined(CONFIG_SMP) |
307 | /* | |
308 | * Add a 'pause' instruction on qemu target, | |
309 | * to give other VCPUs a chance to run. | |
310 | */ | |
311 | asm volatile("pause"); | |
312 | #else | |
e761ecdb | 313 | ; |
417576c2 | 314 | #endif |
e761ecdb SG |
315 | } |
316 | ||
4e51fc23 BM |
317 | static int tsc_timer_get_count(struct udevice *dev, u64 *count) |
318 | { | |
319 | u64 now_tick = rdtsc(); | |
320 | ||
321 | *count = now_tick - gd->arch.tsc_base; | |
322 | ||
323 | return 0; | |
324 | } | |
325 | ||
326 | static int tsc_timer_probe(struct udevice *dev) | |
327 | { | |
328 | struct timer_dev_priv *uc_priv = dev_get_uclass_priv(dev); | |
329 | ||
330 | gd->arch.tsc_base = rdtsc(); | |
331 | ||
332 | /* | |
333 | * If there is no clock frequency specified in the device tree, | |
334 | * calibrate it by ourselves. | |
335 | */ | |
336 | if (!uc_priv->clock_rate) { | |
337 | unsigned long fast_calibrate; | |
338 | ||
339 | fast_calibrate = try_msr_calibrate_tsc(); | |
340 | if (!fast_calibrate) { | |
341 | fast_calibrate = quick_pit_calibrate(); | |
342 | if (!fast_calibrate) | |
343 | panic("TSC frequency is ZERO"); | |
344 | } | |
345 | ||
346 | uc_priv->clock_rate = fast_calibrate * 1000000; | |
347 | } | |
348 | ||
349 | return 0; | |
350 | } | |
351 | ||
352 | static const struct timer_ops tsc_timer_ops = { | |
353 | .get_count = tsc_timer_get_count, | |
354 | }; | |
355 | ||
356 | static const struct udevice_id tsc_timer_ids[] = { | |
357 | { .compatible = "x86,tsc-timer", }, | |
358 | { } | |
359 | }; | |
360 | ||
361 | U_BOOT_DRIVER(tsc_timer) = { | |
362 | .name = "tsc_timer", | |
363 | .id = UCLASS_TIMER, | |
364 | .of_match = tsc_timer_ids, | |
365 | .probe = tsc_timer_probe, | |
366 | .ops = &tsc_timer_ops, | |
367 | .flags = DM_FLAG_PRE_RELOC, | |
368 | }; |