1 /************************************************************************
2 *
3 * Copyright (c) 2013-2015 Intel Corporation.
4 *
5 * This program and the accompanying materials
6 * are licensed and made available under the terms and conditions of the BSD License
7 * which accompanies this distribution. The full text of the license may be found at
8 * http://opensource.org/licenses/bsd-license.php
9 *
10 * THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
12 *
13 ***************************************************************************/
14
15 #include "mrc.h"
16 #include "memory_options.h"
17
18 #include "meminit_utils.h"
19 #include "hte.h"
20 #include "io.h"
21
22 void select_hte(
23 MRCParams_t *mrc_params);
24
25 static uint8_t first_run = 0;
26
// Register codes for the internal Vref setting, ordered lowest to highest.
// set_vref() looks a code up by setting index; get_vref() maps a code read
// from the register back to its index.
// Indices 0-31 hold codes 0x3F down to 0x20, indices 32-63 hold 0x00 up to 0x1F.
const uint8_t vref_codes[64] =
{
  0x3F, 0x3E, 0x3D, 0x3C, 0x3B, 0x3A, 0x39, 0x38, // 00 - 07
  0x37, 0x36, 0x35, 0x34, 0x33, 0x32, 0x31, 0x30, // 08 - 15
  0x2F, 0x2E, 0x2D, 0x2C, 0x2B, 0x2A, 0x29, 0x28, // 16 - 23
  0x27, 0x26, 0x25, 0x24, 0x23, 0x22, 0x21, 0x20, // 24 - 31
  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, // 32 - 39
  0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, // 40 - 47
  0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, // 48 - 55
  0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F  // 56 - 63
};

#ifdef EMU
// Current post code, kept for debugging in EMU builds.
uint32_t PostCode;
#endif
39
40 // set_rcvn:
41 //
42 // This function will program the RCVEN delays.
43 // (currently doesn't comprehend rank)
set_rcvn(uint8_t channel,uint8_t rank,uint8_t byte_lane,uint32_t pi_count)44 void set_rcvn(
45 uint8_t channel,
46 uint8_t rank,
47 uint8_t byte_lane,
48 uint32_t pi_count)
49 {
50 uint32_t reg;
51 uint32_t msk;
52 uint32_t tempD;
53
54 ENTERFN();
55 DPF(D_TRN, "Rcvn ch%d rnk%d ln%d : pi=%03X\n", channel, rank, byte_lane, pi_count);
56
57 // RDPTR (1/2 MCLK, 64 PIs)
58 // BL0 -> B01PTRCTL0[11:08] (0x0-0xF)
59 // BL1 -> B01PTRCTL0[23:20] (0x0-0xF)
60 reg = B01PTRCTL0 + ((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + (channel * DDRIODQ_CH_OFFSET);
61 msk = (byte_lane & BIT0) ? (BIT23 | BIT22 | BIT21 | BIT20) : (BIT11 | BIT10 | BIT9 | BIT8);
62 tempD = (byte_lane & BIT0) ? ((pi_count / HALF_CLK) << 20) : ((pi_count / HALF_CLK) << 8);
63 isbM32m(DDRPHY, reg, tempD, msk);
64
65 // Adjust PI_COUNT
66 pi_count -= ((pi_count / HALF_CLK) & 0xF) * HALF_CLK;
67
68 // PI (1/64 MCLK, 1 PIs)
69 // BL0 -> B0DLLPICODER0[29:24] (0x00-0x3F)
70 // BL1 -> B1DLLPICODER0[29:24] (0x00-0x3F)
71 reg = (byte_lane & BIT0) ? (B1DLLPICODER0) : (B0DLLPICODER0);
72 reg += (((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + (channel * DDRIODQ_CH_OFFSET));
73 msk = (BIT29 | BIT28 | BIT27 | BIT26 | BIT25 | BIT24);
74 tempD = pi_count << 24;
75 isbM32m(DDRPHY, reg, tempD, msk);
76
77 // DEADBAND
78 // BL0/1 -> B01DBCTL1[08/11] (+1 select)
79 // BL0/1 -> B01DBCTL1[02/05] (enable)
80 reg = B01DBCTL1 + ((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + (channel * DDRIODQ_CH_OFFSET);
81 msk = 0x00;
82 tempD = 0x00;
83 // enable
84 msk |= (byte_lane & BIT0) ? (BIT5) : (BIT2);
85 if ((pi_count < EARLY_DB) || (pi_count > LATE_DB))
86 {
87 tempD |= msk;
88 }
89 // select
90 msk |= (byte_lane & BIT0) ? (BIT11) : (BIT8);
91 if (pi_count < EARLY_DB)
92 {
93 tempD |= msk;
94 }
95 isbM32m(DDRPHY, reg, tempD, msk);
96
97 // error check
98 if (pi_count > 0x3F)
99 {
100 training_message(channel, rank, byte_lane);
101 post_code(0xEE, 0xE0);
102 }
103
104 LEAVEFN();
105 return;
106 }
107
108 // get_rcvn:
109 //
110 // This function will return the current RCVEN delay on the given channel, rank, byte_lane as an absolute PI count.
111 // (currently doesn't comprehend rank)
get_rcvn(uint8_t channel,uint8_t rank,uint8_t byte_lane)112 uint32_t get_rcvn(
113 uint8_t channel,
114 uint8_t rank,
115 uint8_t byte_lane)
116 {
117 uint32_t reg;
118 uint32_t tempD;
119 uint32_t pi_count;
120
121 ENTERFN();
122
123 // RDPTR (1/2 MCLK, 64 PIs)
124 // BL0 -> B01PTRCTL0[11:08] (0x0-0xF)
125 // BL1 -> B01PTRCTL0[23:20] (0x0-0xF)
126 reg = B01PTRCTL0 + ((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + (channel * DDRIODQ_CH_OFFSET);
127 tempD = isbR32m(DDRPHY, reg);
128 tempD >>= (byte_lane & BIT0) ? (20) : (8);
129 tempD &= 0xF;
130
131 // Adjust PI_COUNT
132 pi_count = tempD * HALF_CLK;
133
134 // PI (1/64 MCLK, 1 PIs)
135 // BL0 -> B0DLLPICODER0[29:24] (0x00-0x3F)
136 // BL1 -> B1DLLPICODER0[29:24] (0x00-0x3F)
137 reg = (byte_lane & BIT0) ? (B1DLLPICODER0) : (B0DLLPICODER0);
138 reg += (((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + (channel * DDRIODQ_CH_OFFSET));
139 tempD = isbR32m(DDRPHY, reg);
140 tempD >>= 24;
141 tempD &= 0x3F;
142
143 // Adjust PI_COUNT
144 pi_count += tempD;
145
146 LEAVEFN();
147 return pi_count;
148 }
149
150 // set_rdqs:
151 //
152 // This function will program the RDQS delays based on an absolute amount of PIs.
153 // (currently doesn't comprehend rank)
set_rdqs(uint8_t channel,uint8_t rank,uint8_t byte_lane,uint32_t pi_count)154 void set_rdqs(
155 uint8_t channel,
156 uint8_t rank,
157 uint8_t byte_lane,
158 uint32_t pi_count)
159 {
160 uint32_t reg;
161 uint32_t msk;
162 uint32_t tempD;
163
164 ENTERFN();
165 DPF(D_TRN, "Rdqs ch%d rnk%d ln%d : pi=%03X\n", channel, rank, byte_lane, pi_count);
166
167 // PI (1/128 MCLK)
168 // BL0 -> B0RXDQSPICODE[06:00] (0x00-0x47)
169 // BL1 -> B1RXDQSPICODE[06:00] (0x00-0x47)
170 reg = (byte_lane & BIT0) ? (B1RXDQSPICODE) : (B0RXDQSPICODE);
171 reg += (((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + (channel * DDRIODQ_CH_OFFSET));
172 msk = (BIT6 | BIT5 | BIT4 | BIT3 | BIT2 | BIT1 | BIT0);
173 tempD = pi_count << 0;
174 isbM32m(DDRPHY, reg, tempD, msk);
175
176 // error check (shouldn't go above 0x3F)
177 if (pi_count > 0x47)
178 {
179 training_message(channel, rank, byte_lane);
180 post_code(0xEE, 0xE1);
181 }
182
183 LEAVEFN();
184 return;
185 }
186
187 // get_rdqs:
188 //
189 // This function will return the current RDQS delay on the given channel, rank, byte_lane as an absolute PI count.
190 // (currently doesn't comprehend rank)
get_rdqs(uint8_t channel,uint8_t rank,uint8_t byte_lane)191 uint32_t get_rdqs(
192 uint8_t channel,
193 uint8_t rank,
194 uint8_t byte_lane)
195 {
196 uint32_t reg;
197 uint32_t tempD;
198 uint32_t pi_count;
199
200 ENTERFN();
201
202 // PI (1/128 MCLK)
203 // BL0 -> B0RXDQSPICODE[06:00] (0x00-0x47)
204 // BL1 -> B1RXDQSPICODE[06:00] (0x00-0x47)
205 reg = (byte_lane & BIT0) ? (B1RXDQSPICODE) : (B0RXDQSPICODE);
206 reg += (((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + (channel * DDRIODQ_CH_OFFSET));
207 tempD = isbR32m(DDRPHY, reg);
208
209 // Adjust PI_COUNT
210 pi_count = tempD & 0x7F;
211
212 LEAVEFN();
213 return pi_count;
214 }
215
216 // set_wdqs:
217 //
218 // This function will program the WDQS delays based on an absolute amount of PIs.
219 // (currently doesn't comprehend rank)
set_wdqs(uint8_t channel,uint8_t rank,uint8_t byte_lane,uint32_t pi_count)220 void set_wdqs(
221 uint8_t channel,
222 uint8_t rank,
223 uint8_t byte_lane,
224 uint32_t pi_count)
225 {
226 uint32_t reg;
227 uint32_t msk;
228 uint32_t tempD;
229
230 ENTERFN();
231 DPF(D_TRN, "Wdqs ch%d rnk%d ln%d : pi=%03X\n", channel, rank, byte_lane, pi_count);
232
233 // RDPTR (1/2 MCLK, 64 PIs)
234 // BL0 -> B01PTRCTL0[07:04] (0x0-0xF)
235 // BL1 -> B01PTRCTL0[19:16] (0x0-0xF)
236 reg = B01PTRCTL0 + ((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + (channel * DDRIODQ_CH_OFFSET);
237 msk = (byte_lane & BIT0) ? (BIT19 | BIT18 | BIT17 | BIT16) : (BIT7 | BIT6 | BIT5 | BIT4);
238 tempD = pi_count / HALF_CLK;
239 tempD <<= (byte_lane & BIT0) ? (16) : (4);
240 isbM32m(DDRPHY, reg, tempD, msk);
241
242 // Adjust PI_COUNT
243 pi_count -= ((pi_count / HALF_CLK) & 0xF) * HALF_CLK;
244
245 // PI (1/64 MCLK, 1 PIs)
246 // BL0 -> B0DLLPICODER0[21:16] (0x00-0x3F)
247 // BL1 -> B1DLLPICODER0[21:16] (0x00-0x3F)
248 reg = (byte_lane & BIT0) ? (B1DLLPICODER0) : (B0DLLPICODER0);
249 reg += (((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + (channel * DDRIODQ_CH_OFFSET));
250 msk = (BIT21 | BIT20 | BIT19 | BIT18 | BIT17 | BIT16);
251 tempD = pi_count << 16;
252 isbM32m(DDRPHY, reg, tempD, msk);
253
254 // DEADBAND
255 // BL0/1 -> B01DBCTL1[07/10] (+1 select)
256 // BL0/1 -> B01DBCTL1[01/04] (enable)
257 reg = B01DBCTL1 + ((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + (channel * DDRIODQ_CH_OFFSET);
258 msk = 0x00;
259 tempD = 0x00;
260 // enable
261 msk |= (byte_lane & BIT0) ? (BIT4) : (BIT1);
262 if ((pi_count < EARLY_DB) || (pi_count > LATE_DB))
263 {
264 tempD |= msk;
265 }
266 // select
267 msk |= (byte_lane & BIT0) ? (BIT10) : (BIT7);
268 if (pi_count < EARLY_DB)
269 {
270 tempD |= msk;
271 }
272 isbM32m(DDRPHY, reg, tempD, msk);
273
274 // error check
275 if (pi_count > 0x3F)
276 {
277 training_message(channel, rank, byte_lane);
278 post_code(0xEE, 0xE2);
279 }
280
281 LEAVEFN();
282 return;
283 }
284
285 // get_wdqs:
286 //
287 // This function will return the amount of WDQS delay on the given channel, rank, byte_lane as an absolute PI count.
288 // (currently doesn't comprehend rank)
get_wdqs(uint8_t channel,uint8_t rank,uint8_t byte_lane)289 uint32_t get_wdqs(
290 uint8_t channel,
291 uint8_t rank,
292 uint8_t byte_lane)
293 {
294 uint32_t reg;
295 uint32_t tempD;
296 uint32_t pi_count;
297
298 ENTERFN();
299
300 // RDPTR (1/2 MCLK, 64 PIs)
301 // BL0 -> B01PTRCTL0[07:04] (0x0-0xF)
302 // BL1 -> B01PTRCTL0[19:16] (0x0-0xF)
303 reg = B01PTRCTL0 + ((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + (channel * DDRIODQ_CH_OFFSET);
304 tempD = isbR32m(DDRPHY, reg);
305 tempD >>= (byte_lane & BIT0) ? (16) : (4);
306 tempD &= 0xF;
307
308 // Adjust PI_COUNT
309 pi_count = (tempD * HALF_CLK);
310
311 // PI (1/64 MCLK, 1 PIs)
312 // BL0 -> B0DLLPICODER0[21:16] (0x00-0x3F)
313 // BL1 -> B1DLLPICODER0[21:16] (0x00-0x3F)
314 reg = (byte_lane & BIT0) ? (B1DLLPICODER0) : (B0DLLPICODER0);
315 reg += (((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + (channel * DDRIODQ_CH_OFFSET));
316 tempD = isbR32m(DDRPHY, reg);
317 tempD >>= 16;
318 tempD &= 0x3F;
319
320 // Adjust PI_COUNT
321 pi_count += tempD;
322
323 LEAVEFN();
324 return pi_count;
325 }
326
327 // set_wdq:
328 //
329 // This function will program the WDQ delays based on an absolute number of PIs.
330 // (currently doesn't comprehend rank)
set_wdq(uint8_t channel,uint8_t rank,uint8_t byte_lane,uint32_t pi_count)331 void set_wdq(
332 uint8_t channel,
333 uint8_t rank,
334 uint8_t byte_lane,
335 uint32_t pi_count)
336 {
337 uint32_t reg;
338 uint32_t msk;
339 uint32_t tempD;
340
341 ENTERFN();
342 DPF(D_TRN, "Wdq ch%d rnk%d ln%d : pi=%03X\n", channel, rank, byte_lane, pi_count);
343
344 // RDPTR (1/2 MCLK, 64 PIs)
345 // BL0 -> B01PTRCTL0[03:00] (0x0-0xF)
346 // BL1 -> B01PTRCTL0[15:12] (0x0-0xF)
347 reg = B01PTRCTL0 + ((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + (channel * DDRIODQ_CH_OFFSET);
348 msk = (byte_lane & BIT0) ? (BIT15 | BIT14 | BIT13 | BIT12) : (BIT3 | BIT2 | BIT1 | BIT0);
349 tempD = pi_count / HALF_CLK;
350 tempD <<= (byte_lane & BIT0) ? (12) : (0);
351 isbM32m(DDRPHY, reg, tempD, msk);
352
353 // Adjust PI_COUNT
354 pi_count -= ((pi_count / HALF_CLK) & 0xF) * HALF_CLK;
355
356 // PI (1/64 MCLK, 1 PIs)
357 // BL0 -> B0DLLPICODER0[13:08] (0x00-0x3F)
358 // BL1 -> B1DLLPICODER0[13:08] (0x00-0x3F)
359 reg = (byte_lane & BIT0) ? (B1DLLPICODER0) : (B0DLLPICODER0);
360 reg += (((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + (channel * DDRIODQ_CH_OFFSET));
361 msk = (BIT13 | BIT12 | BIT11 | BIT10 | BIT9 | BIT8);
362 tempD = pi_count << 8;
363 isbM32m(DDRPHY, reg, tempD, msk);
364
365 // DEADBAND
366 // BL0/1 -> B01DBCTL1[06/09] (+1 select)
367 // BL0/1 -> B01DBCTL1[00/03] (enable)
368 reg = B01DBCTL1 + ((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + (channel * DDRIODQ_CH_OFFSET);
369 msk = 0x00;
370 tempD = 0x00;
371 // enable
372 msk |= (byte_lane & BIT0) ? (BIT3) : (BIT0);
373 if ((pi_count < EARLY_DB) || (pi_count > LATE_DB))
374 {
375 tempD |= msk;
376 }
377 // select
378 msk |= (byte_lane & BIT0) ? (BIT9) : (BIT6);
379 if (pi_count < EARLY_DB)
380 {
381 tempD |= msk;
382 }
383 isbM32m(DDRPHY, reg, tempD, msk);
384
385 // error check
386 if (pi_count > 0x3F)
387 {
388 training_message(channel, rank, byte_lane);
389 post_code(0xEE, 0xE3);
390 }
391
392 LEAVEFN();
393 return;
394 }
395
396 // get_wdq:
397 //
398 // This function will return the amount of WDQ delay on the given channel, rank, byte_lane as an absolute PI count.
399 // (currently doesn't comprehend rank)
get_wdq(uint8_t channel,uint8_t rank,uint8_t byte_lane)400 uint32_t get_wdq(
401 uint8_t channel,
402 uint8_t rank,
403 uint8_t byte_lane)
404 {
405 uint32_t reg;
406 uint32_t tempD;
407 uint32_t pi_count;
408
409 ENTERFN();
410
411 // RDPTR (1/2 MCLK, 64 PIs)
412 // BL0 -> B01PTRCTL0[03:00] (0x0-0xF)
413 // BL1 -> B01PTRCTL0[15:12] (0x0-0xF)
414 reg = B01PTRCTL0 + ((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + (channel * DDRIODQ_CH_OFFSET);
415 tempD = isbR32m(DDRPHY, reg);
416 tempD >>= (byte_lane & BIT0) ? (12) : (0);
417 tempD &= 0xF;
418
419 // Adjust PI_COUNT
420 pi_count = (tempD * HALF_CLK);
421
422 // PI (1/64 MCLK, 1 PIs)
423 // BL0 -> B0DLLPICODER0[13:08] (0x00-0x3F)
424 // BL1 -> B1DLLPICODER0[13:08] (0x00-0x3F)
425 reg = (byte_lane & BIT0) ? (B1DLLPICODER0) : (B0DLLPICODER0);
426 reg += (((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + (channel * DDRIODQ_CH_OFFSET));
427 tempD = isbR32m(DDRPHY, reg);
428 tempD >>= 8;
429 tempD &= 0x3F;
430
431 // Adjust PI_COUNT
432 pi_count += tempD;
433
434 LEAVEFN();
435 return pi_count;
436 }
437
438 // set_wcmd:
439 //
440 // This function will program the WCMD delays based on an absolute number of PIs.
set_wcmd(uint8_t channel,uint32_t pi_count)441 void set_wcmd(
442 uint8_t channel,
443 uint32_t pi_count)
444 {
445 uint32_t reg;
446 uint32_t msk;
447 uint32_t tempD;
448
449 ENTERFN();
450 // RDPTR (1/2 MCLK, 64 PIs)
451 // CMDPTRREG[11:08] (0x0-0xF)
452 reg = CMDPTRREG + (channel * DDRIOCCC_CH_OFFSET);
453 msk = (BIT11 | BIT10 | BIT9 | BIT8);
454 tempD = pi_count / HALF_CLK;
455 tempD <<= 8;
456 isbM32m(DDRPHY, reg, tempD, msk);
457
458 // Adjust PI_COUNT
459 pi_count -= ((pi_count / HALF_CLK) & 0xF) * HALF_CLK;
460
461 // PI (1/64 MCLK, 1 PIs)
462 // CMDDLLPICODER0[29:24] -> CMDSLICE R3 (unused)
463 // CMDDLLPICODER0[21:16] -> CMDSLICE L3 (unused)
464 // CMDDLLPICODER0[13:08] -> CMDSLICE R2 (unused)
465 // CMDDLLPICODER0[05:00] -> CMDSLICE L2 (unused)
466 // CMDDLLPICODER1[29:24] -> CMDSLICE R1 (unused)
467 // CMDDLLPICODER1[21:16] -> CMDSLICE L1 (0x00-0x3F)
468 // CMDDLLPICODER1[13:08] -> CMDSLICE R0 (unused)
469 // CMDDLLPICODER1[05:00] -> CMDSLICE L0 (unused)
470 reg = CMDDLLPICODER1 + (channel * DDRIOCCC_CH_OFFSET);
471
472 msk = (BIT29 | BIT28 | BIT27 | BIT26 | BIT25 | BIT24) | (BIT21 | BIT20 | BIT19 | BIT18 | BIT17 | BIT16)
473 | (BIT13 | BIT12 | BIT11 | BIT10 | BIT9 | BIT8) | (BIT5 | BIT4 | BIT3 | BIT2 | BIT1 | BIT0);
474
475 tempD = (pi_count << 24) | (pi_count << 16) | (pi_count << 8) | (pi_count << 0);
476
477 isbM32m(DDRPHY, reg, tempD, msk);
478 reg = CMDDLLPICODER0 + (channel * DDRIOCCC_CH_OFFSET); // PO
479 isbM32m(DDRPHY, reg, tempD, msk);
480
481 // DEADBAND
482 // CMDCFGREG0[17] (+1 select)
483 // CMDCFGREG0[16] (enable)
484 reg = CMDCFGREG0 + (channel * DDRIOCCC_CH_OFFSET);
485 msk = 0x00;
486 tempD = 0x00;
487 // enable
488 msk |= BIT16;
489 if ((pi_count < EARLY_DB) || (pi_count > LATE_DB))
490 {
491 tempD |= msk;
492 }
493 // select
494 msk |= BIT17;
495 if (pi_count < EARLY_DB)
496 {
497 tempD |= msk;
498 }
499 isbM32m(DDRPHY, reg, tempD, msk);
500
501 // error check
502 if (pi_count > 0x3F)
503 {
504 post_code(0xEE, 0xE4);
505 }
506
507 LEAVEFN();
508 return;
509 }
510
511 // get_wcmd:
512 //
513 // This function will return the amount of WCMD delay on the given channel as an absolute PI count.
get_wcmd(uint8_t channel)514 uint32_t get_wcmd(
515 uint8_t channel)
516 {
517 uint32_t reg;
518 uint32_t tempD;
519 uint32_t pi_count;
520
521 ENTERFN();
522 // RDPTR (1/2 MCLK, 64 PIs)
523 // CMDPTRREG[11:08] (0x0-0xF)
524 reg = CMDPTRREG + (channel * DDRIOCCC_CH_OFFSET);
525 tempD = isbR32m(DDRPHY, reg);
526 tempD >>= 8;
527 tempD &= 0xF;
528
529 // Adjust PI_COUNT
530 pi_count = tempD * HALF_CLK;
531
532 // PI (1/64 MCLK, 1 PIs)
533 // CMDDLLPICODER0[29:24] -> CMDSLICE R3 (unused)
534 // CMDDLLPICODER0[21:16] -> CMDSLICE L3 (unused)
535 // CMDDLLPICODER0[13:08] -> CMDSLICE R2 (unused)
536 // CMDDLLPICODER0[05:00] -> CMDSLICE L2 (unused)
537 // CMDDLLPICODER1[29:24] -> CMDSLICE R1 (unused)
538 // CMDDLLPICODER1[21:16] -> CMDSLICE L1 (0x00-0x3F)
539 // CMDDLLPICODER1[13:08] -> CMDSLICE R0 (unused)
540 // CMDDLLPICODER1[05:00] -> CMDSLICE L0 (unused)
541 reg = CMDDLLPICODER1 + (channel * DDRIOCCC_CH_OFFSET);
542 tempD = isbR32m(DDRPHY, reg);
543 tempD >>= 16;
544 tempD &= 0x3F;
545
546 // Adjust PI_COUNT
547 pi_count += tempD;
548
549 LEAVEFN();
550 return pi_count;
551 }
552
553 // set_wclk:
554 //
555 // This function will program the WCLK delays based on an absolute number of PIs.
set_wclk(uint8_t channel,uint8_t rank,uint32_t pi_count)556 void set_wclk(
557 uint8_t channel,
558 uint8_t rank,
559 uint32_t pi_count)
560 {
561 uint32_t reg;
562 uint32_t msk;
563 uint32_t tempD;
564
565 ENTERFN();
566 // RDPTR (1/2 MCLK, 64 PIs)
567 // CCPTRREG[15:12] -> CLK1 (0x0-0xF)
568 // CCPTRREG[11:08] -> CLK0 (0x0-0xF)
569 reg = CCPTRREG + (channel * DDRIOCCC_CH_OFFSET);
570 msk = (BIT15 | BIT14 | BIT13 | BIT12) | (BIT11 | BIT10 | BIT9 | BIT8);
571 tempD = ((pi_count / HALF_CLK) << 12) | ((pi_count / HALF_CLK) << 8);
572 isbM32m(DDRPHY, reg, tempD, msk);
573
574 // Adjust PI_COUNT
575 pi_count -= ((pi_count / HALF_CLK) & 0xF) * HALF_CLK;
576
577 // PI (1/64 MCLK, 1 PIs)
578 // ECCB1DLLPICODER0[13:08] -> CLK0 (0x00-0x3F)
579 // ECCB1DLLPICODER0[21:16] -> CLK1 (0x00-0x3F)
580 reg = (rank) ? (ECCB1DLLPICODER0) : (ECCB1DLLPICODER0);
581 reg += (channel * DDRIOCCC_CH_OFFSET);
582 msk = (BIT21 | BIT20 | BIT19 | BIT18 | BIT17 | BIT16) | (BIT13 | BIT12 | BIT11 | BIT10 | BIT9 | BIT8);
583 tempD = (pi_count << 16) | (pi_count << 8);
584 isbM32m(DDRPHY, reg, tempD, msk);
585 reg = (rank) ? (ECCB1DLLPICODER1) : (ECCB1DLLPICODER1);
586 reg += (channel * DDRIOCCC_CH_OFFSET);
587 isbM32m(DDRPHY, reg, tempD, msk);
588 reg = (rank) ? (ECCB1DLLPICODER2) : (ECCB1DLLPICODER2);
589 reg += (channel * DDRIOCCC_CH_OFFSET);
590 isbM32m(DDRPHY, reg, tempD, msk);
591 reg = (rank) ? (ECCB1DLLPICODER3) : (ECCB1DLLPICODER3);
592 reg += (channel * DDRIOCCC_CH_OFFSET);
593 isbM32m(DDRPHY, reg, tempD, msk);
594
595 // DEADBAND
596 // CCCFGREG1[11:08] (+1 select)
597 // CCCFGREG1[03:00] (enable)
598 reg = CCCFGREG1 + (channel * DDRIOCCC_CH_OFFSET);
599 msk = 0x00;
600 tempD = 0x00;
601 // enable
602 msk |= (BIT3 | BIT2 | BIT1 | BIT0); // only ??? matters
603 if ((pi_count < EARLY_DB) || (pi_count > LATE_DB))
604 {
605 tempD |= msk;
606 }
607 // select
608 msk |= (BIT11 | BIT10 | BIT9 | BIT8); // only ??? matters
609 if (pi_count < EARLY_DB)
610 {
611 tempD |= msk;
612 }
613 isbM32m(DDRPHY, reg, tempD, msk);
614
615 // error check
616 if (pi_count > 0x3F)
617 {
618 post_code(0xEE, 0xE5);
619 }
620
621 LEAVEFN();
622 return;
623 }
624
625 // get_wclk:
626 //
627 // This function will return the amout of WCLK delay on the given channel, rank as an absolute PI count.
get_wclk(uint8_t channel,uint8_t rank)628 uint32_t get_wclk(
629 uint8_t channel,
630 uint8_t rank)
631 {
632 uint32_t reg;
633 uint32_t tempD;
634 uint32_t pi_count;
635
636 ENTERFN();
637 // RDPTR (1/2 MCLK, 64 PIs)
638 // CCPTRREG[15:12] -> CLK1 (0x0-0xF)
639 // CCPTRREG[11:08] -> CLK0 (0x0-0xF)
640 reg = CCPTRREG + (channel * DDRIOCCC_CH_OFFSET);
641 tempD = isbR32m(DDRPHY, reg);
642 tempD >>= (rank) ? (12) : (8);
643 tempD &= 0xF;
644
645 // Adjust PI_COUNT
646 pi_count = tempD * HALF_CLK;
647
648 // PI (1/64 MCLK, 1 PIs)
649 // ECCB1DLLPICODER0[13:08] -> CLK0 (0x00-0x3F)
650 // ECCB1DLLPICODER0[21:16] -> CLK1 (0x00-0x3F)
651 reg = (rank) ? (ECCB1DLLPICODER0) : (ECCB1DLLPICODER0);
652 reg += (channel * DDRIOCCC_CH_OFFSET);
653 tempD = isbR32m(DDRPHY, reg);
654 tempD >>= (rank) ? (16) : (8);
655 tempD &= 0x3F;
656
657 pi_count += tempD;
658
659 LEAVEFN();
660 return pi_count;
661 }
662
663 // set_wctl:
664 //
665 // This function will program the WCTL delays based on an absolute number of PIs.
666 // (currently doesn't comprehend rank)
set_wctl(uint8_t channel,uint8_t rank,uint32_t pi_count)667 void set_wctl(
668 uint8_t channel,
669 uint8_t rank,
670 uint32_t pi_count)
671 {
672 uint32_t reg;
673 uint32_t msk;
674 uint32_t tempD;
675
676 ENTERFN();
677
678 // RDPTR (1/2 MCLK, 64 PIs)
679 // CCPTRREG[31:28] (0x0-0xF)
680 // CCPTRREG[27:24] (0x0-0xF)
681 reg = CCPTRREG + (channel * DDRIOCCC_CH_OFFSET);
682 msk = (BIT31 | BIT30 | BIT29 | BIT28) | (BIT27 | BIT26 | BIT25 | BIT24);
683 tempD = ((pi_count / HALF_CLK) << 28) | ((pi_count / HALF_CLK) << 24);
684 isbM32m(DDRPHY, reg, tempD, msk);
685
686 // Adjust PI_COUNT
687 pi_count -= ((pi_count / HALF_CLK) & 0xF) * HALF_CLK;
688
689 // PI (1/64 MCLK, 1 PIs)
690 // ECCB1DLLPICODER?[29:24] (0x00-0x3F)
691 // ECCB1DLLPICODER?[29:24] (0x00-0x3F)
692 reg = ECCB1DLLPICODER0 + (channel * DDRIOCCC_CH_OFFSET);
693 msk = (BIT29 | BIT28 | BIT27 | BIT26 | BIT25 | BIT24);
694 tempD = (pi_count << 24);
695 isbM32m(DDRPHY, reg, tempD, msk);
696 reg = ECCB1DLLPICODER1 + (channel * DDRIOCCC_CH_OFFSET);
697 isbM32m(DDRPHY, reg, tempD, msk);
698 reg = ECCB1DLLPICODER2 + (channel * DDRIOCCC_CH_OFFSET);
699 isbM32m(DDRPHY, reg, tempD, msk);
700 reg = ECCB1DLLPICODER3 + (channel * DDRIOCCC_CH_OFFSET);
701 isbM32m(DDRPHY, reg, tempD, msk);
702
703 // DEADBAND
704 // CCCFGREG1[13:12] (+1 select)
705 // CCCFGREG1[05:04] (enable)
706 reg = CCCFGREG1 + (channel * DDRIOCCC_CH_OFFSET);
707 msk = 0x00;
708 tempD = 0x00;
709 // enable
710 msk |= (BIT5 | BIT4); // only ??? matters
711 if ((pi_count < EARLY_DB) || (pi_count > LATE_DB))
712 {
713 tempD |= msk;
714 }
715 // select
716 msk |= (BIT13 | BIT12); // only ??? matters
717 if (pi_count < EARLY_DB)
718 {
719 tempD |= msk;
720 }
721 isbM32m(DDRPHY, reg, tempD, msk);
722
723 // error check
724 if (pi_count > 0x3F)
725 {
726 post_code(0xEE, 0xE6);
727 }
728
729 LEAVEFN();
730 return;
731 }
732
733 // get_wctl:
734 //
735 // This function will return the amount of WCTL delay on the given channel, rank as an absolute PI count.
736 // (currently doesn't comprehend rank)
get_wctl(uint8_t channel,uint8_t rank)737 uint32_t get_wctl(
738 uint8_t channel,
739 uint8_t rank)
740 {
741 uint32_t reg;
742 uint32_t tempD;
743 uint32_t pi_count;
744
745 ENTERFN();
746
747 // RDPTR (1/2 MCLK, 64 PIs)
748 // CCPTRREG[31:28] (0x0-0xF)
749 // CCPTRREG[27:24] (0x0-0xF)
750 reg = CCPTRREG + (channel * DDRIOCCC_CH_OFFSET);
751 tempD = isbR32m(DDRPHY, reg);
752 tempD >>= 24;
753 tempD &= 0xF;
754
755 // Adjust PI_COUNT
756 pi_count = tempD * HALF_CLK;
757
758 // PI (1/64 MCLK, 1 PIs)
759 // ECCB1DLLPICODER?[29:24] (0x00-0x3F)
760 // ECCB1DLLPICODER?[29:24] (0x00-0x3F)
761 reg = ECCB1DLLPICODER0 + (channel * DDRIOCCC_CH_OFFSET);
762 tempD = isbR32m(DDRPHY, reg);
763 tempD >>= 24;
764 tempD &= 0x3F;
765
766 // Adjust PI_COUNT
767 pi_count += tempD;
768
769 LEAVEFN();
770 return pi_count;
771 }
772
773 // set_vref:
774 //
775 // This function will program the internal Vref setting in a given byte lane in a given channel.
set_vref(uint8_t channel,uint8_t byte_lane,uint32_t setting)776 void set_vref(
777 uint8_t channel,
778 uint8_t byte_lane,
779 uint32_t setting)
780 {
781 uint32_t reg = (byte_lane & 0x1) ? (B1VREFCTL) : (B0VREFCTL);
782
783 ENTERFN();
784 DPF(D_TRN, "Vref ch%d ln%d : val=%03X\n", channel, byte_lane, setting);
785
786 isbM32m(DDRPHY, (reg + (channel * DDRIODQ_CH_OFFSET) + ((byte_lane >> 1) * DDRIODQ_BL_OFFSET)),
787 (vref_codes[setting] << 2), (BIT7 | BIT6 | BIT5 | BIT4 | BIT3 | BIT2));
788 //isbM32m(DDRPHY, (reg + (channel * DDRIODQ_CH_OFFSET) + ((byte_lane >> 1) * DDRIODQ_BL_OFFSET)), (setting<<2), (BIT7|BIT6|BIT5|BIT4|BIT3|BIT2));
789 // need to wait ~300ns for Vref to settle (check that this is necessary)
790 delay_n(300);
791 // ??? may need to clear pointers ???
792 LEAVEFN();
793 return;
794 }
795
796 // get_vref:
797 //
798 // This function will return the internal Vref setting for the given channel, byte_lane;
get_vref(uint8_t channel,uint8_t byte_lane)799 uint32_t get_vref(
800 uint8_t channel,
801 uint8_t byte_lane)
802 {
803 uint8_t j;
804 uint32_t ret_val = sizeof(vref_codes) / 2;
805 uint32_t reg = (byte_lane & 0x1) ? (B1VREFCTL) : (B0VREFCTL);
806
807 uint32_t tempD;
808
809 ENTERFN();
810 tempD = isbR32m(DDRPHY, (reg + (channel * DDRIODQ_CH_OFFSET) + ((byte_lane >> 1) * DDRIODQ_BL_OFFSET)));
811 tempD >>= 2;
812 tempD &= 0x3F;
813 for (j = 0; j < sizeof(vref_codes); j++)
814 {
815 if (vref_codes[j] == tempD)
816 {
817 ret_val = j;
818 break;
819 }
820 }
821 LEAVEFN();
822 return ret_val;
823 }
824
825 // clear_pointers:
826 //
827 // This function will be used to clear the pointers in a given byte lane in a given channel.
clear_pointers(void)828 void clear_pointers(
829 void)
830 {
831 uint8_t channel_i;
832 uint8_t bl_i;
833
834 ENTERFN();
835 for (channel_i = 0; channel_i < NUM_CHANNELS; channel_i++)
836 {
837 for (bl_i = 0; bl_i < NUM_BYTE_LANES; bl_i++)
838 {
839 isbM32m(DDRPHY, (B01PTRCTL1 + (channel_i * DDRIODQ_CH_OFFSET) + ((bl_i >> 1) * DDRIODQ_BL_OFFSET)), ~(BIT8),
840 (BIT8));
841 //delay_m(1); // DEBUG
842 isbM32m(DDRPHY, (B01PTRCTL1 + (channel_i * DDRIODQ_CH_OFFSET) + ((bl_i >> 1) * DDRIODQ_BL_OFFSET)), (BIT8),
843 (BIT8));
844 }
845 }
846 LEAVEFN();
847 return;
848 }
849
// enable_cache:
//
// Intentionally empty: cache control is not used in Quark MRC.
void enable_cache(void)
{
}
857
// disable_cache:
//
// Intentionally empty: cache control is not used in Quark MRC.
void disable_cache(void)
{
}
865
866 // Send DRAM command, data should be formated
867 // using DCMD_Xxxx macro or emrsXCommand structure.
dram_init_command(uint32_t data)868 static void dram_init_command(
869 uint32_t data)
870 {
871 Wr32(DCMD, 0, data);
872 }
873
874 // find_rising_edge:
875 //
876 // This function will find the rising edge transition on RCVN or WDQS.
find_rising_edge(MRCParams_t * mrc_params,uint32_t delay[],uint8_t channel,uint8_t rank,bool rcvn)877 void find_rising_edge(
878 MRCParams_t *mrc_params,
879 uint32_t delay[],
880 uint8_t channel,
881 uint8_t rank,
882 bool rcvn)
883 {
884
885 #define SAMPLE_CNT 3 // number of sample points
886 #define SAMPLE_DLY 26 // number of PIs to increment per sample
887 #define FORWARD true // indicates to increase delays when looking for edge
888 #define BACKWARD false // indicates to decrease delays when looking for edge
889
890 bool all_edges_found; // determines stop condition
891 bool direction[NUM_BYTE_LANES]; // direction indicator
892 uint8_t sample_i; // sample counter
893 uint8_t bl_i; // byte lane counter
894 uint8_t bl_divisor = (mrc_params->channel_width == x16) ? 2 : 1; // byte lane divisor
895 uint32_t sample_result[SAMPLE_CNT]; // results of "sample_dqs()"
896 uint32_t tempD; // temporary DWORD
897 uint32_t transition_pattern;
898
899 ENTERFN();
900
901 // select hte and request initial configuration
902 select_hte(mrc_params);
903 first_run = 1;
904
905 // Take 3 sample points (T1,T2,T3) to obtain a transition pattern.
906 for (sample_i = 0; sample_i < SAMPLE_CNT; sample_i++)
907 {
908 // program the desired delays for sample
909 for (bl_i = 0; bl_i < (NUM_BYTE_LANES / bl_divisor); bl_i++)
910 {
911 // increase sample delay by 26 PI (0.2 CLK)
912 if (rcvn)
913 {
914 set_rcvn(channel, rank, bl_i, delay[bl_i] + (sample_i * SAMPLE_DLY));
915 }
916 else
917 {
918 set_wdqs(channel, rank, bl_i, delay[bl_i] + (sample_i * SAMPLE_DLY));
919 }
920 } // bl_i loop
921 // take samples (Tsample_i)
922 sample_result[sample_i] = sample_dqs(mrc_params, channel, rank, rcvn);
923
924 DPF(D_TRN, "Find rising edge %s ch%d rnk%d: #%d dly=%d dqs=%02X\n",
925 (rcvn ? "RCVN" : "WDQS"), channel, rank,
926 sample_i, sample_i * SAMPLE_DLY, sample_result[sample_i]);
927
928 } // sample_i loop
929
930 // This pattern will help determine where we landed and ultimately how to place RCVEN/WDQS.
931 for (bl_i = 0; bl_i < (NUM_BYTE_LANES / bl_divisor); bl_i++)
932 {
933 // build "transition_pattern" (MSB is 1st sample)
934 transition_pattern = 0x00;
935 for (sample_i = 0; sample_i < SAMPLE_CNT; sample_i++)
936 {
937 transition_pattern |= ((sample_result[sample_i] & (1 << bl_i)) >> bl_i) << (SAMPLE_CNT - 1 - sample_i);
938 } // sample_i loop
939
940 DPF(D_TRN, "=== transition pattern %d\n", transition_pattern);
941
942 // set up to look for rising edge based on "transition_pattern"
943 switch (transition_pattern)
944 {
945 case 0x00: // sampled 0->0->0
946 // move forward from T3 looking for 0->1
947 delay[bl_i] += 2 * SAMPLE_DLY;
948 direction[bl_i] = FORWARD;
949 break;
950 case 0x01: // sampled 0->0->1
951 case 0x05: // sampled 1->0->1 (bad duty cycle) *HSD#237503*
952 // move forward from T2 looking for 0->1
953 delay[bl_i] += 1 * SAMPLE_DLY;
954 direction[bl_i] = FORWARD;
955 break;
956 // HSD#237503
957 // case 0x02: // sampled 0->1->0 (bad duty cycle)
958 // training_message(channel, rank, bl_i);
959 // post_code(0xEE, 0xE8);
960 // break;
961 case 0x02: // sampled 0->1->0 (bad duty cycle) *HSD#237503*
962 case 0x03: // sampled 0->1->1
963 // move forward from T1 looking for 0->1
964 delay[bl_i] += 0 * SAMPLE_DLY;
965 direction[bl_i] = FORWARD;
966 break;
967 case 0x04: // sampled 1->0->0 (assumes BL8, HSD#234975)
968 // move forward from T3 looking for 0->1
969 delay[bl_i] += 2 * SAMPLE_DLY;
970 direction[bl_i] = FORWARD;
971 break;
972 // HSD#237503
973 // case 0x05: // sampled 1->0->1 (bad duty cycle)
974 // training_message(channel, rank, bl_i);
975 // post_code(0xEE, 0xE9);
976 // break;
977 case 0x06: // sampled 1->1->0
978 case 0x07: // sampled 1->1->1
979 // move backward from T1 looking for 1->0
980 delay[bl_i] += 0 * SAMPLE_DLY;
981 direction[bl_i] = BACKWARD;
982 break;
983 default:
984 post_code(0xEE, 0xEE);
985 break;
986 } // transition_pattern switch
987 // program delays
988 if (rcvn)
989 {
990 set_rcvn(channel, rank, bl_i, delay[bl_i]);
991 }
992 else
993 {
994 set_wdqs(channel, rank, bl_i, delay[bl_i]);
995 }
996 } // bl_i loop
997
998 // Based on the observed transition pattern on the byte lane,
999 // begin looking for a rising edge with single PI granularity.
1000 do
1001 {
1002 all_edges_found = true; // assume all byte lanes passed
1003 tempD = sample_dqs(mrc_params, channel, rank, rcvn); // take a sample
1004 // check all each byte lane for proper edge
1005 for (bl_i = 0; bl_i < (NUM_BYTE_LANES / bl_divisor); bl_i++)
1006 {
1007 if (tempD & (1 << bl_i))
1008 {
1009 // sampled "1"
1010 if (direction[bl_i] == BACKWARD)
1011 {
1012 // keep looking for edge on this byte lane
1013 all_edges_found = false;
1014 delay[bl_i] -= 1;
1015 if (rcvn)
1016 {
1017 set_rcvn(channel, rank, bl_i, delay[bl_i]);
1018 }
1019 else
1020 {
1021 set_wdqs(channel, rank, bl_i, delay[bl_i]);
1022 }
1023 }
1024 }
1025 else
1026 {
1027 // sampled "0"
1028 if (direction[bl_i] == FORWARD)
1029 {
1030 // keep looking for edge on this byte lane
1031 all_edges_found = false;
1032 delay[bl_i] += 1;
1033 if (rcvn)
1034 {
1035 set_rcvn(channel, rank, bl_i, delay[bl_i]);
1036 }
1037 else
1038 {
1039 set_wdqs(channel, rank, bl_i, delay[bl_i]);
1040 }
1041 }
1042 }
1043 } // bl_i loop
1044 } while (!all_edges_found);
1045
1046 // restore DDR idle state
1047 dram_init_command(DCMD_PREA(rank));
1048
1049 DPF(D_TRN, "Delay %03X %03X %03X %03X\n",
1050 delay[0], delay[1], delay[2], delay[3]);
1051
1052 LEAVEFN();
1053 return;
1054 }
1055
1056 // sample_dqs:
1057 //
1058 // This function will sample the DQTRAINSTS registers in the given channel/rank SAMPLE_SIZE times looking for a valid '0' or '1'.
1059 // It will return an encoded DWORD in which each bit corresponds to the sampled value on the byte lane.
sample_dqs(MRCParams_t * mrc_params,uint8_t channel,uint8_t rank,bool rcvn)1060 uint32_t sample_dqs(
1061 MRCParams_t *mrc_params,
1062 uint8_t channel,
1063 uint8_t rank,
1064 bool rcvn)
1065 {
1066 uint8_t j; // just a counter
1067 uint8_t bl_i; // which BL in the module (always 2 per module)
1068 uint8_t bl_grp; // which BL module
1069 uint8_t bl_divisor = (mrc_params->channel_width == x16) ? 2 : 1; // byte lane divisor
1070 uint32_t msk[2]; // BLx in module
1071 uint32_t sampled_val[SAMPLE_SIZE]; // DQTRAINSTS register contents for each sample
1072 uint32_t num_0s; // tracks the number of '0' samples
1073 uint32_t num_1s; // tracks the number of '1' samples
1074 uint32_t ret_val = 0x00; // assume all '0' samples
1075 uint32_t address = get_addr(mrc_params, channel, rank);
1076
1077 // initialise "msk[]"
1078 msk[0] = (rcvn) ? (BIT1) : (BIT9); // BL0
1079 msk[1] = (rcvn) ? (BIT0) : (BIT8); // BL1
1080
1081
1082 // cycle through each byte lane group
1083 for (bl_grp = 0; bl_grp < (NUM_BYTE_LANES / bl_divisor) / 2; bl_grp++)
1084 {
1085 // take SAMPLE_SIZE samples
1086 for (j = 0; j < SAMPLE_SIZE; j++)
1087 {
1088 HteMemOp(address, first_run, rcvn?0:1);
1089 first_run = 0;
1090
1091 // record the contents of the proper DQTRAINSTS register
1092 sampled_val[j] = isbR32m(DDRPHY, (DQTRAINSTS + (bl_grp * DDRIODQ_BL_OFFSET) + (channel * DDRIODQ_CH_OFFSET)));
1093 }
1094 // look for a majority value ( (SAMPLE_SIZE/2)+1 ) on the byte lane
1095 // and set that value in the corresponding "ret_val" bit
1096 for (bl_i = 0; bl_i < 2; bl_i++)
1097 {
1098 num_0s = 0x00; // reset '0' tracker for byte lane
1099 num_1s = 0x00; // reset '1' tracker for byte lane
1100 for (j = 0; j < SAMPLE_SIZE; j++)
1101 {
1102 if (sampled_val[j] & msk[bl_i])
1103 {
1104 num_1s++;
1105 }
1106 else
1107 {
1108 num_0s++;
1109 }
1110 }
1111 if (num_1s > num_0s)
1112 {
1113 ret_val |= (1 << (bl_i + (bl_grp * 2)));
1114 }
1115 }
1116 }
1117
1118 // "ret_val.0" contains the status of BL0
1119 // "ret_val.1" contains the status of BL1
1120 // "ret_val.2" contains the status of BL2
1121 // etc.
1122 return ret_val;
1123 }
1124
1125 // get_addr:
1126 //
1127 // This function will return a 32 bit address in the desired channel and rank.
get_addr(MRCParams_t * mrc_params,uint8_t channel,uint8_t rank)1128 uint32_t get_addr(
1129 MRCParams_t *mrc_params,
1130 uint8_t channel,
1131 uint8_t rank)
1132 {
1133 uint32_t offset = 0x02000000; // 32MB
1134
1135 // Begin product specific code
1136 if (channel > 0)
1137 {
1138 DPF(D_ERROR, "ILLEGAL CHANNEL\n");
1139 DEAD_LOOP();
1140 }
1141
1142 if (rank > 1)
1143 {
1144 DPF(D_ERROR, "ILLEGAL RANK\n");
1145 DEAD_LOOP();
1146 }
1147
1148 // use 256MB lowest density as per DRP == 0x0003
1149 offset += rank * (256 * 1024 * 1024);
1150
1151 return offset;
1152 }
1153
1154 // byte_lane_mask:
1155 //
1156 // This function will return a 32 bit mask that will be used to check for byte lane failures.
byte_lane_mask(MRCParams_t * mrc_params)1157 uint32_t byte_lane_mask(
1158 MRCParams_t *mrc_params)
1159 {
1160 uint32_t j;
1161 uint32_t ret_val = 0x00;
1162
1163 // set "ret_val" based on NUM_BYTE_LANES such that you will check only BL0 in "result"
1164 // (each bit in "result" represents a byte lane)
1165 for (j = 0; j < MAX_BYTE_LANES; j += NUM_BYTE_LANES)
1166 {
1167 ret_val |= (1 << ((j / NUM_BYTE_LANES) * NUM_BYTE_LANES));
1168 }
1169
1170 // HSD#235037
1171 // need to adjust the mask for 16-bit mode
1172 if (mrc_params->channel_width == x16)
1173 {
1174 ret_val |= (ret_val << 2);
1175 }
1176
1177 return ret_val;
1178 }
1179
1180
// read_tsc:
//
// Returns the current TSC register contents as a uint64_t.
// SIM/GCC builds execute RDTSC through inline assembly and combine
// EDX:EAX by hand; all other builds use the __rdtsc() intrinsic.
uint64_t read_tsc(
  void)
{
  volatile uint64_t tsc; // EDX:EAX

#if defined (SIM) || defined (GCC)
  volatile uint32_t tscH; // EDX
  volatile uint32_t tscL; // EAX

  // The asm statement must be "volatile": it has outputs but no inputs, so
  // without the qualifier the compiler is allowed to treat it as a pure
  // computation and merge repeated reads or hoist the read out of a polling
  // loop, which would hand back a stale counter value.
  asm volatile("rdtsc":"=a"(tscL),"=d"(tscH));
  tsc = tscH;
  tsc = (tsc<<32)|tscL;
#else
  tsc = __rdtsc();
#endif

  return tsc;
}
1202
// get_tsc_freq:
//
// Returns the TSC frequency in MHz.
// The value is looked up in a four entry table. The fuse based selection is
// currently compiled out, so entry 0 (533) is always returned.
uint32_t get_tsc_freq(
  void)
{
  // selectable TSC frequencies in MHz, indexed by the fuse value
  static const uint32_t mhz_table[] = { 533, 400, 200, 100 };
  uint32_t sel;

#if 0
  sel = (isbR32m(FUSE, 0) >> 12) & (BIT1|BIT0);
#else
  // todo!!! Fixed 533MHz for emulation or debugging
  sel = 0;
#endif

  return mhz_table[sel];
}
1220
#ifndef SIM
// delay_n:
//
// This is a simple delay function.
// It takes "nanoseconds" as a parameter and busy-waits until the TSC has
// advanced by the equivalent number of ticks:
//   ticks = (get_tsc_freq() [MHz] * nanoseconds) / 1000
void delay_n(
  uint32_t nanoseconds)
{
  uint64_t final_tsc = read_tsc(); // start of the wait
  uint32_t mhz = get_tsc_freq();
  uint32_t ticks;

  // A direct "mhz * nanoseconds" is a 32 bit product and wraps for delays
  // above roughly 8ms at 533MHz, silently shortening the wait.
  // Converting the whole microseconds and the sub-microsecond remainder
  // separately gives the same (truncated) result while every intermediate
  // value stays within 32 bits for TSC frequencies up to 1000MHz, and it
  // needs no 64 bit multiply or divide.
  ticks = mhz * (nanoseconds / 1000);
  ticks += (mhz * (nanoseconds % 1000)) / 1000;

  final_tsc += ticks;

  while (read_tsc() < final_tsc)
    ;
  return;
}
#endif
1238
// delay_u:
//
// This is a simple delay function.
// It takes "microseconds" as a parameter and calls delay_n(1000) once for
// every requested microsecond.
void delay_u(
  uint32_t microseconds)
{
  uint32_t remaining;

  // 64 bit math is not an option, just use loops
  for (remaining = microseconds; remaining != 0; remaining--)
  {
    delay_n(1000);
  }
}
1253
// delay_m:
//
// This is a simple delay function.
// It takes "milliseconds" as a parameter and calls delay_u(1000) once for
// every requested millisecond.
void delay_m(
  uint32_t milliseconds)
{
  uint32_t remaining;

  // 64 bit math is not an option, just use loops
  for (remaining = milliseconds; remaining != 0; remaining--)
  {
    delay_u(1000);
  }
}
1268
// delay_s:
//
// This is a simple delay function.
// It takes "seconds" as a parameter and calls delay_m(1000) once for every
// requested second.
void delay_s(
  uint32_t seconds)
{
  uint32_t remaining;

  // 64 bit math is not an option, just use loops
  for (remaining = seconds; remaining != 0; remaining--)
  {
    delay_m(1000);
  }
}
1283
1284 // post_code:
1285 //
1286 // This function will output the POST CODE to the four 7-Segment LED displays.
post_code(uint8_t major,uint8_t minor)1287 void post_code(
1288 uint8_t major,
1289 uint8_t minor)
1290 {
1291 #ifdef EMU
1292 // Update global variable for execution tracking in debug env
1293 PostCode = ((major << 8) | minor);
1294 #endif
1295
1296 // send message to UART
1297 DPF(D_INFO, "POST: 0x%01X%02X\n", major, minor);
1298
1299 // error check:
1300 if (major == 0xEE)
1301 {
1302 // todo!!! Consider updating error status and exit MRC
1303 #ifdef SIM
1304 // enable Ctrl-C handling
1305 for(;;) delay_n(100);
1306 #else
1307 DEAD_LOOP();
1308 #endif
1309 }
1310 }
1311
training_message(uint8_t channel,uint8_t rank,uint8_t byte_lane)1312 void training_message(
1313 uint8_t channel,
1314 uint8_t rank,
1315 uint8_t byte_lane)
1316 {
1317 // send message to UART
1318 DPF(D_INFO, "CH%01X RK%01X BL%01X\n", channel, rank, byte_lane);
1319 return;
1320 }
1321
// print_timings:
//
// Prints a table of the current timing settings through DPF(D_INFO).
// One row is printed per algorithm / enabled channel / enabled rank, with
// one column per byte lane (half as many lanes when the channel width is
// x16). Values are read back with the matching get_xxx() helper; the
// WCMD, WCTL and WCLK values do not depend on the byte lane, so the same
// value is repeated in every column of those rows.
void print_timings(
  MRCParams_t *mrc_params)
{
  uint8_t algo;
  uint8_t ch;
  uint8_t rk;
  uint8_t lane;
  uint8_t bl_divisor = (mrc_params->channel_width == x16) ? 2 : 1;

  // table header
  DPF(D_INFO, "\n---------------------------");
  DPF(D_INFO, "\nALGO[CH:RK] BL0 BL1 BL2 BL3");
  DPF(D_INFO, "\n===========================");

  for (algo = 0; algo < eMAX_ALGOS; algo++)
  {
    for (ch = 0; ch < NUM_CHANNELS; ch++)
    {
      // skip channels that are not enabled
      if (!(mrc_params->channel_enables & (1 << ch)))
      {
        continue;
      }

      for (rk = 0; rk < NUM_RANKS; rk++)
      {
        // skip ranks that are not enabled
        if (!(mrc_params->rank_enables & (1 << rk)))
        {
          continue;
        }

        // row label
        switch (algo)
        {
        case eRCVN:
          DPF(D_INFO, "\nRCVN[%02d:%02d]", ch, rk);
          break;
        case eWDQS:
          DPF(D_INFO, "\nWDQS[%02d:%02d]", ch, rk);
          break;
        case eWDQx:
          DPF(D_INFO, "\nWDQx[%02d:%02d]", ch, rk);
          break;
        case eRDQS:
          DPF(D_INFO, "\nRDQS[%02d:%02d]", ch, rk);
          break;
        case eVREF:
          DPF(D_INFO, "\nVREF[%02d:%02d]", ch, rk);
          break;
        case eWCMD:
          DPF(D_INFO, "\nWCMD[%02d:%02d]", ch, rk);
          break;
        case eWCTL:
          DPF(D_INFO, "\nWCTL[%02d:%02d]", ch, rk);
          break;
        case eWCLK:
          DPF(D_INFO, "\nWCLK[%02d:%02d]", ch, rk);
          break;
        default:
          break;
        }

        // one value per byte lane
        for (lane = 0; lane < (NUM_BYTE_LANES / bl_divisor); lane++)
        {
          switch (algo)
          {
          case eRCVN:
            DPF(D_INFO, " %03d", get_rcvn(ch, rk, lane));
            break;
          case eWDQS:
            DPF(D_INFO, " %03d", get_wdqs(ch, rk, lane));
            break;
          case eWDQx:
            DPF(D_INFO, " %03d", get_wdq(ch, rk, lane));
            break;
          case eRDQS:
            DPF(D_INFO, " %03d", get_rdqs(ch, rk, lane));
            break;
          case eVREF:
            DPF(D_INFO, " %03d", get_vref(ch, lane));
            break;
          case eWCMD:
            DPF(D_INFO, " %03d", get_wcmd(ch));
            break;
          case eWCTL:
            DPF(D_INFO, " %03d", get_wctl(ch, rk));
            break;
          case eWCLK:
            DPF(D_INFO, " %03d", get_wclk(ch, rk));
            break;
          default:
            break;
          }
        }
      }
    }
  }

  // table footer
  DPF(D_INFO, "\n---------------------------");
  DPF(D_INFO, "\n");
}
1414
1415 // 32 bit LFSR with characteristic polynomial: X^32 + X^22 +X^2 + X^1
1416 // The function takes pointer to previous 32 bit value and modifies it to next value.
lfsr32(uint32_t * lfsr_ptr)1417 void lfsr32(
1418 uint32_t *lfsr_ptr)
1419 {
1420 uint32_t bit;
1421 uint32_t lfsr;
1422 uint32_t i;
1423
1424 lfsr = *lfsr_ptr;
1425
1426 for (i = 0; i < 32; i++)
1427 {
1428 bit = 1 ^ (lfsr & BIT0);
1429 bit = bit ^ ((lfsr & BIT1) >> 1);
1430 bit = bit ^ ((lfsr & BIT2) >> 2);
1431 bit = bit ^ ((lfsr & BIT22) >> 22);
1432
1433 lfsr = ((lfsr >> 1) | (bit << 31));
1434 }
1435
1436 *lfsr_ptr = lfsr;
1437 return;
1438 }
1439
1440 // The purpose of this function is to ensure the SEC comes out of reset
1441 // and IA initiates the SEC enabling Memory Scrambling.
enable_scrambling(MRCParams_t * mrc_params)1442 void enable_scrambling(
1443 MRCParams_t *mrc_params)
1444 {
1445 uint32_t lfsr = 0;
1446 uint8_t i;
1447
1448 if (mrc_params->scrambling_enables == 0)
1449 return;
1450
1451 ENTERFN();
1452
1453 // 32 bit seed is always stored in BIOS NVM.
1454 lfsr = mrc_params->timings.scrambler_seed;
1455
1456 if (mrc_params->boot_mode == bmCold)
1457 {
1458 // factory value is 0 and in first boot, a clock based seed is loaded.
1459 if (lfsr == 0)
1460 {
1461 lfsr = read_tsc() & 0x0FFFFFFF; // get seed from system clock and make sure it is not all 1's
1462 }
1463 // need to replace scrambler
1464 // get next 32bit LFSR 16 times which is the last part of the previous scrambler vector.
1465 else
1466 {
1467 for (i = 0; i < 16; i++)
1468 {
1469 lfsr32(&lfsr);
1470 }
1471 }
1472 mrc_params->timings.scrambler_seed = lfsr; // save new seed.
1473 } // if (cold_boot)
1474
1475 // In warm boot or S3 exit, we have the previous seed.
1476 // In cold boot, we have the last 32bit LFSR which is the new seed.
1477 lfsr32(&lfsr); // shift to next value
1478 isbW32m(MCU, SCRMSEED, (lfsr & 0x0003FFFF));
1479 for (i = 0; i < 2; i++)
1480 {
1481 isbW32m(MCU, SCRMLO + i, (lfsr & 0xAAAAAAAA));
1482 }
1483
1484 LEAVEFN();
1485 return;
1486 }
1487
1488 // This function will store relevant timing data
1489 // This data will be used on subsequent boots to speed up boot times
1490 // and is required for Suspend To RAM capabilities.
store_timings(MRCParams_t * mrc_params)1491 void store_timings(
1492 MRCParams_t *mrc_params)
1493 {
1494 uint8_t ch, rk, bl;
1495 MrcTimings_t *mt = &mrc_params->timings;
1496
1497 for (ch = 0; ch < NUM_CHANNELS; ch++)
1498 {
1499 for (rk = 0; rk < NUM_RANKS; rk++)
1500 {
1501 for (bl = 0; bl < NUM_BYTE_LANES; bl++)
1502 {
1503 mt->rcvn[ch][rk][bl] = get_rcvn(ch, rk, bl); // RCVN
1504 mt->rdqs[ch][rk][bl] = get_rdqs(ch, rk, bl); // RDQS
1505 mt->wdqs[ch][rk][bl] = get_wdqs(ch, rk, bl); // WDQS
1506 mt->wdq[ch][rk][bl] = get_wdq(ch, rk, bl); // WDQ
1507 if (rk == 0)
1508 {
1509 mt->vref[ch][bl] = get_vref(ch, bl); // VREF (RANK0 only)
1510 }
1511 }
1512 mt->wctl[ch][rk] = get_wctl(ch, rk); // WCTL
1513 }
1514 mt->wcmd[ch] = get_wcmd(ch); // WCMD
1515 }
1516
1517 // need to save for a case of changing frequency after warm reset
1518 mt->ddr_speed = mrc_params->ddr_speed;
1519
1520 return;
1521 }
1522
1523 // This function will retrieve relevant timing data
1524 // This data will be used on subsequent boots to speed up boot times
1525 // and is required for Suspend To RAM capabilities.
restore_timings(MRCParams_t * mrc_params)1526 void restore_timings(
1527 MRCParams_t *mrc_params)
1528 {
1529 uint8_t ch, rk, bl;
1530 const MrcTimings_t *mt = &mrc_params->timings;
1531
1532 for (ch = 0; ch < NUM_CHANNELS; ch++)
1533 {
1534 for (rk = 0; rk < NUM_RANKS; rk++)
1535 {
1536 for (bl = 0; bl < NUM_BYTE_LANES; bl++)
1537 {
1538 set_rcvn(ch, rk, bl, mt->rcvn[ch][rk][bl]); // RCVN
1539 set_rdqs(ch, rk, bl, mt->rdqs[ch][rk][bl]); // RDQS
1540 set_wdqs(ch, rk, bl, mt->wdqs[ch][rk][bl]); // WDQS
1541 set_wdq(ch, rk, bl, mt->wdq[ch][rk][bl]); // WDQ
1542 if (rk == 0)
1543 {
1544 set_vref(ch, bl, mt->vref[ch][bl]); // VREF (RANK0 only)
1545 }
1546 }
1547 set_wctl(ch, rk, mt->wctl[ch][rk]); // WCTL
1548 }
1549 set_wcmd(ch, mt->wcmd[ch]); // WCMD
1550 }
1551
1552 return;
1553 }
1554
1555 // Configure default settings normally set as part of read training
1556 // Some defaults have to be set earlier as they may affect earlier
1557 // training steps.
default_timings(MRCParams_t * mrc_params)1558 void default_timings(
1559 MRCParams_t *mrc_params)
1560 {
1561 uint8_t ch, rk, bl;
1562
1563 for (ch = 0; ch < NUM_CHANNELS; ch++)
1564 {
1565 for (rk = 0; rk < NUM_RANKS; rk++)
1566 {
1567 for (bl = 0; bl < NUM_BYTE_LANES; bl++)
1568 {
1569 set_rdqs(ch, rk, bl, 24); // RDQS
1570 if (rk == 0)
1571 {
1572 set_vref(ch, bl, 32); // VREF (RANK0 only)
1573 }
1574 }
1575 }
1576 }
1577
1578 return;
1579 }
1580
1581