1 /************************************************************************
2  *
3  * Copyright (c) 2013-2015 Intel Corporation.
4  *
5 * This program and the accompanying materials
6 * are licensed and made available under the terms and conditions of the BSD License
7 * which accompanies this distribution.  The full text of the license may be found at
8 * http://opensource.org/licenses/bsd-license.php
9 *
10 * THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
12  *
13  ***************************************************************************/
14 
15 #include "mrc.h"
16 #include "memory_options.h"
17 
18 #include "meminit_utils.h"
19 #include "hte.h"
20 #include "io.h"
21 
22 void select_hte(
23     MRCParams_t *mrc_params);
24 
// Starts out as 0; find_rising_edge() sets it to 1 right after calling select_hte().
static uint8_t first_run = 0;
26 
// vref_codes:
//
// Register encodings for the internal Vref, indexed by setting (lowest to highest).
// The first 32 entries count down from 0x3F to 0x20 and the last 32 entries count up
// from 0x00 to 0x1F, so every 6-bit code appears exactly once in the table.
const uint8_t vref_codes[64] =
{
    0x3F, 0x3E, 0x3D, 0x3C, 0x3B, 0x3A, 0x39, 0x38, // 00 - 07
    0x37, 0x36, 0x35, 0x34, 0x33, 0x32, 0x31, 0x30, // 08 - 15
    0x2F, 0x2E, 0x2D, 0x2C, 0x2B, 0x2A, 0x29, 0x28, // 16 - 23
    0x27, 0x26, 0x25, 0x24, 0x23, 0x22, 0x21, 0x20, // 24 - 31
    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, // 32 - 39
    0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, // 40 - 47
    0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, // 48 - 55
    0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F  // 56 - 63
};
34 
#ifdef EMU
// Current post code, tracked for debugging purposes (EMU builds only).
uint32_t PostCode;
#endif // EMU
39 
40 // set_rcvn:
41 //
42 // This function will program the RCVEN delays.
43 // (currently doesn't comprehend rank)
set_rcvn(uint8_t channel,uint8_t rank,uint8_t byte_lane,uint32_t pi_count)44 void set_rcvn(
45     uint8_t channel,
46     uint8_t rank,
47     uint8_t byte_lane,
48     uint32_t pi_count)
49 {
50   uint32_t reg;
51   uint32_t msk;
52   uint32_t tempD;
53 
54   ENTERFN();
55   DPF(D_TRN, "Rcvn ch%d rnk%d ln%d : pi=%03X\n", channel, rank, byte_lane, pi_count);
56 
57   // RDPTR (1/2 MCLK, 64 PIs)
58   // BL0 -> B01PTRCTL0[11:08] (0x0-0xF)
59   // BL1 -> B01PTRCTL0[23:20] (0x0-0xF)
60   reg = B01PTRCTL0 + ((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + (channel * DDRIODQ_CH_OFFSET);
61   msk = (byte_lane & BIT0) ? (BIT23 | BIT22 | BIT21 | BIT20) : (BIT11 | BIT10 | BIT9 | BIT8);
62   tempD = (byte_lane & BIT0) ? ((pi_count / HALF_CLK) << 20) : ((pi_count / HALF_CLK) << 8);
63   isbM32m(DDRPHY, reg, tempD, msk);
64 
65   // Adjust PI_COUNT
66   pi_count -= ((pi_count / HALF_CLK) & 0xF) * HALF_CLK;
67 
68   // PI (1/64 MCLK, 1 PIs)
69   // BL0 -> B0DLLPICODER0[29:24] (0x00-0x3F)
70   // BL1 -> B1DLLPICODER0[29:24] (0x00-0x3F)
71   reg = (byte_lane & BIT0) ? (B1DLLPICODER0) : (B0DLLPICODER0);
72   reg += (((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + (channel * DDRIODQ_CH_OFFSET));
73   msk = (BIT29 | BIT28 | BIT27 | BIT26 | BIT25 | BIT24);
74   tempD = pi_count << 24;
75   isbM32m(DDRPHY, reg, tempD, msk);
76 
77   // DEADBAND
78   // BL0/1 -> B01DBCTL1[08/11] (+1 select)
79   // BL0/1 -> B01DBCTL1[02/05] (enable)
80   reg = B01DBCTL1 + ((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + (channel * DDRIODQ_CH_OFFSET);
81   msk = 0x00;
82   tempD = 0x00;
83   // enable
84   msk |= (byte_lane & BIT0) ? (BIT5) : (BIT2);
85   if ((pi_count < EARLY_DB) || (pi_count > LATE_DB))
86   {
87     tempD |= msk;
88   }
89   // select
90   msk |= (byte_lane & BIT0) ? (BIT11) : (BIT8);
91   if (pi_count < EARLY_DB)
92   {
93     tempD |= msk;
94   }
95   isbM32m(DDRPHY, reg, tempD, msk);
96 
97   // error check
98   if (pi_count > 0x3F)
99   {
100     training_message(channel, rank, byte_lane);
101     post_code(0xEE, 0xE0);
102   }
103 
104   LEAVEFN();
105   return;
106 }
107 
108 // get_rcvn:
109 //
110 // This function will return the current RCVEN delay on the given channel, rank, byte_lane as an absolute PI count.
111 // (currently doesn't comprehend rank)
get_rcvn(uint8_t channel,uint8_t rank,uint8_t byte_lane)112 uint32_t get_rcvn(
113     uint8_t channel,
114     uint8_t rank,
115     uint8_t byte_lane)
116 {
117   uint32_t reg;
118   uint32_t tempD;
119   uint32_t pi_count;
120 
121   ENTERFN();
122 
123   // RDPTR (1/2 MCLK, 64 PIs)
124   // BL0 -> B01PTRCTL0[11:08] (0x0-0xF)
125   // BL1 -> B01PTRCTL0[23:20] (0x0-0xF)
126   reg = B01PTRCTL0 + ((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + (channel * DDRIODQ_CH_OFFSET);
127   tempD = isbR32m(DDRPHY, reg);
128   tempD >>= (byte_lane & BIT0) ? (20) : (8);
129   tempD &= 0xF;
130 
131   // Adjust PI_COUNT
132   pi_count = tempD * HALF_CLK;
133 
134   // PI (1/64 MCLK, 1 PIs)
135   // BL0 -> B0DLLPICODER0[29:24] (0x00-0x3F)
136   // BL1 -> B1DLLPICODER0[29:24] (0x00-0x3F)
137   reg = (byte_lane & BIT0) ? (B1DLLPICODER0) : (B0DLLPICODER0);
138   reg += (((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + (channel * DDRIODQ_CH_OFFSET));
139   tempD = isbR32m(DDRPHY, reg);
140   tempD >>= 24;
141   tempD &= 0x3F;
142 
143   // Adjust PI_COUNT
144   pi_count += tempD;
145 
146   LEAVEFN();
147   return pi_count;
148 }
149 
150 // set_rdqs:
151 //
152 // This function will program the RDQS delays based on an absolute amount of PIs.
153 // (currently doesn't comprehend rank)
set_rdqs(uint8_t channel,uint8_t rank,uint8_t byte_lane,uint32_t pi_count)154 void set_rdqs(
155     uint8_t channel,
156     uint8_t rank,
157     uint8_t byte_lane,
158     uint32_t pi_count)
159 {
160   uint32_t reg;
161   uint32_t msk;
162   uint32_t tempD;
163 
164   ENTERFN();
165   DPF(D_TRN, "Rdqs ch%d rnk%d ln%d : pi=%03X\n", channel, rank, byte_lane, pi_count);
166 
167   // PI (1/128 MCLK)
168   // BL0 -> B0RXDQSPICODE[06:00] (0x00-0x47)
169   // BL1 -> B1RXDQSPICODE[06:00] (0x00-0x47)
170   reg = (byte_lane & BIT0) ? (B1RXDQSPICODE) : (B0RXDQSPICODE);
171   reg += (((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + (channel * DDRIODQ_CH_OFFSET));
172   msk = (BIT6 | BIT5 | BIT4 | BIT3 | BIT2 | BIT1 | BIT0);
173   tempD = pi_count << 0;
174   isbM32m(DDRPHY, reg, tempD, msk);
175 
176   // error check (shouldn't go above 0x3F)
177   if (pi_count > 0x47)
178   {
179     training_message(channel, rank, byte_lane);
180     post_code(0xEE, 0xE1);
181   }
182 
183   LEAVEFN();
184   return;
185 }
186 
187 // get_rdqs:
188 //
189 // This function will return the current RDQS delay on the given channel, rank, byte_lane as an absolute PI count.
190 // (currently doesn't comprehend rank)
get_rdqs(uint8_t channel,uint8_t rank,uint8_t byte_lane)191 uint32_t get_rdqs(
192     uint8_t channel,
193     uint8_t rank,
194     uint8_t byte_lane)
195 {
196   uint32_t reg;
197   uint32_t tempD;
198   uint32_t pi_count;
199 
200   ENTERFN();
201 
202   // PI (1/128 MCLK)
203   // BL0 -> B0RXDQSPICODE[06:00] (0x00-0x47)
204   // BL1 -> B1RXDQSPICODE[06:00] (0x00-0x47)
205   reg = (byte_lane & BIT0) ? (B1RXDQSPICODE) : (B0RXDQSPICODE);
206   reg += (((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + (channel * DDRIODQ_CH_OFFSET));
207   tempD = isbR32m(DDRPHY, reg);
208 
209   // Adjust PI_COUNT
210   pi_count = tempD & 0x7F;
211 
212   LEAVEFN();
213   return pi_count;
214 }
215 
216 // set_wdqs:
217 //
218 // This function will program the WDQS delays based on an absolute amount of PIs.
219 // (currently doesn't comprehend rank)
set_wdqs(uint8_t channel,uint8_t rank,uint8_t byte_lane,uint32_t pi_count)220 void set_wdqs(
221     uint8_t channel,
222     uint8_t rank,
223     uint8_t byte_lane,
224     uint32_t pi_count)
225 {
226   uint32_t reg;
227   uint32_t msk;
228   uint32_t tempD;
229 
230   ENTERFN();
231   DPF(D_TRN, "Wdqs ch%d rnk%d ln%d : pi=%03X\n", channel, rank, byte_lane, pi_count);
232 
233   // RDPTR (1/2 MCLK, 64 PIs)
234   // BL0 -> B01PTRCTL0[07:04] (0x0-0xF)
235   // BL1 -> B01PTRCTL0[19:16] (0x0-0xF)
236   reg = B01PTRCTL0 + ((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + (channel * DDRIODQ_CH_OFFSET);
237   msk = (byte_lane & BIT0) ? (BIT19 | BIT18 | BIT17 | BIT16) : (BIT7 | BIT6 | BIT5 | BIT4);
238   tempD = pi_count / HALF_CLK;
239   tempD <<= (byte_lane & BIT0) ? (16) : (4);
240   isbM32m(DDRPHY, reg, tempD, msk);
241 
242   // Adjust PI_COUNT
243   pi_count -= ((pi_count / HALF_CLK) & 0xF) * HALF_CLK;
244 
245   // PI (1/64 MCLK, 1 PIs)
246   // BL0 -> B0DLLPICODER0[21:16] (0x00-0x3F)
247   // BL1 -> B1DLLPICODER0[21:16] (0x00-0x3F)
248   reg = (byte_lane & BIT0) ? (B1DLLPICODER0) : (B0DLLPICODER0);
249   reg += (((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + (channel * DDRIODQ_CH_OFFSET));
250   msk = (BIT21 | BIT20 | BIT19 | BIT18 | BIT17 | BIT16);
251   tempD = pi_count << 16;
252   isbM32m(DDRPHY, reg, tempD, msk);
253 
254   // DEADBAND
255   // BL0/1 -> B01DBCTL1[07/10] (+1 select)
256   // BL0/1 -> B01DBCTL1[01/04] (enable)
257   reg = B01DBCTL1 + ((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + (channel * DDRIODQ_CH_OFFSET);
258   msk = 0x00;
259   tempD = 0x00;
260   // enable
261   msk |= (byte_lane & BIT0) ? (BIT4) : (BIT1);
262   if ((pi_count < EARLY_DB) || (pi_count > LATE_DB))
263   {
264     tempD |= msk;
265   }
266   // select
267   msk |= (byte_lane & BIT0) ? (BIT10) : (BIT7);
268   if (pi_count < EARLY_DB)
269   {
270     tempD |= msk;
271   }
272   isbM32m(DDRPHY, reg, tempD, msk);
273 
274   // error check
275   if (pi_count > 0x3F)
276   {
277     training_message(channel, rank, byte_lane);
278     post_code(0xEE, 0xE2);
279   }
280 
281   LEAVEFN();
282   return;
283 }
284 
285 // get_wdqs:
286 //
287 // This function will return the amount of WDQS delay on the given channel, rank, byte_lane as an absolute PI count.
288 // (currently doesn't comprehend rank)
get_wdqs(uint8_t channel,uint8_t rank,uint8_t byte_lane)289 uint32_t get_wdqs(
290     uint8_t channel,
291     uint8_t rank,
292     uint8_t byte_lane)
293 {
294   uint32_t reg;
295   uint32_t tempD;
296   uint32_t pi_count;
297 
298   ENTERFN();
299 
300   // RDPTR (1/2 MCLK, 64 PIs)
301   // BL0 -> B01PTRCTL0[07:04] (0x0-0xF)
302   // BL1 -> B01PTRCTL0[19:16] (0x0-0xF)
303   reg = B01PTRCTL0 + ((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + (channel * DDRIODQ_CH_OFFSET);
304   tempD = isbR32m(DDRPHY, reg);
305   tempD >>= (byte_lane & BIT0) ? (16) : (4);
306   tempD &= 0xF;
307 
308   // Adjust PI_COUNT
309   pi_count = (tempD * HALF_CLK);
310 
311   // PI (1/64 MCLK, 1 PIs)
312   // BL0 -> B0DLLPICODER0[21:16] (0x00-0x3F)
313   // BL1 -> B1DLLPICODER0[21:16] (0x00-0x3F)
314   reg = (byte_lane & BIT0) ? (B1DLLPICODER0) : (B0DLLPICODER0);
315   reg += (((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + (channel * DDRIODQ_CH_OFFSET));
316   tempD = isbR32m(DDRPHY, reg);
317   tempD >>= 16;
318   tempD &= 0x3F;
319 
320   // Adjust PI_COUNT
321   pi_count += tempD;
322 
323   LEAVEFN();
324   return pi_count;
325 }
326 
327 // set_wdq:
328 //
329 // This function will program the WDQ delays based on an absolute number of PIs.
330 // (currently doesn't comprehend rank)
set_wdq(uint8_t channel,uint8_t rank,uint8_t byte_lane,uint32_t pi_count)331 void set_wdq(
332     uint8_t channel,
333     uint8_t rank,
334     uint8_t byte_lane,
335     uint32_t pi_count)
336 {
337   uint32_t reg;
338   uint32_t msk;
339   uint32_t tempD;
340 
341   ENTERFN();
342   DPF(D_TRN, "Wdq ch%d rnk%d ln%d : pi=%03X\n", channel, rank, byte_lane, pi_count);
343 
344   // RDPTR (1/2 MCLK, 64 PIs)
345   // BL0 -> B01PTRCTL0[03:00] (0x0-0xF)
346   // BL1 -> B01PTRCTL0[15:12] (0x0-0xF)
347   reg = B01PTRCTL0 + ((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + (channel * DDRIODQ_CH_OFFSET);
348   msk = (byte_lane & BIT0) ? (BIT15 | BIT14 | BIT13 | BIT12) : (BIT3 | BIT2 | BIT1 | BIT0);
349   tempD = pi_count / HALF_CLK;
350   tempD <<= (byte_lane & BIT0) ? (12) : (0);
351   isbM32m(DDRPHY, reg, tempD, msk);
352 
353   // Adjust PI_COUNT
354   pi_count -= ((pi_count / HALF_CLK) & 0xF) * HALF_CLK;
355 
356   // PI (1/64 MCLK, 1 PIs)
357   // BL0 -> B0DLLPICODER0[13:08] (0x00-0x3F)
358   // BL1 -> B1DLLPICODER0[13:08] (0x00-0x3F)
359   reg = (byte_lane & BIT0) ? (B1DLLPICODER0) : (B0DLLPICODER0);
360   reg += (((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + (channel * DDRIODQ_CH_OFFSET));
361   msk = (BIT13 | BIT12 | BIT11 | BIT10 | BIT9 | BIT8);
362   tempD = pi_count << 8;
363   isbM32m(DDRPHY, reg, tempD, msk);
364 
365   // DEADBAND
366   // BL0/1 -> B01DBCTL1[06/09] (+1 select)
367   // BL0/1 -> B01DBCTL1[00/03] (enable)
368   reg = B01DBCTL1 + ((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + (channel * DDRIODQ_CH_OFFSET);
369   msk = 0x00;
370   tempD = 0x00;
371   // enable
372   msk |= (byte_lane & BIT0) ? (BIT3) : (BIT0);
373   if ((pi_count < EARLY_DB) || (pi_count > LATE_DB))
374   {
375     tempD |= msk;
376   }
377   // select
378   msk |= (byte_lane & BIT0) ? (BIT9) : (BIT6);
379   if (pi_count < EARLY_DB)
380   {
381     tempD |= msk;
382   }
383   isbM32m(DDRPHY, reg, tempD, msk);
384 
385   // error check
386   if (pi_count > 0x3F)
387   {
388     training_message(channel, rank, byte_lane);
389     post_code(0xEE, 0xE3);
390   }
391 
392   LEAVEFN();
393   return;
394 }
395 
396 // get_wdq:
397 //
398 // This function will return the amount of WDQ delay on the given channel, rank, byte_lane as an absolute PI count.
399 // (currently doesn't comprehend rank)
get_wdq(uint8_t channel,uint8_t rank,uint8_t byte_lane)400 uint32_t get_wdq(
401     uint8_t channel,
402     uint8_t rank,
403     uint8_t byte_lane)
404 {
405   uint32_t reg;
406   uint32_t tempD;
407   uint32_t pi_count;
408 
409   ENTERFN();
410 
411   // RDPTR (1/2 MCLK, 64 PIs)
412   // BL0 -> B01PTRCTL0[03:00] (0x0-0xF)
413   // BL1 -> B01PTRCTL0[15:12] (0x0-0xF)
414   reg = B01PTRCTL0 + ((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + (channel * DDRIODQ_CH_OFFSET);
415   tempD = isbR32m(DDRPHY, reg);
416   tempD >>= (byte_lane & BIT0) ? (12) : (0);
417   tempD &= 0xF;
418 
419   // Adjust PI_COUNT
420   pi_count = (tempD * HALF_CLK);
421 
422   // PI (1/64 MCLK, 1 PIs)
423   // BL0 -> B0DLLPICODER0[13:08] (0x00-0x3F)
424   // BL1 -> B1DLLPICODER0[13:08] (0x00-0x3F)
425   reg = (byte_lane & BIT0) ? (B1DLLPICODER0) : (B0DLLPICODER0);
426   reg += (((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + (channel * DDRIODQ_CH_OFFSET));
427   tempD = isbR32m(DDRPHY, reg);
428   tempD >>= 8;
429   tempD &= 0x3F;
430 
431   // Adjust PI_COUNT
432   pi_count += tempD;
433 
434   LEAVEFN();
435   return pi_count;
436 }
437 
438 // set_wcmd:
439 //
440 // This function will program the WCMD delays based on an absolute number of PIs.
set_wcmd(uint8_t channel,uint32_t pi_count)441 void set_wcmd(
442     uint8_t channel,
443     uint32_t pi_count)
444 {
445   uint32_t reg;
446   uint32_t msk;
447   uint32_t tempD;
448 
449   ENTERFN();
450   // RDPTR (1/2 MCLK, 64 PIs)
451   // CMDPTRREG[11:08] (0x0-0xF)
452   reg = CMDPTRREG + (channel * DDRIOCCC_CH_OFFSET);
453   msk = (BIT11 | BIT10 | BIT9 | BIT8);
454   tempD = pi_count / HALF_CLK;
455   tempD <<= 8;
456   isbM32m(DDRPHY, reg, tempD, msk);
457 
458   // Adjust PI_COUNT
459   pi_count -= ((pi_count / HALF_CLK) & 0xF) * HALF_CLK;
460 
461   // PI (1/64 MCLK, 1 PIs)
462   // CMDDLLPICODER0[29:24] -> CMDSLICE R3 (unused)
463   // CMDDLLPICODER0[21:16] -> CMDSLICE L3 (unused)
464   // CMDDLLPICODER0[13:08] -> CMDSLICE R2 (unused)
465   // CMDDLLPICODER0[05:00] -> CMDSLICE L2 (unused)
466   // CMDDLLPICODER1[29:24] -> CMDSLICE R1 (unused)
467   // CMDDLLPICODER1[21:16] -> CMDSLICE L1 (0x00-0x3F)
468   // CMDDLLPICODER1[13:08] -> CMDSLICE R0 (unused)
469   // CMDDLLPICODER1[05:00] -> CMDSLICE L0 (unused)
470   reg = CMDDLLPICODER1 + (channel * DDRIOCCC_CH_OFFSET);
471 
472   msk = (BIT29 | BIT28 | BIT27 | BIT26 | BIT25 | BIT24) | (BIT21 | BIT20 | BIT19 | BIT18 | BIT17 | BIT16)
473       | (BIT13 | BIT12 | BIT11 | BIT10 | BIT9 | BIT8) | (BIT5 | BIT4 | BIT3 | BIT2 | BIT1 | BIT0);
474 
475   tempD = (pi_count << 24) | (pi_count << 16) | (pi_count << 8) | (pi_count << 0);
476 
477   isbM32m(DDRPHY, reg, tempD, msk);
478   reg = CMDDLLPICODER0 + (channel * DDRIOCCC_CH_OFFSET); // PO
479   isbM32m(DDRPHY, reg, tempD, msk);
480 
481   // DEADBAND
482   // CMDCFGREG0[17] (+1 select)
483   // CMDCFGREG0[16] (enable)
484   reg = CMDCFGREG0 + (channel * DDRIOCCC_CH_OFFSET);
485   msk = 0x00;
486   tempD = 0x00;
487   // enable
488   msk |= BIT16;
489   if ((pi_count < EARLY_DB) || (pi_count > LATE_DB))
490   {
491     tempD |= msk;
492   }
493   // select
494   msk |= BIT17;
495   if (pi_count < EARLY_DB)
496   {
497     tempD |= msk;
498   }
499   isbM32m(DDRPHY, reg, tempD, msk);
500 
501   // error check
502   if (pi_count > 0x3F)
503   {
504     post_code(0xEE, 0xE4);
505   }
506 
507   LEAVEFN();
508   return;
509 }
510 
511 // get_wcmd:
512 //
513 // This function will return the amount of WCMD delay on the given channel as an absolute PI count.
get_wcmd(uint8_t channel)514 uint32_t get_wcmd(
515     uint8_t channel)
516 {
517   uint32_t reg;
518   uint32_t tempD;
519   uint32_t pi_count;
520 
521   ENTERFN();
522   // RDPTR (1/2 MCLK, 64 PIs)
523   // CMDPTRREG[11:08] (0x0-0xF)
524   reg = CMDPTRREG + (channel * DDRIOCCC_CH_OFFSET);
525   tempD = isbR32m(DDRPHY, reg);
526   tempD >>= 8;
527   tempD &= 0xF;
528 
529   // Adjust PI_COUNT
530   pi_count = tempD * HALF_CLK;
531 
532   // PI (1/64 MCLK, 1 PIs)
533   // CMDDLLPICODER0[29:24] -> CMDSLICE R3 (unused)
534   // CMDDLLPICODER0[21:16] -> CMDSLICE L3 (unused)
535   // CMDDLLPICODER0[13:08] -> CMDSLICE R2 (unused)
536   // CMDDLLPICODER0[05:00] -> CMDSLICE L2 (unused)
537   // CMDDLLPICODER1[29:24] -> CMDSLICE R1 (unused)
538   // CMDDLLPICODER1[21:16] -> CMDSLICE L1 (0x00-0x3F)
539   // CMDDLLPICODER1[13:08] -> CMDSLICE R0 (unused)
540   // CMDDLLPICODER1[05:00] -> CMDSLICE L0 (unused)
541   reg = CMDDLLPICODER1 + (channel * DDRIOCCC_CH_OFFSET);
542   tempD = isbR32m(DDRPHY, reg);
543   tempD >>= 16;
544   tempD &= 0x3F;
545 
546   // Adjust PI_COUNT
547   pi_count += tempD;
548 
549   LEAVEFN();
550   return pi_count;
551 }
552 
553 // set_wclk:
554 //
555 // This function will program the WCLK delays based on an absolute number of PIs.
set_wclk(uint8_t channel,uint8_t rank,uint32_t pi_count)556 void set_wclk(
557     uint8_t channel,
558     uint8_t rank,
559     uint32_t pi_count)
560 {
561   uint32_t reg;
562   uint32_t msk;
563   uint32_t tempD;
564 
565   ENTERFN();
566   // RDPTR (1/2 MCLK, 64 PIs)
567   // CCPTRREG[15:12] -> CLK1 (0x0-0xF)
568   // CCPTRREG[11:08] -> CLK0 (0x0-0xF)
569   reg = CCPTRREG + (channel * DDRIOCCC_CH_OFFSET);
570   msk = (BIT15 | BIT14 | BIT13 | BIT12) | (BIT11 | BIT10 | BIT9 | BIT8);
571   tempD = ((pi_count / HALF_CLK) << 12) | ((pi_count / HALF_CLK) << 8);
572   isbM32m(DDRPHY, reg, tempD, msk);
573 
574   // Adjust PI_COUNT
575   pi_count -= ((pi_count / HALF_CLK) & 0xF) * HALF_CLK;
576 
577   // PI (1/64 MCLK, 1 PIs)
578   // ECCB1DLLPICODER0[13:08] -> CLK0 (0x00-0x3F)
579   // ECCB1DLLPICODER0[21:16] -> CLK1 (0x00-0x3F)
580   reg = (rank) ? (ECCB1DLLPICODER0) : (ECCB1DLLPICODER0);
581   reg += (channel * DDRIOCCC_CH_OFFSET);
582   msk = (BIT21 | BIT20 | BIT19 | BIT18 | BIT17 | BIT16) | (BIT13 | BIT12 | BIT11 | BIT10 | BIT9 | BIT8);
583   tempD = (pi_count << 16) | (pi_count << 8);
584   isbM32m(DDRPHY, reg, tempD, msk);
585   reg = (rank) ? (ECCB1DLLPICODER1) : (ECCB1DLLPICODER1);
586   reg += (channel * DDRIOCCC_CH_OFFSET);
587   isbM32m(DDRPHY, reg, tempD, msk);
588   reg = (rank) ? (ECCB1DLLPICODER2) : (ECCB1DLLPICODER2);
589   reg += (channel * DDRIOCCC_CH_OFFSET);
590   isbM32m(DDRPHY, reg, tempD, msk);
591   reg = (rank) ? (ECCB1DLLPICODER3) : (ECCB1DLLPICODER3);
592   reg += (channel * DDRIOCCC_CH_OFFSET);
593   isbM32m(DDRPHY, reg, tempD, msk);
594 
595   // DEADBAND
596   // CCCFGREG1[11:08] (+1 select)
597   // CCCFGREG1[03:00] (enable)
598   reg = CCCFGREG1 + (channel * DDRIOCCC_CH_OFFSET);
599   msk = 0x00;
600   tempD = 0x00;
601   // enable
602   msk |= (BIT3 | BIT2 | BIT1 | BIT0); // only ??? matters
603   if ((pi_count < EARLY_DB) || (pi_count > LATE_DB))
604   {
605     tempD |= msk;
606   }
607   // select
608   msk |= (BIT11 | BIT10 | BIT9 | BIT8); // only ??? matters
609   if (pi_count < EARLY_DB)
610   {
611     tempD |= msk;
612   }
613   isbM32m(DDRPHY, reg, tempD, msk);
614 
615   // error check
616   if (pi_count > 0x3F)
617   {
618     post_code(0xEE, 0xE5);
619   }
620 
621   LEAVEFN();
622   return;
623 }
624 
625 // get_wclk:
626 //
627 // This function will return the amout of WCLK delay on the given channel, rank as an absolute PI count.
get_wclk(uint8_t channel,uint8_t rank)628 uint32_t get_wclk(
629     uint8_t channel,
630     uint8_t rank)
631 {
632   uint32_t reg;
633   uint32_t tempD;
634   uint32_t pi_count;
635 
636   ENTERFN();
637   // RDPTR (1/2 MCLK, 64 PIs)
638   // CCPTRREG[15:12] -> CLK1 (0x0-0xF)
639   // CCPTRREG[11:08] -> CLK0 (0x0-0xF)
640   reg = CCPTRREG + (channel * DDRIOCCC_CH_OFFSET);
641   tempD = isbR32m(DDRPHY, reg);
642   tempD >>= (rank) ? (12) : (8);
643   tempD &= 0xF;
644 
645   // Adjust PI_COUNT
646   pi_count = tempD * HALF_CLK;
647 
648   // PI (1/64 MCLK, 1 PIs)
649   // ECCB1DLLPICODER0[13:08] -> CLK0 (0x00-0x3F)
650   // ECCB1DLLPICODER0[21:16] -> CLK1 (0x00-0x3F)
651   reg = (rank) ? (ECCB1DLLPICODER0) : (ECCB1DLLPICODER0);
652   reg += (channel * DDRIOCCC_CH_OFFSET);
653   tempD = isbR32m(DDRPHY, reg);
654   tempD >>= (rank) ? (16) : (8);
655   tempD &= 0x3F;
656 
657   pi_count += tempD;
658 
659   LEAVEFN();
660   return pi_count;
661 }
662 
663 // set_wctl:
664 //
665 // This function will program the WCTL delays based on an absolute number of PIs.
666 // (currently doesn't comprehend rank)
set_wctl(uint8_t channel,uint8_t rank,uint32_t pi_count)667 void set_wctl(
668     uint8_t channel,
669     uint8_t rank,
670     uint32_t pi_count)
671 {
672   uint32_t reg;
673   uint32_t msk;
674   uint32_t tempD;
675 
676   ENTERFN();
677 
678   // RDPTR (1/2 MCLK, 64 PIs)
679   // CCPTRREG[31:28] (0x0-0xF)
680   // CCPTRREG[27:24] (0x0-0xF)
681   reg = CCPTRREG + (channel * DDRIOCCC_CH_OFFSET);
682   msk = (BIT31 | BIT30 | BIT29 | BIT28) | (BIT27 | BIT26 | BIT25 | BIT24);
683   tempD = ((pi_count / HALF_CLK) << 28) | ((pi_count / HALF_CLK) << 24);
684   isbM32m(DDRPHY, reg, tempD, msk);
685 
686   // Adjust PI_COUNT
687   pi_count -= ((pi_count / HALF_CLK) & 0xF) * HALF_CLK;
688 
689   // PI (1/64 MCLK, 1 PIs)
690   // ECCB1DLLPICODER?[29:24] (0x00-0x3F)
691   // ECCB1DLLPICODER?[29:24] (0x00-0x3F)
692   reg = ECCB1DLLPICODER0 + (channel * DDRIOCCC_CH_OFFSET);
693   msk = (BIT29 | BIT28 | BIT27 | BIT26 | BIT25 | BIT24);
694   tempD = (pi_count << 24);
695   isbM32m(DDRPHY, reg, tempD, msk);
696   reg = ECCB1DLLPICODER1 + (channel * DDRIOCCC_CH_OFFSET);
697   isbM32m(DDRPHY, reg, tempD, msk);
698   reg = ECCB1DLLPICODER2 + (channel * DDRIOCCC_CH_OFFSET);
699   isbM32m(DDRPHY, reg, tempD, msk);
700   reg = ECCB1DLLPICODER3 + (channel * DDRIOCCC_CH_OFFSET);
701   isbM32m(DDRPHY, reg, tempD, msk);
702 
703   // DEADBAND
704   // CCCFGREG1[13:12] (+1 select)
705   // CCCFGREG1[05:04] (enable)
706   reg = CCCFGREG1 + (channel * DDRIOCCC_CH_OFFSET);
707   msk = 0x00;
708   tempD = 0x00;
709   // enable
710   msk |= (BIT5 | BIT4); // only ??? matters
711   if ((pi_count < EARLY_DB) || (pi_count > LATE_DB))
712   {
713     tempD |= msk;
714   }
715   // select
716   msk |= (BIT13 | BIT12); // only ??? matters
717   if (pi_count < EARLY_DB)
718   {
719     tempD |= msk;
720   }
721   isbM32m(DDRPHY, reg, tempD, msk);
722 
723   // error check
724   if (pi_count > 0x3F)
725   {
726     post_code(0xEE, 0xE6);
727   }
728 
729   LEAVEFN();
730   return;
731 }
732 
733 // get_wctl:
734 //
735 // This function will return the amount of WCTL delay on the given channel, rank as an absolute PI count.
736 // (currently doesn't comprehend rank)
get_wctl(uint8_t channel,uint8_t rank)737 uint32_t get_wctl(
738     uint8_t channel,
739     uint8_t rank)
740 {
741   uint32_t reg;
742   uint32_t tempD;
743   uint32_t pi_count;
744 
745   ENTERFN();
746 
747   // RDPTR (1/2 MCLK, 64 PIs)
748   // CCPTRREG[31:28] (0x0-0xF)
749   // CCPTRREG[27:24] (0x0-0xF)
750   reg = CCPTRREG + (channel * DDRIOCCC_CH_OFFSET);
751   tempD = isbR32m(DDRPHY, reg);
752   tempD >>= 24;
753   tempD &= 0xF;
754 
755   // Adjust PI_COUNT
756   pi_count = tempD * HALF_CLK;
757 
758   // PI (1/64 MCLK, 1 PIs)
759   // ECCB1DLLPICODER?[29:24] (0x00-0x3F)
760   // ECCB1DLLPICODER?[29:24] (0x00-0x3F)
761   reg = ECCB1DLLPICODER0 + (channel * DDRIOCCC_CH_OFFSET);
762   tempD = isbR32m(DDRPHY, reg);
763   tempD >>= 24;
764   tempD &= 0x3F;
765 
766   // Adjust PI_COUNT
767   pi_count += tempD;
768 
769   LEAVEFN();
770   return pi_count;
771 }
772 
773 // set_vref:
774 //
775 // This function will program the internal Vref setting in a given byte lane in a given channel.
set_vref(uint8_t channel,uint8_t byte_lane,uint32_t setting)776 void set_vref(
777     uint8_t channel,
778     uint8_t byte_lane,
779     uint32_t setting)
780 {
781   uint32_t reg = (byte_lane & 0x1) ? (B1VREFCTL) : (B0VREFCTL);
782 
783   ENTERFN();
784   DPF(D_TRN, "Vref ch%d ln%d : val=%03X\n", channel, byte_lane, setting);
785 
786   isbM32m(DDRPHY, (reg + (channel * DDRIODQ_CH_OFFSET) + ((byte_lane >> 1) * DDRIODQ_BL_OFFSET)),
787       (vref_codes[setting] << 2), (BIT7 | BIT6 | BIT5 | BIT4 | BIT3 | BIT2));
788   //isbM32m(DDRPHY, (reg + (channel * DDRIODQ_CH_OFFSET) + ((byte_lane >> 1) * DDRIODQ_BL_OFFSET)), (setting<<2), (BIT7|BIT6|BIT5|BIT4|BIT3|BIT2));
789   // need to wait ~300ns for Vref to settle (check that this is necessary)
790   delay_n(300);
791   // ??? may need to clear pointers ???
792   LEAVEFN();
793   return;
794 }
795 
796 // get_vref:
797 //
798 // This function will return the internal Vref setting for the given channel, byte_lane;
get_vref(uint8_t channel,uint8_t byte_lane)799 uint32_t get_vref(
800     uint8_t channel,
801     uint8_t byte_lane)
802 {
803   uint8_t j;
804   uint32_t ret_val = sizeof(vref_codes) / 2;
805   uint32_t reg = (byte_lane & 0x1) ? (B1VREFCTL) : (B0VREFCTL);
806 
807   uint32_t tempD;
808 
809   ENTERFN();
810   tempD = isbR32m(DDRPHY, (reg + (channel * DDRIODQ_CH_OFFSET) + ((byte_lane >> 1) * DDRIODQ_BL_OFFSET)));
811   tempD >>= 2;
812   tempD &= 0x3F;
813   for (j = 0; j < sizeof(vref_codes); j++)
814   {
815     if (vref_codes[j] == tempD)
816     {
817       ret_val = j;
818       break;
819     }
820   }
821   LEAVEFN();
822   return ret_val;
823 }
824 
825 // clear_pointers:
826 //
827 // This function will be used to clear the pointers in a given byte lane in a given channel.
clear_pointers(void)828 void clear_pointers(
829     void)
830 {
831   uint8_t channel_i;
832   uint8_t bl_i;
833 
834   ENTERFN();
835   for (channel_i = 0; channel_i < NUM_CHANNELS; channel_i++)
836   {
837     for (bl_i = 0; bl_i < NUM_BYTE_LANES; bl_i++)
838     {
839       isbM32m(DDRPHY, (B01PTRCTL1 + (channel_i * DDRIODQ_CH_OFFSET) + ((bl_i >> 1) * DDRIODQ_BL_OFFSET)), ~(BIT8),
840           (BIT8));
841       //delay_m(1); // DEBUG
842       isbM32m(DDRPHY, (B01PTRCTL1 + (channel_i * DDRIODQ_CH_OFFSET) + ((bl_i >> 1) * DDRIODQ_BL_OFFSET)), (BIT8),
843           (BIT8));
844     }
845   }
846   LEAVEFN();
847   return;
848 }
849 
// enable_cache:
//
// Intentionally empty: cache control is not used in Quark MRC.
void enable_cache(void)
{
}
857 
// disable_cache:
//
// Intentionally empty: cache control is not used in Quark MRC.
void disable_cache(void)
{
}
865 
866 // Send DRAM command, data should be formated
867 // using DCMD_Xxxx macro or emrsXCommand structure.
dram_init_command(uint32_t data)868 static void dram_init_command(
869     uint32_t data)
870 {
871   Wr32(DCMD, 0, data);
872 }
873 
874 // find_rising_edge:
875 //
876 // This function will find the rising edge transition on RCVN or WDQS.
find_rising_edge(MRCParams_t * mrc_params,uint32_t delay[],uint8_t channel,uint8_t rank,bool rcvn)877 void find_rising_edge(
878     MRCParams_t *mrc_params,
879     uint32_t delay[],
880     uint8_t channel,
881     uint8_t rank,
882     bool rcvn)
883 {
884 
885 #define SAMPLE_CNT 3   // number of sample points
886 #define SAMPLE_DLY 26  // number of PIs to increment per sample
887 #define FORWARD true   // indicates to increase delays when looking for edge
888 #define BACKWARD false // indicates to decrease delays when looking for edge
889 
890   bool all_edges_found; // determines stop condition
891   bool direction[NUM_BYTE_LANES]; // direction indicator
892   uint8_t sample_i; // sample counter
893   uint8_t bl_i; // byte lane counter
894   uint8_t bl_divisor = (mrc_params->channel_width == x16) ? 2 : 1; // byte lane divisor
895   uint32_t sample_result[SAMPLE_CNT]; // results of "sample_dqs()"
896   uint32_t tempD; // temporary DWORD
897   uint32_t transition_pattern;
898 
899   ENTERFN();
900 
901   // select hte and request initial configuration
902   select_hte(mrc_params);
903   first_run = 1;
904 
905   // Take 3 sample points (T1,T2,T3) to obtain a transition pattern.
906   for (sample_i = 0; sample_i < SAMPLE_CNT; sample_i++)
907   {
908     // program the desired delays for sample
909     for (bl_i = 0; bl_i < (NUM_BYTE_LANES / bl_divisor); bl_i++)
910     {
911       // increase sample delay by 26 PI (0.2 CLK)
912       if (rcvn)
913       {
914         set_rcvn(channel, rank, bl_i, delay[bl_i] + (sample_i * SAMPLE_DLY));
915       }
916       else
917       {
918         set_wdqs(channel, rank, bl_i, delay[bl_i] + (sample_i * SAMPLE_DLY));
919       }
920     } // bl_i loop
921     // take samples (Tsample_i)
922     sample_result[sample_i] = sample_dqs(mrc_params, channel, rank, rcvn);
923 
924     DPF(D_TRN, "Find rising edge %s ch%d rnk%d: #%d dly=%d dqs=%02X\n",
925         (rcvn ? "RCVN" : "WDQS"), channel, rank,
926         sample_i, sample_i * SAMPLE_DLY, sample_result[sample_i]);
927 
928   } // sample_i loop
929 
930   // This pattern will help determine where we landed and ultimately how to place RCVEN/WDQS.
931   for (bl_i = 0; bl_i < (NUM_BYTE_LANES / bl_divisor); bl_i++)
932   {
933     // build "transition_pattern" (MSB is 1st sample)
934     transition_pattern = 0x00;
935     for (sample_i = 0; sample_i < SAMPLE_CNT; sample_i++)
936     {
937       transition_pattern |= ((sample_result[sample_i] & (1 << bl_i)) >> bl_i) << (SAMPLE_CNT - 1 - sample_i);
938     } // sample_i loop
939 
940     DPF(D_TRN, "=== transition pattern %d\n", transition_pattern);
941 
942     // set up to look for rising edge based on "transition_pattern"
943     switch (transition_pattern)
944     {
945     case 0x00: // sampled 0->0->0
946       // move forward from T3 looking for 0->1
947       delay[bl_i] += 2 * SAMPLE_DLY;
948       direction[bl_i] = FORWARD;
949       break;
950     case 0x01: // sampled 0->0->1
951     case 0x05: // sampled 1->0->1 (bad duty cycle) *HSD#237503*
952       // move forward from T2 looking for 0->1
953       delay[bl_i] += 1 * SAMPLE_DLY;
954       direction[bl_i] = FORWARD;
955       break;
956 // HSD#237503
957 //      case 0x02: // sampled 0->1->0 (bad duty cycle)
958 //        training_message(channel, rank, bl_i);
959 //        post_code(0xEE, 0xE8);
960 //        break;
961     case 0x02: // sampled 0->1->0 (bad duty cycle) *HSD#237503*
962     case 0x03: // sampled 0->1->1
963       // move forward from T1 looking for 0->1
964       delay[bl_i] += 0 * SAMPLE_DLY;
965       direction[bl_i] = FORWARD;
966       break;
967     case 0x04: // sampled 1->0->0 (assumes BL8, HSD#234975)
968       // move forward from T3 looking for 0->1
969       delay[bl_i] += 2 * SAMPLE_DLY;
970       direction[bl_i] = FORWARD;
971       break;
972 // HSD#237503
973 //      case 0x05: // sampled 1->0->1 (bad duty cycle)
974 //        training_message(channel, rank, bl_i);
975 //        post_code(0xEE, 0xE9);
976 //        break;
977     case 0x06: // sampled 1->1->0
978     case 0x07: // sampled 1->1->1
979       // move backward from T1 looking for 1->0
980       delay[bl_i] += 0 * SAMPLE_DLY;
981       direction[bl_i] = BACKWARD;
982       break;
983     default:
984       post_code(0xEE, 0xEE);
985       break;
986     } // transition_pattern switch
987     // program delays
988     if (rcvn)
989     {
990       set_rcvn(channel, rank, bl_i, delay[bl_i]);
991     }
992     else
993     {
994       set_wdqs(channel, rank, bl_i, delay[bl_i]);
995     }
996   } // bl_i loop
997 
998   // Based on the observed transition pattern on the byte lane,
999   // begin looking for a rising edge with single PI granularity.
1000   do
1001   {
1002     all_edges_found = true; // assume all byte lanes passed
1003     tempD = sample_dqs(mrc_params, channel, rank, rcvn); // take a sample
1004     // check all each byte lane for proper edge
1005     for (bl_i = 0; bl_i < (NUM_BYTE_LANES / bl_divisor); bl_i++)
1006     {
1007       if (tempD & (1 << bl_i))
1008       {
1009         // sampled "1"
1010         if (direction[bl_i] == BACKWARD)
1011         {
1012           // keep looking for edge on this byte lane
1013           all_edges_found = false;
1014           delay[bl_i] -= 1;
1015           if (rcvn)
1016           {
1017             set_rcvn(channel, rank, bl_i, delay[bl_i]);
1018           }
1019           else
1020           {
1021             set_wdqs(channel, rank, bl_i, delay[bl_i]);
1022           }
1023         }
1024       }
1025       else
1026       {
1027         // sampled "0"
1028         if (direction[bl_i] == FORWARD)
1029         {
1030           // keep looking for edge on this byte lane
1031           all_edges_found = false;
1032           delay[bl_i] += 1;
1033           if (rcvn)
1034           {
1035             set_rcvn(channel, rank, bl_i, delay[bl_i]);
1036           }
1037           else
1038           {
1039             set_wdqs(channel, rank, bl_i, delay[bl_i]);
1040           }
1041         }
1042       }
1043     } // bl_i loop
1044   } while (!all_edges_found);
1045 
1046   // restore DDR idle state
1047   dram_init_command(DCMD_PREA(rank));
1048 
1049   DPF(D_TRN, "Delay %03X %03X %03X %03X\n",
1050       delay[0], delay[1], delay[2], delay[3]);
1051 
1052   LEAVEFN();
1053   return;
1054 }
1055 
1056 // sample_dqs:
1057 //
1058 // This function will sample the DQTRAINSTS registers in the given channel/rank SAMPLE_SIZE times looking for a valid '0' or '1'.
1059 // It will return an encoded DWORD in which each bit corresponds to the sampled value on the byte lane.
sample_dqs(MRCParams_t * mrc_params,uint8_t channel,uint8_t rank,bool rcvn)1060 uint32_t sample_dqs(
1061     MRCParams_t *mrc_params,
1062     uint8_t channel,
1063     uint8_t rank,
1064     bool rcvn)
1065 {
1066   uint8_t j; // just a counter
1067   uint8_t bl_i; // which BL in the module (always 2 per module)
1068   uint8_t bl_grp; // which BL module
1069   uint8_t bl_divisor = (mrc_params->channel_width == x16) ? 2 : 1; // byte lane divisor
1070   uint32_t msk[2]; // BLx in module
1071   uint32_t sampled_val[SAMPLE_SIZE]; // DQTRAINSTS register contents for each sample
1072   uint32_t num_0s; // tracks the number of '0' samples
1073   uint32_t num_1s; // tracks the number of '1' samples
1074   uint32_t ret_val = 0x00; // assume all '0' samples
1075   uint32_t address = get_addr(mrc_params, channel, rank);
1076 
1077   // initialise "msk[]"
1078   msk[0] = (rcvn) ? (BIT1) : (BIT9); // BL0
1079   msk[1] = (rcvn) ? (BIT0) : (BIT8); // BL1
1080 
1081 
1082   // cycle through each byte lane group
1083   for (bl_grp = 0; bl_grp < (NUM_BYTE_LANES / bl_divisor) / 2; bl_grp++)
1084   {
1085     // take SAMPLE_SIZE samples
1086     for (j = 0; j < SAMPLE_SIZE; j++)
1087     {
1088       HteMemOp(address, first_run, rcvn?0:1);
1089       first_run = 0;
1090 
1091       // record the contents of the proper DQTRAINSTS register
1092       sampled_val[j] = isbR32m(DDRPHY, (DQTRAINSTS + (bl_grp * DDRIODQ_BL_OFFSET) + (channel * DDRIODQ_CH_OFFSET)));
1093     }
1094     // look for a majority value ( (SAMPLE_SIZE/2)+1 ) on the byte lane
1095     // and set that value in the corresponding "ret_val" bit
1096     for (bl_i = 0; bl_i < 2; bl_i++)
1097     {
1098       num_0s = 0x00; // reset '0' tracker for byte lane
1099       num_1s = 0x00; // reset '1' tracker for byte lane
1100       for (j = 0; j < SAMPLE_SIZE; j++)
1101       {
1102         if (sampled_val[j] & msk[bl_i])
1103         {
1104           num_1s++;
1105         }
1106         else
1107         {
1108           num_0s++;
1109         }
1110       }
1111       if (num_1s > num_0s)
1112       {
1113         ret_val |= (1 << (bl_i + (bl_grp * 2)));
1114       }
1115     }
1116   }
1117 
1118   // "ret_val.0" contains the status of BL0
1119   // "ret_val.1" contains the status of BL1
1120   // "ret_val.2" contains the status of BL2
1121   // etc.
1122   return ret_val;
1123 }
1124 
1125 // get_addr:
1126 //
1127 // This function will return a 32 bit address in the desired channel and rank.
get_addr(MRCParams_t * mrc_params,uint8_t channel,uint8_t rank)1128 uint32_t get_addr(
1129     MRCParams_t *mrc_params,
1130     uint8_t channel,
1131     uint8_t rank)
1132 {
1133   uint32_t offset = 0x02000000; // 32MB
1134 
1135   // Begin product specific code
1136   if (channel > 0)
1137   {
1138     DPF(D_ERROR, "ILLEGAL CHANNEL\n");
1139     DEAD_LOOP();
1140   }
1141 
1142   if (rank > 1)
1143   {
1144     DPF(D_ERROR, "ILLEGAL RANK\n");
1145     DEAD_LOOP();
1146   }
1147 
1148   // use 256MB lowest density as per DRP == 0x0003
1149   offset += rank * (256 * 1024 * 1024);
1150 
1151   return offset;
1152 }
1153 
1154 // byte_lane_mask:
1155 //
1156 // This function will return a 32 bit mask that will be used to check for byte lane failures.
byte_lane_mask(MRCParams_t * mrc_params)1157 uint32_t byte_lane_mask(
1158     MRCParams_t *mrc_params)
1159 {
1160   uint32_t j;
1161   uint32_t ret_val = 0x00;
1162 
1163   // set "ret_val" based on NUM_BYTE_LANES such that you will check only BL0 in "result"
1164   // (each bit in "result" represents a byte lane)
1165   for (j = 0; j < MAX_BYTE_LANES; j += NUM_BYTE_LANES)
1166   {
1167     ret_val |= (1 << ((j / NUM_BYTE_LANES) * NUM_BYTE_LANES));
1168   }
1169 
1170   // HSD#235037
1171   // need to adjust the mask for 16-bit mode
1172   if (mrc_params->channel_width == x16)
1173   {
1174     ret_val |= (ret_val << 2);
1175   }
1176 
1177   return ret_val;
1178 }
1179 
1180 
// read_tsc:
//
// Returns the current TSC register contents as a uint64_t.
// SIM/GCC builds execute RDTSC through inline assembly (EDX:EAX are
// combined into the 64 bit result); other builds use the __rdtsc() intrinsic.
uint64_t read_tsc(
    void)
{
#if defined (SIM) || defined (GCC)
  uint32_t tscH; // EDX
  uint32_t tscL; // EAX

  // "volatile" is required: an asm with outputs and no inputs is otherwise
  // treated as a pure computation, so the compiler may reuse an earlier
  // result or hoist the read out of a polling loop such as delay_n().
  asm volatile("rdtsc":"=a"(tscL),"=d"(tscH));

  return (((uint64_t) tscH) << 32) | tscL;
#else
  return __rdtsc();
#endif
}
1202 
// get_tsc_freq:
//
// Returns the TSC frequency in MHz, looked up from a fuse-indexed table.
// The fuse read is currently compiled out, so entry 0 (533) is always returned.
uint32_t get_tsc_freq(
    void)
{
  static const uint32_t mhz_by_fuse[] =
  { 533, 400, 200, 100 };
  uint32_t fuse_sel;

#if 0
  fuse_sel = (isbR32m(FUSE, 0) >> 12) & (BIT1|BIT0);
#else
  // todo!!! Fixed 533MHz for emulation or debugging
  fuse_sel = 0;
#endif

  return mhz_by_fuse[fuse_sel];
}
1220 
#ifndef SIM
// delay_n:
//
// Busy-waits on the TSC for at least "nanoseconds" nanoseconds.
// The wait length in TSC ticks is nanoseconds * get_tsc_freq() / 1000
// (get_tsc_freq() is in MHz, i.e. ticks per microsecond).
void delay_n(
    uint32_t nanoseconds)
{
  uint64_t final_tsc = read_tsc();
  uint32_t freq_mhz = get_tsc_freq();
  uint32_t ticks;

  // Multiplying first would overflow 32 bits for delays above a few
  // milliseconds and silently shorten the wait. Splitting the value into
  // whole microseconds and a sub-microsecond remainder yields exactly
  // floor(nanoseconds * freq_mhz / 1000) with 32 bit math only (no 64 bit
  // divide); the sum fits in 32 bits for TSC frequencies up to 1000 MHz.
  ticks = (nanoseconds / 1000) * freq_mhz;
  ticks += ((nanoseconds % 1000) * freq_mhz) / 1000;

  final_tsc += ticks;

  while (read_tsc() < final_tsc)
  {
    // spin until the target tick count is reached
  }
  return;
}
#endif
1238 
// delay_u:
//
// Simple delay taking "microseconds" as a parameter.
// Implemented as one delay_n(1000) call per microsecond.
void delay_u(
    uint32_t microseconds)
{
  uint32_t remaining;

  // 64 bit math is not an option, just use loops
  for (remaining = microseconds; remaining != 0; remaining--)
  {
    delay_n(1000);
  }
}
1253 
// delay_m:
//
// Simple delay taking "milliseconds" as a parameter.
// Implemented as one delay_u(1000) call per millisecond.
void delay_m(
    uint32_t milliseconds)
{
  uint32_t remaining;

  // 64 bit math is not an option, just use loops
  for (remaining = milliseconds; remaining != 0; remaining--)
  {
    delay_u(1000);
  }
}
1268 
// delay_s:
//
// Simple delay taking "seconds" as a parameter.
// Implemented as one delay_m(1000) call per second.
void delay_s(
    uint32_t seconds)
{
  uint32_t remaining;

  // 64 bit math is not an option, just use loops
  for (remaining = seconds; remaining != 0; remaining--)
  {
    delay_m(1000);
  }
}
1283 
1284 // post_code:
1285 //
1286 // This function will output the POST CODE to the four 7-Segment LED displays.
post_code(uint8_t major,uint8_t minor)1287 void post_code(
1288     uint8_t major,
1289     uint8_t minor)
1290 {
1291 #ifdef EMU
1292   // Update global variable for execution tracking in debug env
1293   PostCode = ((major << 8) | minor);
1294 #endif
1295 
1296   // send message to UART
1297   DPF(D_INFO, "POST: 0x%01X%02X\n", major, minor);
1298 
1299   // error check:
1300   if (major == 0xEE)
1301   {
1302     // todo!!! Consider updating error status and exit MRC
1303 #ifdef SIM
1304     // enable Ctrl-C handling
1305     for(;;) delay_n(100);
1306 #else
1307     DEAD_LOOP();
1308 #endif
1309   }
1310 }
1311 
training_message(uint8_t channel,uint8_t rank,uint8_t byte_lane)1312 void training_message(
1313     uint8_t channel,
1314     uint8_t rank,
1315     uint8_t byte_lane)
1316 {
1317   // send message to UART
1318   DPF(D_INFO, "CH%01X RK%01X BL%01X\n", channel, rank, byte_lane);
1319   return;
1320 }
1321 
print_timings(MRCParams_t * mrc_params)1322 void print_timings(
1323     MRCParams_t *mrc_params)
1324 {
1325   uint8_t algo_i;
1326   uint8_t channel_i;
1327   uint8_t rank_i;
1328   uint8_t bl_i;
1329   uint8_t bl_divisor = (mrc_params->channel_width == x16) ? 2 : 1;
1330 
1331   DPF(D_INFO, "\n---------------------------");
1332   DPF(D_INFO, "\nALGO[CH:RK] BL0 BL1 BL2 BL3");
1333   DPF(D_INFO, "\n===========================");
1334   for (algo_i = 0; algo_i < eMAX_ALGOS; algo_i++)
1335   {
1336     for (channel_i = 0; channel_i < NUM_CHANNELS; channel_i++)
1337     {
1338       if (mrc_params->channel_enables & (1 << channel_i))
1339       {
1340         for (rank_i = 0; rank_i < NUM_RANKS; rank_i++)
1341         {
1342           if (mrc_params->rank_enables & (1 << rank_i))
1343           {
1344             switch (algo_i)
1345             {
1346             case eRCVN:
1347               DPF(D_INFO, "\nRCVN[%02d:%02d]", channel_i, rank_i);
1348               break;
1349             case eWDQS:
1350               DPF(D_INFO, "\nWDQS[%02d:%02d]", channel_i, rank_i);
1351               break;
1352             case eWDQx:
1353               DPF(D_INFO, "\nWDQx[%02d:%02d]", channel_i, rank_i);
1354               break;
1355             case eRDQS:
1356               DPF(D_INFO, "\nRDQS[%02d:%02d]", channel_i, rank_i);
1357               break;
1358             case eVREF:
1359               DPF(D_INFO, "\nVREF[%02d:%02d]", channel_i, rank_i);
1360               break;
1361             case eWCMD:
1362               DPF(D_INFO, "\nWCMD[%02d:%02d]", channel_i, rank_i);
1363               break;
1364             case eWCTL:
1365               DPF(D_INFO, "\nWCTL[%02d:%02d]", channel_i, rank_i);
1366               break;
1367             case eWCLK:
1368               DPF(D_INFO, "\nWCLK[%02d:%02d]", channel_i, rank_i);
1369               break;
1370             default:
1371               break;
1372             } // algo_i switch
1373             for (bl_i = 0; bl_i < (NUM_BYTE_LANES / bl_divisor); bl_i++)
1374             {
1375               switch (algo_i)
1376               {
1377               case eRCVN:
1378                 DPF(D_INFO, " %03d", get_rcvn(channel_i, rank_i, bl_i));
1379                 break;
1380               case eWDQS:
1381                 DPF(D_INFO, " %03d", get_wdqs(channel_i, rank_i, bl_i));
1382                 break;
1383               case eWDQx:
1384                 DPF(D_INFO, " %03d", get_wdq(channel_i, rank_i, bl_i));
1385                 break;
1386               case eRDQS:
1387                 DPF(D_INFO, " %03d", get_rdqs(channel_i, rank_i, bl_i));
1388                 break;
1389               case eVREF:
1390                 DPF(D_INFO, " %03d", get_vref(channel_i, bl_i));
1391                 break;
1392               case eWCMD:
1393                 DPF(D_INFO, " %03d", get_wcmd(channel_i));
1394                 break;
1395               case eWCTL:
1396                 DPF(D_INFO, " %03d", get_wctl(channel_i, rank_i));
1397                 break;
1398               case eWCLK:
1399                 DPF(D_INFO, " %03d", get_wclk(channel_i, rank_i));
1400                 break;
1401               default:
1402                 break;
1403               } // algo_i switch
1404             } // bl_i loop
1405           } // if rank_i enabled
1406         } // rank_i loop
1407       } // if channel_i enabled
1408     } // channel_i loop
1409   } // algo_i loop
1410   DPF(D_INFO, "\n---------------------------");
1411   DPF(D_INFO, "\n");
1412   return;
1413 }
1414 
1415 // 32 bit LFSR with characteristic polynomial:  X^32 + X^22 +X^2 + X^1
1416 // The function takes pointer to previous 32 bit value and modifies it to next value.
lfsr32(uint32_t * lfsr_ptr)1417 void lfsr32(
1418     uint32_t *lfsr_ptr)
1419 {
1420   uint32_t bit;
1421   uint32_t lfsr;
1422   uint32_t i;
1423 
1424   lfsr = *lfsr_ptr;
1425 
1426   for (i = 0; i < 32; i++)
1427   {
1428     bit = 1 ^ (lfsr & BIT0);
1429     bit = bit ^ ((lfsr & BIT1) >> 1);
1430     bit = bit ^ ((lfsr & BIT2) >> 2);
1431     bit = bit ^ ((lfsr & BIT22) >> 22);
1432 
1433     lfsr = ((lfsr >> 1) | (bit << 31));
1434   }
1435 
1436   *lfsr_ptr = lfsr;
1437   return;
1438 }
1439 
1440 // The purpose of this function is to ensure the SEC comes out of reset
1441 // and IA initiates the SEC enabling Memory Scrambling.
enable_scrambling(MRCParams_t * mrc_params)1442 void enable_scrambling(
1443     MRCParams_t *mrc_params)
1444 {
1445   uint32_t lfsr = 0;
1446   uint8_t i;
1447 
1448   if (mrc_params->scrambling_enables == 0)
1449     return;
1450 
1451   ENTERFN();
1452 
1453   // 32 bit seed is always stored in BIOS NVM.
1454   lfsr = mrc_params->timings.scrambler_seed;
1455 
1456   if (mrc_params->boot_mode == bmCold)
1457   {
1458     // factory value is 0 and in first boot, a clock based seed is loaded.
1459     if (lfsr == 0)
1460     {
1461       lfsr = read_tsc() & 0x0FFFFFFF; // get seed from system clock and make sure it is not all 1's
1462     }
1463     // need to replace scrambler
1464     // get next 32bit LFSR 16 times which is the last part of the previous scrambler vector.
1465     else
1466     {
1467       for (i = 0; i < 16; i++)
1468       {
1469         lfsr32(&lfsr);
1470       }
1471     }
1472     mrc_params->timings.scrambler_seed = lfsr;  // save new seed.
1473   } // if (cold_boot)
1474 
1475   // In warm boot or S3 exit, we have the previous seed.
1476   // In cold boot, we have the last 32bit LFSR which is the new seed.
1477   lfsr32(&lfsr); // shift to next value
1478   isbW32m(MCU, SCRMSEED, (lfsr & 0x0003FFFF));
1479   for (i = 0; i < 2; i++)
1480   {
1481     isbW32m(MCU, SCRMLO + i, (lfsr & 0xAAAAAAAA));
1482   }
1483 
1484   LEAVEFN();
1485   return;
1486 }
1487 
1488 // This function will store relevant timing data
1489 // This data will be used on subsequent boots to speed up boot times
1490 // and is required for Suspend To RAM capabilities.
store_timings(MRCParams_t * mrc_params)1491 void store_timings(
1492     MRCParams_t *mrc_params)
1493 {
1494   uint8_t ch, rk, bl;
1495   MrcTimings_t *mt = &mrc_params->timings;
1496 
1497   for (ch = 0; ch < NUM_CHANNELS; ch++)
1498   {
1499     for (rk = 0; rk < NUM_RANKS; rk++)
1500     {
1501       for (bl = 0; bl < NUM_BYTE_LANES; bl++)
1502       {
1503         mt->rcvn[ch][rk][bl] = get_rcvn(ch, rk, bl); // RCVN
1504         mt->rdqs[ch][rk][bl] = get_rdqs(ch, rk, bl); // RDQS
1505         mt->wdqs[ch][rk][bl] = get_wdqs(ch, rk, bl); // WDQS
1506         mt->wdq[ch][rk][bl] = get_wdq(ch, rk, bl);  // WDQ
1507         if (rk == 0)
1508         {
1509           mt->vref[ch][bl] = get_vref(ch, bl);  // VREF (RANK0 only)
1510         }
1511       }
1512       mt->wctl[ch][rk] = get_wctl(ch, rk); // WCTL
1513     }
1514     mt->wcmd[ch] = get_wcmd(ch); // WCMD
1515   }
1516 
1517   // need to save for a case of changing frequency after warm reset
1518   mt->ddr_speed = mrc_params->ddr_speed;
1519 
1520   return;
1521 }
1522 
1523 // This function will retrieve relevant timing data
1524 // This data will be used on subsequent boots to speed up boot times
1525 // and is required for Suspend To RAM capabilities.
restore_timings(MRCParams_t * mrc_params)1526 void restore_timings(
1527     MRCParams_t *mrc_params)
1528 {
1529   uint8_t ch, rk, bl;
1530   const MrcTimings_t *mt = &mrc_params->timings;
1531 
1532   for (ch = 0; ch < NUM_CHANNELS; ch++)
1533   {
1534     for (rk = 0; rk < NUM_RANKS; rk++)
1535     {
1536       for (bl = 0; bl < NUM_BYTE_LANES; bl++)
1537       {
1538         set_rcvn(ch, rk, bl, mt->rcvn[ch][rk][bl]); // RCVN
1539         set_rdqs(ch, rk, bl, mt->rdqs[ch][rk][bl]); // RDQS
1540         set_wdqs(ch, rk, bl, mt->wdqs[ch][rk][bl]); // WDQS
1541         set_wdq(ch, rk, bl, mt->wdq[ch][rk][bl]);  // WDQ
1542         if (rk == 0)
1543         {
1544           set_vref(ch, bl, mt->vref[ch][bl]); // VREF (RANK0 only)
1545         }
1546       }
1547       set_wctl(ch, rk, mt->wctl[ch][rk]); // WCTL
1548     }
1549     set_wcmd(ch, mt->wcmd[ch]); // WCMD
1550   }
1551 
1552   return;
1553 }
1554 
1555 // Configure default settings normally set as part of read training
1556 // Some defaults have to be set earlier as they may affect earlier
1557 // training steps.
default_timings(MRCParams_t * mrc_params)1558 void default_timings(
1559     MRCParams_t *mrc_params)
1560 {
1561   uint8_t ch, rk, bl;
1562 
1563   for (ch = 0; ch < NUM_CHANNELS; ch++)
1564   {
1565     for (rk = 0; rk < NUM_RANKS; rk++)
1566     {
1567       for (bl = 0; bl < NUM_BYTE_LANES; bl++)
1568       {
1569         set_rdqs(ch, rk, bl, 24); // RDQS
1570         if (rk == 0)
1571         {
1572           set_vref(ch, bl, 32); // VREF (RANK0 only)
1573         }
1574       }
1575     }
1576   }
1577 
1578   return;
1579 }
1580 
1581