| 1 | /* Name: usbdrvasm12.inc |
| 2 | * Project: AVR USB driver |
| 3 | * Author: Christian Starkjohann |
| 4 | * Creation Date: 2004-12-29 |
| 5 | * Tabsize: 4 |
| 6 | * Copyright: (c) 2007 by OBJECTIVE DEVELOPMENT Software GmbH |
| 7 | * License: GNU GPL v2 (see License.txt), GNU GPL v3 or proprietary (CommercialLicense.txt) |
| 8 | * This Revision: $Id: usbdrvasm12.inc 692 2008-11-07 15:07:40Z cs $ |
| 9 | */ |
| 10 | |
| 11 | /* Do not link this file! Link usbdrvasm.S instead, which includes the |
| 12 | * appropriate implementation! |
| 13 | */ |
| 14 | |
| 15 | /* |
| 16 | General Description: |
| 17 | This file is the 12 MHz version of the assembler part of the USB driver. It |
| 18 | requires a 12 MHz crystal (not a ceramic resonator and not a calibrated RC |
| 19 | oscillator). |
| 20 | |
| 21 | See usbdrv.h for a description of the entire driver. |
| 22 | |
| 23 | Since almost all of this code is timing critical, don't change unless you |
| 24 | really know what you are doing! Many parts require not only a maximum number |
| 25 | of CPU cycles, but even an exact number of cycles! |
| 26 | |
| 27 | |
| 28 | Timing constraints according to spec (in bit times): |
| 29 | timing subject min max CPUcycles |
| 30 | --------------------------------------------------------------------------- |
| 31 | EOP of OUT/SETUP to sync pattern of DATA0 (both rx) 2 16 16-128 |
| 32 | EOP of IN to sync pattern of DATA0 (rx, then tx) 2 7.5 16-60 |
| 33 | DATAx (rx) to ACK/NAK/STALL (tx) 2 7.5 16-60 |
| 34 | */ |
| 35 | |
| 36 | ;Software-receiver engine. Strict timing! Don't change unless you can preserve timing! |
| 37 | ;interrupt response time: 4 cycles + insn running = 7 max if interrupts always enabled |
| 38 | ;max allowable interrupt latency: 34 cycles -> max 25 cycles interrupt disable |
| 39 | ;max stack usage: [ret(2), YL, SREG, YH, shift, x1, x2, x3, cnt, x4] = 11 bytes |
| 40 | ;Numbers in brackets are maximum cycles since SOF. |
| 41 | USB_INTR_VECTOR: |
| 42 | ;Receive interrupt entry: saves YL and SREG, locks onto the sync pattern, then starts sampling data bits. Order of registers pushed: YL, SREG [sofError], YH, shift, x1, x2, x3, cnt |
| 43 | push YL ;2 [35] push only what is necessary to sync with edge ASAP |
| 44 | in YL, SREG ;1 [37] SREG travels through YL and is pushed by the next instruction |
| 45 | push YL ;2 [39] |
| 46 | ;---------------------------------------------------------------------------- |
| 47 | ; Synchronize with sync pattern: |
| 48 | ;---------------------------------------------------------------------------- |
| 49 | ;sync byte (D-) pattern LSb to MSb: 01010100 [1 = idle = J, 0 = K] |
| 50 | ;sync up with J to K edge during sync pattern -- use fastest possible loops |
| 51 | ;The first part waits at most 1 bit long since we must be in sync pattern. |
| 52 | ;YL is guaranteed to be < 0x80 because I flag is clear. When we jump to |
| 53 | ;waitForJ, ensure that this prerequisite is met. |
| 54 | waitForJ: |
| 55 | inc YL |
| 56 | sbis USBIN, USBMINUS |
| 57 | brne waitForJ ; just make sure we have ANY timeout: the loop ends when D- reads 1 (sbis skips this branch) or when YL wraps to 0 |
| 58 | waitForK: |
| 59 | ;The following code results in a sampling window of 1/4 bit which meets the spec. Five polls of D-; if none reads 0 we fall through to the path ending in rjmp sofError. |
| 60 | sbis USBIN, USBMINUS |
| 61 | rjmp foundK |
| 62 | sbis USBIN, USBMINUS |
| 63 | rjmp foundK |
| 64 | sbis USBIN, USBMINUS |
| 65 | rjmp foundK |
| 66 | sbis USBIN, USBMINUS |
| 67 | rjmp foundK |
| 68 | sbis USBIN, USBMINUS |
| 69 | rjmp foundK |
| 70 | #if USB_COUNT_SOF |
| 71 | lds YL, usbSofCount |
| 72 | inc YL |
| 73 | sts usbSofCount, YL |
| 74 | #endif /* USB_COUNT_SOF */ |
| 75 | #ifdef USB_SOF_HOOK |
| 76 | USB_SOF_HOOK |
| 77 | #endif |
| 78 | rjmp sofError |
| 79 | foundK: |
| 80 | ;{3, 5} after falling D- edge, average delay: 4 cycles [we want 4 for center sampling] |
| 81 | ;we have 1 bit time for setup purposes, then sample again. Numbers in brackets |
| 82 | ;are cycles from center of first sync (double K) bit after the instruction |
| 83 | push YH ;2 [2] |
| 84 | lds YL, usbInputBufOffset;2 [4] |
| 85 | clr YH ;1 [5] |
| 86 | subi YL, lo8(-(usbRxBuf));1 [6] |
| 87 | sbci YH, hi8(-(usbRxBuf));1 [7] Y = usbRxBuf + usbInputBufOffset (added by subtracting the negated address) |
| 88 | |
| 89 | sbis USBIN, USBMINUS ;1 [8] we want two bits K [sample 1 cycle too early] |
| 90 | rjmp haveTwoBitsK ;2 [10] |
| 91 | pop YH ;2 [11] undo the push from before |
| 92 | rjmp waitForK ;2 [13] this was not the end of sync, retry |
| 93 | haveTwoBitsK: |
| 94 | ;---------------------------------------------------------------------------- |
| 95 | ; push more registers and initialize values while we sample the first bits: |
| 96 | ;---------------------------------------------------------------------------- |
| 97 | push shift ;2 [12] |
| 98 | push x1 ;2 [14] |
| 99 | push x2 ;2 [16] |
| 100 | |
| 101 | in x1, USBIN ;1 [17] <-- sample bit 0 |
| 102 | ldi shift, 0xff ;1 [18] |
| 103 | bst x1, USBMINUS ;1 [19] |
| 104 | bld shift, 0 ;1 [20] |
| 105 | push x3 ;2 [22] |
| 106 | push cnt ;2 [24] |
| 107 | |
| 108 | in x2, USBIN ;1 [25] <-- sample bit 1 |
| 109 | ser x3 ;1 [26] [inserted init instruction] |
| 110 | eor x1, x2 ;1 [27] |
| 111 | bst x1, USBMINUS ;1 [28] |
| 112 | bld shift, 1 ;1 [29] |
| 113 | ldi cnt, USB_BUFSIZE;1 [30] [inserted init instruction] |
| 114 | rjmp rxbit2 ;2 [32] bits 0 and 1 are already in shift, so enter the receiver loop at bit 2 |
| 115 | |
| 116 | ;---------------------------------------------------------------------------- |
| 117 | ; Receiver loop (numbers in brackets are cycles within byte after instr) |
| 118 | ;---------------------------------------------------------------------------- |
| 119 | |
| 120 | unstuff0: ;1 (branch taken from didUnstuff0) skip the stuffed bit that follows bit 0 |
| 121 | andi x3, ~0x01 ;1 [15] clear bit 0 in x3: marks this position as changed for the final eor in rxLoop |
| 122 | mov x1, x2 ;1 [16] x2 contains last sampled (stuffed) bit |
| 123 | in x2, USBIN ;1 [17] <-- sample bit 1 again |
| 124 | ori shift, 0x01 ;1 [18] set bit 0 in shift: breaks the all-zero window; with x3 bit 0 cleared the final eor yields 1 here |
| 125 | rjmp didUnstuff0 ;2 [20] |
| 126 | |
| 127 | unstuff1: ;1 (branch taken from rxbit2) same steps as unstuff0, for bit 1 |
| 128 | mov x2, x1 ;1 [21] x1 contains last sampled (stuffed) bit |
| 129 | andi x3, ~0x02 ;1 [22] |
| 130 | ori shift, 0x02 ;1 [23] |
| 131 | nop ;1 [24] pads so that the next sample lands on cycle 25 |
| 132 | in x1, USBIN ;1 [25] <-- sample bit 2 again |
| 133 | rjmp didUnstuff1 ;2 [27] |
| 134 | |
| 135 | unstuff2: ;1 (branch taken from the breq before didUnstuff2) same steps, for bit 2 |
| 136 | andi x3, ~0x04 ;1 [29] |
| 137 | ori shift, 0x04 ;1 [30] |
| 138 | mov x1, x2 ;1 [31] x2 contains last sampled (stuffed) bit |
| 139 | nop ;1 [32] pads so that the next sample lands on cycle 33 |
| 140 | in x2, USBIN ;1 [33] <-- sample bit 3 |
| 141 | rjmp didUnstuff2 ;2 [35] |
| 142 | |
| 143 | unstuff3: ;1 (branch taken) bits 3..6: the stub itself samples the stuffed bit; the next data bit is sampled after returning |
| 144 | in x2, USBIN ;1 [34] <-- sample stuffed bit 3 [one cycle too late] |
| 145 | andi x3, ~0x08 ;1 [35] |
| 146 | ori shift, 0x08 ;1 [36] |
| 147 | rjmp didUnstuff3 ;2 [38] |
| 148 | |
| 149 | unstuff4: ;1 (branch taken) same steps as unstuff3, for bit 4 |
| 150 | andi x3, ~0x10 ;1 [40] |
| 151 | in x1, USBIN ;1 [41] <-- sample stuffed bit 4 |
| 152 | ori shift, 0x10 ;1 [42] |
| 153 | rjmp didUnstuff4 ;2 [44] |
| 154 | |
| 155 | unstuff5: ;1 (branch taken) same steps as unstuff3, for bit 5 |
| 156 | andi x3, ~0x20 ;1 [48] |
| 157 | in x2, USBIN ;1 [49] <-- sample stuffed bit 5 |
| 158 | ori shift, 0x20 ;1 [50] |
| 159 | rjmp didUnstuff5 ;2 [52] |
| 160 | |
| 161 | unstuff6: ;1 (branch taken) same steps as unstuff3, for bit 6 |
| 162 | andi x3, ~0x40 ;1 [56] |
| 163 | in x1, USBIN ;1 [57] <-- sample stuffed bit 6 |
| 164 | ori shift, 0x40 ;1 [58] |
| 165 | rjmp didUnstuff6 ;2 [60] |
| 166 | |
| 167 | ; rxLoop handles one byte per pass, sampling USBIN every 8 cycles. Extra jobs done during bit interval: |
| 168 | ; bit 0: store, clear [SE0 is unreliable here due to bit dribbling in hubs] |
| 169 | ; bit 1: se0 check |
| 170 | ; bit 2: overflow check |
| 171 | ; bit 3: recovery from delay [bit 0 tasks took too long] |
| 172 | ; bit 4: none |
| 173 | ; bit 5: none |
| 174 | ; bit 6: none |
| 175 | ; bit 7: jump, eor |
| 176 | rxLoop: |
| 177 | eor x3, shift ;1 [0] reconstruct: x3 is 0 at bit locations we changed, 1 at others |
| 178 | in x1, USBIN ;1 [1] <-- sample bit 0 |
| 179 | st y+, x3 ;2 [3] store data |
| 180 | ser x3 ;1 [4] x3 = 0xff again for the next byte |
| 181 | nop ;1 [5] |
| 182 | eor x2, x1 ;1 [6] |
| 183 | bst x2, USBMINUS;1 [7] |
| 184 | bld shift, 0 ;1 [8] |
| 185 | in x2, USBIN ;1 [9] <-- sample bit 1 (or possibly bit 0 stuffed) |
| 186 | andi x2, USBMASK ;1 [10] |
| 187 | breq se0 ;1 [11] SE0 check for bit 1: taken when no USBMASK bit is set in the sample; se0 is defined outside this view |
| 188 | andi shift, 0xf9 ;1 [12] Z set if bits 0 and 3..7 of shift (the six most recent bits) are all zero |
| 189 | didUnstuff0: |
| 190 | breq unstuff0 ;1 [13] |
| 191 | eor x1, x2 ;1 [14] |
| 192 | bst x1, USBMINUS;1 [15] |
| 193 | bld shift, 1 ;1 [16] |
| 194 | rxbit2: |
| 195 | in x1, USBIN ;1 [17] <-- sample bit 2 (or possibly bit 1 stuffed) |
| 196 | andi shift, 0xf3 ;1 [18] |
| 197 | breq unstuff1 ;1 [19] do remaining work for bit 1 |
| 198 | didUnstuff1: |
| 199 | subi cnt, 1 ;1 [20] cnt was loaded with USB_BUFSIZE before entering the loop |
| 200 | brcs overflow ;1 [21] loop control: taken once cnt borrows below zero; overflow is defined outside this view |
| 201 | eor x2, x1 ;1 [22] |
| 202 | bst x2, USBMINUS;1 [23] |
| 203 | bld shift, 2 ;1 [24] |
| 204 | in x2, USBIN ;1 [25] <-- sample bit 3 (or possibly bit 2 stuffed) |
| 205 | andi shift, 0xe7 ;1 [26] |
| 206 | breq unstuff2 ;1 [27] |
| 207 | didUnstuff2: |
| 208 | eor x1, x2 ;1 [28] |
| 209 | bst x1, USBMINUS;1 [29] |
| 210 | bld shift, 3 ;1 [30] |
| 211 | didUnstuff3: |
| 212 | andi shift, 0xcf ;1 [31] |
| 213 | breq unstuff3 ;1 [32] |
| 214 | in x1, USBIN ;1 [33] <-- sample bit 4 |
| 215 | eor x2, x1 ;1 [34] |
| 216 | bst x2, USBMINUS;1 [35] |
| 217 | bld shift, 4 ;1 [36] |
| 218 | didUnstuff4: |
| 219 | andi shift, 0x9f ;1 [37] |
| 220 | breq unstuff4 ;1 [38] |
| 221 | nop2 ;2 [40] |
| 222 | in x2, USBIN ;1 [41] <-- sample bit 5 |
| 223 | eor x1, x2 ;1 [42] |
| 224 | bst x1, USBMINUS;1 [43] |
| 225 | bld shift, 5 ;1 [44] |
| 226 | didUnstuff5: |
| 227 | andi shift, 0x3f ;1 [45] |
| 228 | breq unstuff5 ;1 [46] |
| 229 | nop2 ;2 [48] |
| 230 | in x1, USBIN ;1 [49] <-- sample bit 6 |
| 231 | eor x2, x1 ;1 [50] |
| 232 | bst x2, USBMINUS;1 [51] |
| 233 | bld shift, 6 ;1 [52] |
| 234 | didUnstuff6: |
| 235 | cpi shift, 0x02 ;1 [53] shift < 2 means bits 1..6 are all zero (bit 7 was cleared by the andi 0x3f above) |
| 236 | brlo unstuff6 ;1 [54] |
| 237 | nop2 ;2 [56] |
| 238 | in x2, USBIN ;1 [57] <-- sample bit 7 |
| 239 | eor x1, x2 ;1 [58] |
| 240 | bst x1, USBMINUS;1 [59] |
| 241 | bld shift, 7 ;1 [60] |
| 242 | didUnstuff7: |
| 243 | cpi shift, 0x04 ;1 [61] shift >= 4 means at least one of bits 2..7 is set, so no stuffed bit follows |
| 244 | brsh rxLoop ;2 [63] loop control |
| 245 | unstuff7: |
| 246 | andi x3, ~0x80 ;1 [63] |
| 247 | ori shift, 0x80 ;1 [64] |
| 248 | in x2, USBIN ;1 [65] <-- sample stuffed bit 7 |
| 249 | nop ;1 [66] |
| 250 | rjmp didUnstuff7 ;2 [68] |
| 251 | |
| 252 | macro POP_STANDARD ; 12 cycles: restores cnt, x3, x2, x1, shift, YH -- the reverse of the order in which the receiver pushed them |
| 253 | pop cnt |
| 254 | pop x3 |
| 255 | pop x2 |
| 256 | pop x1 |
| 257 | pop shift |
| 258 | pop YH |
| 259 | endm |
| 260 | macro POP_RETI ; 5 cycles: restores SREG (through YL) and then YL, the two values pushed at USB_INTR_VECTOR entry; contains no reti itself |
| 261 | pop YL |
| 262 | out SREG, YL |
| 263 | pop YL |
| 264 | endm |
| 265 | |
| 266 | #include "asmcommon.inc" |
| 267 | |
| 268 | ;---------------------------------------------------------------------------- |
| 269 | ; Transmitting data |
| 270 | ;---------------------------------------------------------------------------- |
| 271 | |
| 272 | txByteLoop: |
| 273 | txBitloop: |
| 274 | stuffN1Delay: ; [03] head of the tx bit loop; the code for the second bit of each pair starts at commonN1 |
| 275 | ror shift ;[-5] [11] [59] next bit to send moves into C |
| 276 | brcc doExorN1 ;[-4] [60] a 0 bit toggles the line mirror in x1 and reloads x4 |
| 277 | subi x4, 1 ;[-3] a 1 bit counts down the bitstuff counter |
| 278 | brne commonN1 ;[-2] |
| 279 | lsl shift ;[-1] compensate ror after rjmp stuffN1Delay |
| 280 | nop ;[00] stuffing consists of just waiting 8 cycles |
| 281 | rjmp stuffN1Delay ;[01] after ror, C bit is reliably clear |
| 282 | |
| 283 | sendNakAndReti: ;0 [-19] 19 cycles until SOP; sends the single byte USBPID_NAK |
| 284 | ldi x3, USBPID_NAK ;1 [-18] |
| 285 | rjmp usbSendX3 ;2 [-16] |
| 286 | sendAckAndReti: ;0 [-19] 19 cycles until SOP; sends the single byte USBPID_ACK |
| 287 | ldi x3, USBPID_ACK ;1 [-18] |
| 288 | rjmp usbSendX3 ;2 [-16] |
| 289 | sendCntAndReti: ;0 [-17] 17 cycles until SOP; sends the byte currently held in cnt |
| 290 | mov x3, cnt ;1 [-16] |
| 291 | usbSendX3: ;0 [-16] common tail: transmit the one byte held in x3 |
| 292 | ldi YL, 20 ;1 [-15] 'x3' is R20: Y = 20 so the transmitter's ld shift, y+ reads x3 (assumes the register file is mapped at data address 0) |
| 293 | ldi YH, 0 ;1 [-14] |
| 294 | ldi cnt, 2 ;1 [-13] byte count including the sync byte: sync + x3 |
| 295 | ; rjmp usbSendAndReti fallthrough |
| 296 | |
| 297 | ; USB spec says: |
| 298 | ; idle = J |
| 299 | ; J = (D+ = 0), (D- = 1) or USBOUT = 0x01 |
| 300 | ; K = (D+ = 1), (D- = 0) or USBOUT = 0x02 |
| 301 | ; Spec allows 7.5 bit times from EOP to SOP for replies (= 60 cycles) |
| 302 | |
| 303 | ;usbSendAndReti: drive the bus, send sync plus the bytes at Y, send SE0 (EOP), release the bus and jump to doReturn |
| 304 | ;pointer to data in 'Y' |
| 305 | ;number of bytes in 'cnt' -- including sync byte |
| 306 | ;uses: x1...x2, x4, shift, cnt, Y [x1 = mirror USBOUT, x2 = USBMASK, x4 = bitstuff cnt] |
| 307 | ;Numbers in brackets are time since first bit of sync pattern is sent (start of instruction) |
| 308 | usbSendAndReti: |
| 309 | in x2, USBDDR ;[-12] 12 cycles until SOP |
| 310 | ori x2, USBMASK ;[-11] |
| 311 | sbi USBOUT, USBMINUS ;[-10] prepare idle state; D+ and D- must have been 0 (no pullups) |
| 312 | out USBDDR, x2 ;[-8] <--- acquire bus |
| 313 | in x1, USBOUT ;[-7] port mirror for tx loop |
| 314 | ldi shift, 0x40 ;[-6] sync byte is first byte sent (we enter loop after ror) |
| 315 | ldi x2, USBMASK ;[-5] |
| 316 | push x4 ;[-4] x4 is saved here and popped again before the bus is released |
| 317 | doExorN1: |
| 318 | eor x1, x2 ;[-2] [06] [62] |
| 319 | ldi x4, 6 ;[-1] [07] [63] a 0 bit (line toggle) restarts the bitstuff counter |
| 320 | commonN1: |
| 321 | stuffN2Delay: |
| 322 | out USBOUT, x1 ;[00] [08] [64] <--- set bit |
| 323 | ror shift ;[01] |
| 324 | brcc doExorN2 ;[02] |
| 325 | subi x4, 1 ;[03] |
| 326 | brne commonN2 ;[04] |
| 327 | lsl shift ;[05] compensate ror after rjmp stuffN2Delay |
| 328 | rjmp stuffN2Delay ;[06] after ror, C bit is reliably clear |
| 329 | doExorN2: |
| 330 | eor x1, x2 ;[04] [12] |
| 331 | ldi x4, 6 ;[05] [13] |
| 332 | commonN2: |
| 333 | nop ;[06] [14] |
| 334 | subi cnt, 171 ;[07] [15] trick: (3 * 171) & 0xff = 1 |
| 335 | out USBOUT, x1 ;[08] [16] <--- set bit |
| 336 | brcs txBitloop ;[09] [25] [41] |
| 337 | |
| 338 | stuff6Delay: |
| 339 | ror shift ;[42] [50] |
| 340 | brcc doExor6 ;[43] |
| 341 | subi x4, 1 ;[44] |
| 342 | brne common6 ;[45] |
| 343 | lsl shift ;[46] compensate ror after rjmp stuff6Delay |
| 344 | nop ;[47] stuffing consists of just waiting 8 cycles |
| 345 | rjmp stuff6Delay ;[48] after ror, C bit is reliably clear |
| 346 | doExor6: |
| 347 | eor x1, x2 ;[45] [53] |
| 348 | ldi x4, 6 ;[46] |
| 349 | common6: |
| 350 | stuff7Delay: |
| 351 | ror shift ;[47] [55] |
| 352 | out USBOUT, x1 ;[48] <--- set bit |
| 353 | brcc doExor7 ;[49] |
| 354 | subi x4, 1 ;[50] |
| 355 | brne common7 ;[51] |
| 356 | lsl shift ;[52] compensate ror after rjmp stuff7Delay |
| 357 | rjmp stuff7Delay ;[53] after ror, C bit is reliably clear |
| 358 | doExor7: |
| 359 | eor x1, x2 ;[51] [59] |
| 360 | ldi x4, 6 ;[52] |
| 361 | common7: |
| 362 | ld shift, y+ ;[53] fetch the next byte to send and advance Y |
| 363 | tst cnt ;[55] Z set when cnt is 0; tested by the brne after the out |
| 364 | out USBOUT, x1 ;[56] <--- set bit |
| 365 | brne txByteLoop ;[57] |
| 366 | |
| 367 | ;make SE0: |
| 368 | cbr x1, USBMASK ;[58] prepare SE0 [spec says EOP may be 15 to 18 cycles] |
| 369 | lds x2, usbNewDeviceAddr;[59] |
| 370 | lsl x2 ;[61] we compare with left shifted address |
| 371 | subi YL, 2 + 20 ;[62] Only assign address on data packets, not ACK/NAK in x3: Y is 20 + 2 here exactly when the packet was sent via usbSendX3 (Y started at 20, two ld y+) |
| 372 | sbci YH, 0 ;[63] |
| 373 | out USBOUT, x1 ;[00] <-- out SE0 -- from now 2 bits = 16 cycles until bus idle |
| 374 | ;2006-03-06: moved transfer of new address to usbDeviceAddr from C-Code to asm: |
| 375 | ;set address only after data packet was sent, not after handshake |
| 376 | breq skipAddrAssign ;[01] Z comes from the subi/sbci on Y above |
| 377 | sts usbDeviceAddr, x2 ; if not skipped: SE0 is one cycle longer |
| 378 | skipAddrAssign: |
| 379 | ;end of usbDeviceAddress transfer |
| 380 | ldi x2, 1<<USB_INTR_PENDING_BIT;[03] int0 occurred during TX -- clear pending flag |
| 381 | USB_STORE_PENDING(x2) ;[04] |
| 382 | ori x1, USBIDLE ;[05] |
| 383 | in x2, USBDDR ;[06] |
| 384 | cbr x2, USBMASK ;[07] set both pins to input |
| 385 | mov x3, x1 ;[08] |
| 386 | cbr x3, USBMASK ;[09] configure no pullup on both pins |
| 387 | pop x4 ;[10] |
| 388 | nop2 ;[12] |
| 389 | nop2 ;[14] |
| 390 | out USBOUT, x1 ;[16] <-- out J (idle) -- end of SE0 (EOP signal) |
| 391 | out USBDDR, x2 ;[17] <-- release bus now |
| 392 | out USBOUT, x3 ;[18] <-- ensure no pull-up resistors are active |
| 393 | rjmp doReturn |