/* Mednafen - Multi-system Emulator
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

#include "psx.h"
#include "mdec.h"
#include "cdc.h"
#include "spu.h"

#include "../state_helpers.h"
#include "../pgxp/pgxp_mem.h"

#include <algorithm> /* std::max */

/* Notes:

   Channel 4(SPU):
      Write:
         Doesn't seem to work properly with CHCR=0x01000001
         Hung when CHCR=0x11000601

   Channel 6:
      DMA hangs if D28 of CHCR is 0?
      D1 did not have an apparent effect.
*/

/* Indices into DMACH[]; one entry per DMA channel. */
enum
{
   CH_MDEC_IN  = 0,
   CH_MDEC_OUT = 1,
   CH_GPU      = 2,
   CH_CDC      = 3,
   CH_SPU      = 4,
   CH_FIVE     = 5,
   CH_OT       = 6
};

extern int32_t EventCycles;

/* Countdown (in DMA clocks) to the next scheduled DMA event; reloaded in
 * steps of EventCycles by DMA_Update(). */
static int32_t DMACycleCounter;

/* DMA control register (written/read verbatim through channel slot 7,
 * offset 0x0). */
static uint32_t DMAControl;

/* DMA interrupt control register.  As used in this file:
 *   bit  15      - ORed straight into the IRQ output; also set by
 *                  RunChannel() when a transfer address has bit 23 set.
 *   bits 16..22  - per-channel "raise IRQ at end of transfer" enables.
 *   bit  23      - gates DMAIntStatus onto the IRQ output. */
static uint32_t DMAIntControl;

/* Per-channel end-of-transfer IRQ flags (bit n == channel n). */
static uint8_t DMAIntStatus;

/* Last IRQ output level computed by RecalcIRQOut(). */
static bool IRQOut;

/* Register and bookkeeping state of a single DMA channel. */
struct Channel
{
   uint32_t BaseAddr;      /* Base address register (24 bits kept on write). */
   uint32_t BlockControl;  /* Block control register. */
   uint32_t ChanControl;   /* Channel control register (CHCR). */

   uint32_t CurAddr;       /* Address of the word currently being transferred. */
   uint16_t WordCounter;   /* Words left in the current block. */

   int32_t ClockCounter;   /* Clock budget available to RunChannel(). */
};

static Channel DMACH[7];
static int32_t lastts;

static const char *PrettyChannelNames[7] =
{
   "MDEC IN",
   "MDEC OUT",
   "GPU",
   "CDC",
   "SPU",
   "PIO",
   "OTC"
};

/* Nothing to allocate; present for symmetry with the other subsystems. */
void DMA_Init(void)
{
}

/* Nothing to release. */
void DMA_Kill(void)
{
}

/* Recomputes the DMA IRQ output from DMAIntStatus/DMAIntControl, caches it
 * in IRQOut and forwards it to the interrupt controller.
 *
 * Output = (any status flag set AND DMAIntControl bit 23) OR DMAIntControl bit 15.
 */
static INLINE void RecalcIRQOut(void)
{
   bool irqo = (bool)DMAIntStatus;

   irqo &= (DMAIntControl >> 23) & 1;
   irqo |= (DMAIntControl >> 15) & 1;

   IRQOut = irqo;
   ::IRQ_Assert(IRQ_DMA, irqo);
}

/* Rebases the "last update" timestamp to 0. */
void DMA_ResetTS(void)
{
   lastts = 0;
}

/* Power-on reset: clears every channel and all controller registers, then
 * re-evaluates the IRQ output. */
void DMA_Power(void)
{
   lastts = 0;

   memset(DMACH, 0, sizeof(DMACH));

   DMACycleCounter = EventCycles;

   DMAControl      = 0;
   DMAIntControl   = 0;
   DMAIntStatus    = 0;

   RecalcIRQOut();
}

/* Returns true when channel `ch` is currently able to start/continue a
 * transfer in the mode described by `CRModeCache` (CHCR with the
 * controller-modified bits masked out; bit 0 set == RAM -> device).
 */
static INLINE bool ChCan(const unsigned ch, const uint32_t CRModeCache)
{
   switch(ch)
   {
      case CH_MDEC_IN:
         return(MDEC_DMACanWrite());

      case CH_MDEC_OUT:
         return(MDEC_DMACanRead());

      case CH_GPU:
         if(CRModeCache & 0x1)
            return(GPU_DMACanWrite());
         /* GPU -> RAM reads are always allowed. */
         return(true);

      case CH_CDC:
      case CH_SPU:
         return(true);

      case CH_FIVE:
         return(false);

      case CH_OT:
         return((bool)(DMACH[ch].ChanControl & (1U << 28)));
   }

   /* should not happen */
   return false;
}

/* Decides whether the CPU must be halted because of an in-progress DMA and
 * how many cycles the GPU channel steals from the CPU, then pushes both
 * decisions to the CPU core.
 *
 * The CPU is halted when any channel is active (CHCR bit 24), has none of
 * CHCR bits 8..10 set, and still has words left in its current block.
 */
static void RecalcHalt(void)
{
   bool Halt    = false;
   unsigned ch  = 0;
   unsigned tmp = 0;

   for(ch = 0; ch < 7; ch++)
   {
      if(DMACH[ch].ChanControl & (1U << 24))
      {
         if(!(DMACH[ch].ChanControl & (7U << 8)))
         {
            if(DMACH[ch].WordCounter > 0)
            {
               Halt = true;
               break;
            }
         }
      }
   }

#if 0
   if((DMACH[0].WordCounter || (DMACH[0].ChanControl & (1 << 24))) && (DMACH[0].ChanControl & 0x200) /*&& MDEC_DMACanWrite()*/)
      Halt = true;

   if((DMACH[1].WordCounter || (DMACH[1].ChanControl & (1 << 24))) && (DMACH[1].ChanControl & 0x200) && (DMACH[1].WordCounter || MDEC_DMACanRead()))
      Halt = true;

   if((DMACH[2].WordCounter || (DMACH[2].ChanControl & (1 << 24))) && (DMACH[2].ChanControl & 0x200) && ((DMACH[2].ChanControl & 0x1) && (DMACH[2].WordCounter || GPU_DMACanWrite())))
      Halt = true;

   if((DMACH[3].WordCounter || (DMACH[3].ChanControl & (1 << 24))) && !(DMACH[3].ChanControl & 0x100))
      Halt = true;

   if(DMACH[6].WordCounter || (DMACH[6].ChanControl & (1 << 24)))
      Halt = true;
#endif

   //printf("Halt: %d\n", Halt);

   /* Cycle stealing only applies when the CPU is not halted and the GPU
    * channel is active with CHCR bits 8..10 == 0x200 and ready to transfer. */
   if(!Halt && (DMACH[2].ChanControl & (1U << 24)) && ((DMACH[2].ChanControl & 0x700) == 0x200) && ChCan(2, DMACH[2].ChanControl))
   {
      tmp = DMACH[2].BlockControl & 0xFFFF;

      if(tmp > 0)
         tmp--;
   }

   PSX_SetDMACycleSteal(tmp);

   PSX_CPU->SetHalt(Halt);
}

/* Transfers a single word between the DMA controller and the device behind
 * channel `ch`, and charges the per-word clock overhead to the channel.
 *
 *   ch          - channel index (CH_*).
 *   CRModeCache - CHCR with the controller-modified bits masked out;
 *                 bit 0 set means RAM -> device (the device is written).
 *   addr        - RAM address of the word; only forwarded to the GPU.
 *   V           - in: word read from RAM (write mode);
 *                 out: word produced by the device (read mode).
 *   offset      - forwarded to MDEC_DMARead(); the caller adds it (in words)
 *                 to the RAM destination address.
 */
static INLINE void ChRW(const unsigned ch, const uint32_t CRModeCache, const uint32_t addr, uint32_t *V, uint32_t *offset)
{
   unsigned extra_cyc_overhead = 0;

   switch(ch)
   {
      case CH_MDEC_IN:
         if(CRModeCache & 0x1)
            MDEC_DMAWrite(*V);
         else
            *V = 0;
         break;

      case CH_MDEC_OUT:
         if(CRModeCache & 0x1)
         {
         }
         else
            *V = MDEC_DMARead(offset);
         break;

      case CH_GPU:
         if(CRModeCache & 0x1)
            GPU_WriteDMA(*V, addr);
         else
            *V = GPU_ReadDMA();
         break;

      case CH_CDC:
         // 0x1f801018 affects CDC DMA timing.
#if 0
         if(CRModeCache & 0x100) // For CDC DMA(at least): When this bit is set, DMA controller doesn't appear to hog the (RAM?) bus.
         {
            if(CRModeCache & 0x00400000) // For CDC DMA(at least): When this bit is set, DMA controller appears to get even less bus time(or has a lower priority?)
            {
               DMACH[ch].ClockCounter -= 44 * 20 / 12;
            }
            else
            {
               DMACH[ch].ClockCounter -= 29 * 20 / 12;
            }
         }
         else
         {
            DMACH[ch].ClockCounter -= 23 * 20 / 12; // (23 + 1) = 24. (Though closer to 24.5 or 24.4 on average per tests on a PS1)
         }
#endif
         if(CRModeCache & 0x1)
         {
         }
         else
         {
            extra_cyc_overhead = 8;  // FIXME: Test.
            *V = PSX_CDC->DMARead(); // Note: Legend of Mana's opening movie is sensitive to DMA timing, including CDC.
         }
         break;

      case CH_SPU:
         // 0x1f801014 affects SPU DMA timing.
         // Wild conjecture about 0x1f801014:
         //
         // & 0x0000000F
         // & 0x000001E0 --- Used if (& 0x20000000) == 0?
         // & 0x00001000 --- Double total bus cycle time if value == 0?
         // & 0x0f000000 --- (value << 1) 33MHz cycles, bus cycle extension(added to 4?)?
         // & 0x20000000 ---
         //
         //
         // TODO?: SPU DMA will "complete" much faster if there's a mismatch between the CHCR read/write mode bit and the SPU control register DMA mode.
         //
         //
         // Investigate: SPU DMA doesn't seem to work right if the value written to 0x1F801DAA doesn't have the upper bit set to 1(0x8000) on a PS1.

         extra_cyc_overhead = 47; // Should be closer to 69, average, but actual timing is...complicated.

         if(CRModeCache & 0x1)
            PSX_SPU->WriteDMA(*V);
         else
            *V = PSX_SPU->ReadDMA();
         break;

      case CH_FIVE:
         if(CRModeCache & 0x1)
         {
         }
         else
         {
            *V = 0;
         }
         break;

      case CH_OT:
         /* Each entry holds the address of the previous word; the last
          * word of the block gets the 0xFFFFFF value instead. */
         if(DMACH[ch].WordCounter == 1)
            *V = 0xFFFFFF;
         else
            *V = (DMACH[ch].CurAddr - 4) & 0x1FFFFF;
         break;
   }

   // GROSS APPROXIMATION, shoehorning multiple effects together, TODO separate(especially SPU and CDC)
   //
   // The explicit <int> is required: the two operands have different types
   // (unsigned vs. int), which std::max cannot deduce on its own.
   DMACH[ch].ClockCounter -= std::max<int>(extra_cyc_overhead, (CRModeCache & 0x100) ? 7 : 0);
}

/* Intentionally empty. */
static INLINE void RunChannelI(const unsigned ch, const uint32_t CRModeCache, int32_t clocks)
{
}

/* Gives channel `ch` an additional budget of `clocks` DMA clocks and runs it
 * until the budget is used up, the device cannot proceed, or the transfer ends.
 *
 *   timestamp - unused here; kept for the callers' signature.
 *   clocks    - clocks to add to the channel's ClockCounter.
 *   ch        - channel index; values outside 0..6 are ignored.
 *
 * Any positive budget left when the loop stops is discarded.
 */
static INLINE void RunChannel(int32_t timestamp, int32_t clocks, int ch)
{
   /* Validate the index before touching DMACH[]. */
   if(ch < 0 || ch > 6)
      return;

   // Mask out the bits that the DMA controller will modify during the course of operation.
   const uint32_t CRModeCache = DMACH[ch].ChanControl & ~(0x11 << 24);

   //
   // Remember to handle an end condition on the same iteration of the while(DMACH[ch].ClockCounter > 0) loop that caused it,
   // otherwise RecalcHalt() might take the CPU out of a halted state before the end-of-DMA is signaled(especially a problem considering our largeish
   // DMA update timing granularity).
   //
   DMACH[ch].ClockCounter += clocks;

   while(MDFN_LIKELY(DMACH[ch].ClockCounter > 0))
   {
      if(DMACH[ch].WordCounter == 0) // Begin WordCounter reload.
      {
         if(!(DMACH[ch].ChanControl & (1 << 24))) // Needed for the forced-DMA-stop kludge(see DMA_Write()).
            break;

         if(!ChCan(ch, CRModeCache))
            break;

         DMACH[ch].CurAddr = DMACH[ch].BaseAddr;

         if(CRModeCache & (1U << 10))
         {
            /* CHCR bit 10: each block starts with a header word in RAM whose
             * top 8 bits give the word count and whose low 24 bits give the
             * next base address. */
            uint32_t header;

            if(MDFN_UNLIKELY(DMACH[ch].CurAddr & 0x800000))
            {
               /* Address has bit 23 set: stop the channel and flag
                * DMAIntControl bit 15. */
               DMACH[ch].ChanControl &= ~(0x11 << 24);
               DMAIntControl |= 0x8000;
               RecalcIRQOut();
               break;
            }

            header = MainRAM->ReadU32(DMACH[ch].CurAddr & 0x1FFFFC);
            DMACH[ch].CurAddr = (DMACH[ch].CurAddr + 4) & 0xFFFFFF;

            DMACH[ch].WordCounter = header >> 24;
            DMACH[ch].BaseAddr    = header & 0xFFFFFF;

            // printf to debug Soul Reaver ;)
            //if(DMACH[ch].WordCounter > 0x10)
            // printf("What the lala?  0x%02x @ 0x%08x\n", DMACH[ch].WordCounter, DMACH[ch].CurAddr - 4);

            if(DMACH[ch].WordCounter)
               DMACH[ch].ClockCounter -= 15;
            else
               DMACH[ch].ClockCounter -= 10;

            goto SkipPayloadStuff; // 3 cheers for gluten-free spaghetticode(necessary because the newly-loaded WordCounter might be 0, and we actually
                                   // want 0 to mean 0 and not 65536 in this context)!
         }
         else
         {
            DMACH[ch].WordCounter = DMACH[ch].BlockControl & 0xFFFF;

            if(CRModeCache & (1U << 9))
            {
               if(ch == 2) // Technically should apply to all channels, but since we don't implement CPU read penalties for channels other than 2 yet, it's like this to avoid making DMA longer than what games can handle.
                  DMACH[ch].ClockCounter -= 7;

               /* Decrement the block count held in the upper 16 bits. */
               DMACH[ch].BlockControl = (DMACH[ch].BlockControl & 0xFFFF) | ((DMACH[ch].BlockControl - (1U << 16)) & 0xFFFF0000);
            }
         }
      } // End WordCounter reload.
      else if(CRModeCache & 0x100) // BLARGH BLARGH FISHWHALE
      {
         //printf("LoadWC: %u(oldWC=%u)\n", DMACH[ch].BlockControl & 0xFFFF, DMACH[ch].WordCounter);
         DMACH[ch].CurAddr     = DMACH[ch].BaseAddr;
         DMACH[ch].WordCounter = DMACH[ch].BlockControl & 0xFFFF;
      }

      //
      // Do the payload read/write
      //
      {
         uint32_t vtmp;
         uint32_t voffs = 0;

         if(MDFN_UNLIKELY(DMACH[ch].CurAddr & 0x800000))
         {
            DMACH[ch].ChanControl &= ~(0x11 << 24);
            DMAIntControl |= 0x8000;
            RecalcIRQOut();
            break;
         }

         if(CRModeCache & 0x1)
            vtmp = MainRAM->ReadU32(DMACH[ch].CurAddr & 0x1FFFFC);

         //iCB: Pass address of memory for GPU
         ChRW(ch, CRModeCache, DMACH[ch].CurAddr, &vtmp, &voffs);

         if(!(CRModeCache & 0x1))
         {
            MainRAM->WriteU32((DMACH[ch].CurAddr + (voffs << 2)) & 0x1FFFFC, vtmp);
#ifdef HAVE_LIGHTREC
            PSX_CPU->lightrec_plugin_clear((DMACH[ch].CurAddr + (voffs << 2)) & 0x1FFFFC, 1);
#endif
         }
      }

      /* CHCR bit 1 selects the address step direction. */
      if(CRModeCache & 0x2)
         DMACH[ch].CurAddr = (DMACH[ch].CurAddr - 4) & 0xFFFFFF;
      else
         DMACH[ch].CurAddr = (DMACH[ch].CurAddr + 4) & 0xFFFFFF;

      DMACH[ch].WordCounter--;
      DMACH[ch].ClockCounter--;

SkipPayloadStuff: ;

      if(CRModeCache & 0x100) // BLARGH BLARGH WHALEFISH
      {
         DMACH[ch].BaseAddr     = DMACH[ch].CurAddr;
         DMACH[ch].BlockControl = (DMACH[ch].BlockControl & 0xFFFF0000) | DMACH[ch].WordCounter;
         //printf("SaveWC: %u\n", DMACH[ch].WordCounter);
      }

      //
      // Handle channel end condition:
      //
      if(DMACH[ch].WordCounter == 0)
      {
         bool ChannelEndTC = false;

         if(!(DMACH[ch].ChanControl & (1 << 24))) // Needed for the forced-DMA-stop kludge(see DMA_Write()).
            break;

         switch((CRModeCache >> 9) & 0x3)
         {
            case 0x0:
               ChannelEndTC = true;
               break;

            case 0x1:
               DMACH[ch].BaseAddr = DMACH[ch].CurAddr;
               if((DMACH[ch].BlockControl >> 16) == 0)
                  ChannelEndTC = true;
               break;

            case 0x2:
            case 0x3: // Not sure about 0x3.
               if(DMACH[ch].BaseAddr == 0xFFFFFF)
                  ChannelEndTC = true;
               break;
         }

         if(ChannelEndTC)
         {
            DMACH[ch].ChanControl &= ~(0x11 << 24);

            /* Latch the channel's IRQ flag only if its enable bit is set. */
            if(DMAIntControl & (1U << (16 + ch)))
            {
               DMAIntStatus |= 1U << ch;
               RecalcIRQOut();
            }
            break;
         }
      }
   }

   /* Unused budget is not carried over to the next run. */
   if(DMACH[ch].ClockCounter > 0)
      DMACH[ch].ClockCounter = 0;
}

/* Returns the number of cycles until the next DMA event: the smaller of
 * `next_event` and DMACycleCounter, passed through
 * overclock_device_to_cpu(). */
static INLINE int32_t CalcNextEvent(int32_t next_event)
{
   if(DMACycleCounter < next_event)
      next_event = DMACycleCounter;

   overclock_device_to_cpu(next_event);

   return(next_event);
}

/* Advances the DMA controller (and the GPU/MDEC it depends on) up to
 * `timestamp`, runs all seven channels with the elapsed clocks, and
 * refreshes the CPU halt state.
 *
 * Returns the timestamp at which DMA_Update() should next be called.
 */
int32_t DMA_Update(const int32_t timestamp)
{
   int32_t clocks, i;
   // uint32_t dc = (DMAControl >> (ch * 4)) & 0xF;

   clocks = timestamp - lastts;
   overclock_cpu_to_device(clocks);
   lastts = timestamp;

   GPU_Update(timestamp);
   MDEC_Run(clocks);

   for(i = 0; i < 7; i++)
      RunChannel(timestamp, clocks, i);

   DMACycleCounter -= clocks;
   while(DMACycleCounter <= 0)
      DMACycleCounter += EventCycles;

   RecalcHalt();

   return (timestamp + CalcNextEvent(0x10000000));
}

/* Handles a write of `V` to DMA register address `A` at time `timestamp`.
 *
 * Bits 4..6 of `A` select the channel (7 == controller-global registers) and
 * bits 2..3 select the register within that slot:
 *   0x0 - base address     (slot 7: DMAControl)
 *   0x4 - block control    (slot 7: DMAIntControl / acknowledge DMAIntStatus)
 *   0x8, 0xC - channel control (slot 7: unknown, logged)
 */
void DMA_Write(const int32_t timestamp, uint32_t A, uint32_t V)
{
   bool will_set_event = false;
   int ch = (A & 0x7F) >> 4;

   //if(ch == 2 || ch == 7)
   //PSX_WARNING("[DMA] Write: %08x %08x, DMAIntStatus=%08x", A, V, DMAIntStatus);

   // FIXME if we ever have "accurate" bus emulation
   V <<= (A & 3) * 8;

   DMA_Update(timestamp);

   switch(A & 0xC)
   {
      case 0x0:
         if(ch == 7)
         {
            DMAControl = V;
            RecalcHalt();
         }
         else
         {
            DMACH[ch].BaseAddr = V & 0xFFFFFF;
            will_set_event = true;
         }
         break;

      case 0x4:
         if(ch == 7)
         {
            DMAIntControl = V & 0x00ff803f;
            /* Writing 1 to a status bit (bits 24+) clears that flag. */
            DMAIntStatus &= ~(V >> 24);
            RecalcIRQOut();
         }
         else
         {
            DMACH[ch].BlockControl = V;
            will_set_event = true;
         }
         break;

      case 0xC:
      case 0x8:
         if(ch != 7)
         {
            uint32_t OldCC = DMACH[ch].ChanControl;

            //printf("CHCR: %u, %08x --- 0x%08x\n", ch, V, DMACH[ch].BlockControl);

            //
            // Kludge for DMA timing granularity and other issues.  Needs to occur before setting all bits of ChanControl to the new value, to accommodate the
            // case of a game cancelling DMA and changing the type of DMA(read/write, etc.) at the same time.
            //
            if((DMACH[ch].ChanControl & (1 << 24)) && !(V & (1 << 24)))
            {
               DMACH[ch].ChanControl &= ~(1 << 24); // Clear bit before RunChannel(), so it will only finish the block it's on at most.
               RunChannel(timestamp, 128 * 16, ch);
               DMACH[ch].WordCounter = 0;

#if 0 // TODO(maybe, need to work out worst-case performance for abnormally/brokenly large block sizes)
               DMACH[ch].ClockCounter = (1 << 30);
               RunChannel(timestamp, 1, ch);
               DMACH[ch].ClockCounter = 0;
               PSX_WARNING("[DMA] Forced stop for channel %d -- scanline=%d", ch, GPU_GetScanlineNum());
               MDFND_DispMessage(3, RETRO_LOG_ERROR, RETRO_MESSAGE_TARGET_ALL, RETRO_MESSAGE_TYPE_NOTIFICATION_ALT, "[DMA] Forced stop for channel %d", ch);
#endif
            }

            /* Only the writable CHCR bits are kept; channel 6 additionally
             * has bit 1 forced on. */
            if(ch == 6)
               DMACH[ch].ChanControl = (V & 0x51000000) | 0x2;
            else
               DMACH[ch].ChanControl = V & 0x71770703;

            if(!(OldCC & (1 << 24)) && (V & (1 << 24)))
            {
               //if(ch == 0 || ch == 1)
               // PSX_WARNING("[DMA] Started DMA for channel=%d --- CHCR=0x%08x --- BCR=0x%08x --- scanline=%d", ch, DMACH[ch].ChanControl, DMACH[ch].BlockControl, GPU_GetScanlineNum());

               DMACH[ch].WordCounter  = 0;
               DMACH[ch].ClockCounter = 0;

               //
               // Viewpoint starts a short MEM->GPU LL DMA and apparently has race conditions that can cause a crash if it doesn't finish almost immediately(
               // or at least very quickly, which the current DMA granularity has issues with, so run the channel ahead a bit to take of this issue and potentially
               // games with similar issues).
               //
               // Though, Viewpoint isn't exactly a good game, so maybe we shouldn't bother? ;)
               //
               // Also, it's needed for RecalcHalt() to work with some semblance of workiness.
               //
               RunChannel(timestamp, EventCycles/2, ch); //std::max(128 - DMACycleCounter, 1)); //64); //1); //128 - DMACycleCounter);
            }

            RecalcHalt();
            will_set_event = true;
            break;
         }
         /* ch == 7: no such register; fall through to the warning. */

      default:
         PSX_WARNING("[DMA] Unknown write: %08x %08x", A, V);
         break;
   }

   if(will_set_event)
      PSX_SetEventNT(PSX_EVENT_DMA, timestamp + CalcNextEvent(0x10000000));
}

/* Returns the value of the DMA register at address `A`, shifted right by
 * (A & 3) * 8 bits.  Register selection follows the same layout as
 * DMA_Write(); unknown registers read as 0 and are logged.
 */
uint32_t DMA_Read(const int32_t timestamp, uint32_t A)
{
   int ch = (A & 0x7F) >> 4;
   uint32_t ret = 0;

   switch(A & 0xC)
   {
      case 0x0:
         if(ch == 7)
            ret = DMAControl;
         else
            ret = DMACH[ch].BaseAddr;
         break;

      case 0x4:
         if(ch == 7)
         {
            /* Status flags live in bits 24..30 and the IRQ output in bit 31;
             * shift as unsigned so the sign bit is never touched. */
            ret = DMAIntControl | ((uint32_t)DMAIntStatus << 24) | ((uint32_t)IRQOut << 31);
         }
         else
            ret = DMACH[ch].BlockControl;
         break;

      case 0xC:
      case 0x8:
         if(ch != 7)
         {
            ret = DMACH[ch].ChanControl;
            break;
         }
         /* ch == 7: no such register; fall through to the warning. */

      default:
         PSX_WARNING("[DMA] Unknown read: %08x", A);
         break;
   }

   return (ret >> ((A & 3) * 8));
}

/* Save-state field list for channel `n`. */
#define SFDMACH(n) SFVARN(DMACH[n].BaseAddr, #n "BaseAddr"),         \
                   SFVARN(DMACH[n].BlockControl, #n "BlockControl"), \
                   SFVARN(DMACH[n].ChanControl, #n "ChanControl"),   \
                   SFVARN(DMACH[n].CurAddr, #n "CurAddr"),           \
                   SFVARN(DMACH[n].WordCounter, #n "WordCounter"),   \
                   SFVARN(DMACH[n].ClockCounter, #n "ClockCounter")

/* Saves or loads the DMA controller state under the "DMA" section.
 * Returns whatever MDFNSS_StateAction() returns.  `lastts` is not part of
 * the saved state.
 */
int DMA_StateAction(StateMem *sm, int load, int data_only)
{
   SFORMAT StateRegs[] =
   {
      SFVAR(DMACycleCounter),
      SFVAR(DMAControl),
      SFVAR(DMAIntControl),
      SFVAR(DMAIntStatus),
      SFVAR(IRQOut),

      SFDMACH(0),
      SFDMACH(1),
      SFDMACH(2),
      SFDMACH(3),
      SFDMACH(4),
      SFDMACH(5),
      SFDMACH(6),

      SFEND
   };

   int ret = MDFNSS_StateAction(sm, load, data_only, StateRegs, "DMA");

   if(load)
   {
   }

   return(ret);
}