<?xml version="1.0" encoding="UTF-8"?><xml><records><record><source-app name="Biblio" version="7.x">Drupal-Biblio</source-app><ref-type>47</ref-type><contributors><authors><author><style face="normal" font="default" size="100%">Ziqiang Huang</style></author><author><style face="normal" font="default" size="100%">Andrew D. Hilton</style></author><author><style face="normal" font="default" size="100%">Benjamin C. Lee</style></author></authors></contributors><titles><title><style face="normal" font="default" size="100%">Decoupling Loads for Nano-Instruction Set Computers</style></title><secondary-title><style face="normal" font="default" size="100%">Proceedings of the 43rd International Symposium on Computer Architecture</style></secondary-title></titles><dates><year><style  face="normal" font="default" size="100%">2016</style></year></dates><urls><web-urls><url><style face="normal" font="default" size="100%">https://dl.acm.org/citation.cfm?id=3001181</style></url></web-urls></urls><volume><style face="normal" font="default" size="100%">44</style></volume><pages><style face="normal" font="default" size="100%">406-417</style></pages><language><style face="normal" font="default" size="100%">eng</style></language><abstract><style face="normal" font="default" size="100%">We propose an ISA extension that decouples the data access and register write operations in a load instruction. We describe system and hardware support for decoupled loads. Furthermore, we show how compilers can generate better static instruction schedules by hoisting a decoupled load's data access above may-alias stores and branches. We find that decoupled loads improve performance with geometric mean speedups of 8.4%.</style></abstract></record></records></xml>