Publications

The material presented on this page is provided to ensure timely dissemination of scholarly and technical work. Copyright and all rights therein are retained by authors or by other copyright holders. All persons copying this information are expected to adhere to the terms and constraints invoked by each author's copyright. In most cases, these works may not be reposted without the explicit permission of the copyright holder.

Underlined names are students advised by me.

BibBase https://danielwong.org/publication.bib

generated by

Group by
- Year
- Author
- Type
- Keyword
- Downloads

2024 (1)

[ISPASS'24] Characterizing In-Kernel Observability of Latency-Sensitive Request-level Metrics with eBPF.
Mohammadreza Rezvani, Ali Jahanshahi, & Daniel Wong. In Proceedings of the 2024 IEEE International Symposium on Performance Analysis of Systems and Software (ISPASS), 2024. (Best Paper Nominee) to appear

link bibtex

@inproceedings{rezvani2024,
  title     = {[ISPASS'24] Characterizing In-Kernel Observability of Latency-Sensitive Request-level Metrics with eBPF},
  author    = {Rezvani</u>, <br/> <u>Mohammadreza and Jahanshahi</u>, <u>Ali and Wong</b>, <b>Daniel},
  booktitle = {Proceedings of the 2024 IEEE International Symposium on Performance Analysis of Systems and Software (ISPASS)},
  year      = {2024},
  note      = {<b>(Best Paper Nominee)</b> <small>to appear</small><br/>},
}

2023 (4)

[HPCA'23] KRISP: Enabling Kernel-wise Right-sizing for Spatial Partitioned GPU Inference Servers.
Marcus Chow, Ali Jahanshahi, & Daniel Wong. In Proceedings of the 29th IEEE International Symposium on High Performance Computer Architecture (HPCA), 2023. (Acceptance Rate: 25.0%)

[HPCA'23] KRISP: Enabling Kernel-wise Right-sizing for Spatial Partitioned GPU Inference Servers [pdf]

paper link bibtex

@inproceedings{chow2023krisp,
  title     = {[HPCA'23] KRISP: Enabling Kernel-wise Right-sizing for Spatial Partitioned GPU Inference Servers},
  author    = {Chow</u>, <br/> <u>Marcus and Jahanshahi</u>, <u>Ali and Wong</b>, <b>Daniel},
  booktitle = {Proceedings of the 29th IEEE International Symposium on High Performance Computer Architecture (HPCA)},
  year      = {2023},
  note      = {<small>(Acceptance Rate: 25.0\%)</small><br/>},
  url_Paper = {https://danielwong.org/files/KRISP-HPCA2023.pdf},
}

[IGSC'23] CoFRIS: Coordinated Frequency and Resource Scaling for GPU Inference Servers.
Marcus Chow, & Daniel Wong. In Proceedings of the 14th International Green and Sustainable Computing Conference (IGSC), 2023.

[IGSC'23] CoFRIS: Coordinated Frequency and Resource Scaling for GPU Inference Servers [pdf]

paper link bibtex

@inproceedings{chow2023igsc,
  title     = {[IGSC'23] CoFRIS: Coordinated Frequency and Resource Scaling for GPU Inference Servers},
  author    = {Chow</u>, <br/> <u>Marcus and Wong</b>, <b>Daniel},
  booktitle = {Proceedings of the 14th International Green and Sustainable Computing Conference (IGSC)},
  year      = {2023},
  url_Paper = {https://danielwong.org/files/CoFRIS-IGSC23.pdf},
}

[IGSC'23] WattWiser: Power Resource-Efficient Scheduling for Multi-Model Multi-GPU Inference Servers.
Ali Jahanshahi, Mohammadreza Rezvani, & Daniel Wong. In Proceedings of the 14th International Green and Sustainable Computing Conference (IGSC), 2023.

[IGSC'23] WattWiser: Power Resource-Efficient Scheduling for Multi-Model Multi-GPU Inference Servers [pdf]

paper link bibtex

@inproceedings{ali2023igsc,
  title     = {[IGSC'23] WattWiser: Power Resource-Efficient Scheduling for Multi-Model Multi-GPU Inference Servers},
  author    = {Jahanshahi</u>, <br/> <u>Ali and Rezvani</u>, <u>Mohammadreza and Wong</b>, <b>Daniel},
  booktitle = {Proceedings of the 14th International Green and Sustainable Computing Conference (IGSC)},
  year      = {2023},
  url_Paper = {https://danielwong.org/files/WattWiser-IGSC23.pdf},
}

[AI4Dev'23] VSCuda: LLM based CUDA extension for Visual Studio Code.
Brian Chen, Nafis Mustakin, Alvin Hoang, Sakib Fuad, & Daniel Wong. In First Workshop on AI Assisted Software Development for HPC (AI4Dev), 2023.

[AI4Dev'23] VSCuda: LLM based CUDA extension for Visual Studio Code [link]

paper link bibtex

@inproceedings{vscuda,
  title     = {[AI4Dev'23] VSCuda: LLM based CUDA extension for Visual Studio Code},
  author    = {Chen</u>, <br/> <u>Brian and Mustakin</u>, <u>Nafis and Hoang</u>, <u>Alvin and Fuad, Sakib and Wong</b>, <b>Daniel},
  booktitle = {First Workshop on AI Assisted Software Development for HPC (AI4Dev)},
  year      = {2023},
  url_Paper = {https://dl.acm.org/doi/abs/10.1145/3624062.3624064},
}

2022 (3)

[ACM TACO'22] PowerMorph: QoS-Aware Server Power Reshaping For Data Center Regulation Service.
Ali Jahanshahi, Nanpeng Yu, & Daniel Wong. ACM Transactions on Architecture and Code Optimization (TACO), Volume 19(Issue 3): 1–27. September 2022.

[ACM TACO'22] PowerMorph: QoS-Aware Server Power Reshaping For Data Center Regulation Service [pdf]

paper link bibtex

@article{jahanshahi2022powermorph,
  title     = {[ACM TACO'22] {PowerMorph}: QoS-Aware Server Power Reshaping For Data Center Regulation Service},
  author    = {Jahanshahi</u>, <br/> <u>Ali and Yu, Nanpeng and Wong</b>, <b>Daniel},
  journal   = {ACM Transactions on Architecture and Code Optimization (TACO)},
  volume    = {Volume 19},
  number    = {Issue 3},
  pages     = {1--27},
  year      = {2022},
  month     = {September},
  url_Paper = {https://danielwong.org/files/Powermorph-TACO.pdf},
  note      = {<br/>},
}

[ISPASS'22] GPUCalorie: Floorplan Estimation for GPU Thermal Evaluation.
Marcus Chow, Ali Jahanshahi, Ana Cardenas Beltran, Sheldon Tan, & Daniel Wong. In Proceedings of the IEEE International Symposium on Performance Analysis of Systems and Software (ISPASS), 2022. (Poster)

link bibtex

@inproceedings{chow2022gpucalorie,
  title     = {[ISPASS'22] GPUCalorie: Floorplan Estimation for GPU Thermal Evaluation},
  author    = {Chow</u>, <br/> <u>Marcus and Jahanshahi</u>, <u>Ali and Beltran</u>, <u>Ana Cardenas and Tan, Sheldon and Wong</b>, <b>Daniel},
  booktitle = {Proceedings of the IEEE International Symposium on Performance Analysis of Systems and Software (ISPASS)},
  year      = {2022},
  note      = {<small>(Poster)</small><br/>},
}

[GPGPU'22] Scaleserve: A Scalable Multi-GPU Machine Learning Inference System And Benchmarking Suite.
Ali Jahanshahi, Marcus Chow, & Daniel Wong. In Proceedings of the 14th Workshop on General Purpose Processing Using GPU (GPGPU), 2022. (Short paper)

link bibtex

@inproceedings{jahanshahi2022scaleserve,
  title     = {[GPGPU'22] Scaleserve: A Scalable Multi-GPU Machine Learning Inference System And Benchmarking Suite},
  author    = {Jahanshahi</u>, <br/><u>Ali and Chow</u>, <u>Marcus and Wong</b>, <b>Daniel},
  booktitle = {Proceedings of the 14th Workshop on General Purpose Processing Using GPU (GPGPU)},
  year      = {2022},
  note      = {<small>(Short paper)</small><br/>},
}

2021 (7)

[ISCA'21] BlockMaestro: Enabling Programmer-Transparent Task-Based Execution In GPU Systems.
AmirAli Abdolrashidi, Hodjat Asghari Esfeden, Ali Jahanshahi, Kaustubh Singh, Nael Abu-Ghazaleh, & Daniel Wong. In Proceedings of the 48th ACM/IEEE International Symposium on Computer Architecture (ISCA), 2021. (Acceptance Rate: 18.7%)

[ISCA'21] BlockMaestro: Enabling Programmer-Transparent Task-Based Execution In GPU Systems [pdf]

paper link bibtex

@inproceedings{abdolrashidi2021blockmaestro,
  title     = {[ISCA'21] {BlockMaestro}: Enabling Programmer-Transparent Task-Based Execution In GPU Systems},
  author    = {Abdolrashidi</u>, <br/> <u>AmirAli and Esfeden, Hodjat Asghari and Jahanshahi</u>, <u>Ali and Singh</u>, <u>Kaustubh and Abu-Ghazaleh, Nael and Wong</b>, <b>Daniel},
  booktitle = {Proceedings of the 48th ACM/IEEE International Symposium on Computer Architecture (ISCA)},
  year      = {2021},
  note      = {<small>(Acceptance Rate: 18.7\%)</small><br/>},
  url_Paper = {files/Blockmaestro-ISCA2021.pdf},
}

[SC'21] MAPA: Multi-Accelerator Pattern Allocation Policy For Multi-Tenant GPU Servers.
Kiran Ranganath, Joshua D Suetterlein, Joseph B Manzano, Shuaiwen Leon Song, & Daniel Wong. In Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis (SC), 2021. (Acceptance Rate: 26.8%)

[SC'21] MAPA: Multi-Accelerator Pattern Allocation Policy For Multi-Tenant GPU Servers [pdf]

paper link bibtex

@inproceedings{ranganath2021mapa,
  title     = {[SC'21] {MAPA}: Multi-Accelerator Pattern Allocation Policy For Multi-Tenant GPU Servers},
  author    = {Ranganath</u>, <br/> <u>Kiran and Suetterlein, Joshua D and Manzano, Joseph B and Song, Shuaiwen Leon and Wong</b>, <b>Daniel},
  booktitle = {Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis (SC)},
  year      = {2021},
  note      = {<small>(Acceptance Rate: 26.8\%)</small><br/>},
  url_Paper = {files/MAPA-SC2021.pdf},
}

[ACM TACO'21] PAVER: Locality Graph-Based Thread Block Scheduling For GPUs.
Devashree Tripathy, Amirali Abdolrashidi, Laxmi Narayan Bhuyan, Liang Zhou, & Daniel Wong. ACM Transactions on Architecture and Code Optimization (TACO), Volume 18(Issue 3): 1–26. June 2021.

[ACM TACO'21] PAVER: Locality Graph-Based Thread Block Scheduling For GPUs [pdf]

paper link bibtex

@article{tripathy2021paver,
  title     = {[ACM TACO'21] {PAVER}: Locality Graph-Based Thread Block Scheduling For GPUs},
  author    = {Tripathy</u>, <br/><u>Devashree and Abdolrashidi</u>, <u>Amirali and Bhuyan, Laxmi Narayan and Zhou, Liang and Wong</b>, <b>Daniel},
  journal   = {ACM Transactions on Architecture and Code Optimization (TACO)},
  volume    = {Volume 18},
  number    = {Issue 3},
  pages     = {1--26},
  year      = {2021},
  month     = {June},
  publisher = {ACM},
  url_Paper = {files/PAVER-TACO.pdf},
  note      = {<br/>},
}

[NAS'21] LocalityGuru: A Ptx Analyzer For Extracting Thread Block-Level Locality In GPGPUs.
Devashree Tripathy, Amirali Abdolrashidi, Quan Fan, Daniel Wong, & Manoranjan Satpathy. In Proceedings of the 15th IEEE International Conference on Networking, Architecture and Storage (NAS), 2021.
link bibtex

@inproceedings{tripathy2021localityguru,
  title     = {[NAS'21] {LocalityGuru}: A Ptx Analyzer For Extracting Thread Block-Level Locality In GPGPUs},
  author    = {Tripathy</u>, <br/><u>Devashree and Abdolrashidi</u>, <u>Amirali and Fan, Quan and Wong</b>, <b>Daniel and Satpathy, Manoranjan},
  booktitle = {Proceedings of the 15th IEEE International Conference on Networking, Architecture and Storage (NAS)},
  year      = {2021},
  note      = {},
}

[NAS'21] ICAP: Designing Inrush Current Aware Power Gating Switch For GPGPU.
Hadi Zamani, Devashree Tripathy, Ali Jahanshahi, & Daniel Wong. In Proceedings of the 15th IEEE International Conference on Networking, Architecture and Storage (NAS), 2021.
link bibtex

@inproceedings{zamani2021icap,
  title     = {[NAS'21] {ICAP}: Designing Inrush Current Aware Power Gating Switch For GPGPU},
  author    = {Zamani, <br/>Hadi and Tripathy</u>, <u>Devashree and Jahanshahi</u>, <u>Ali and Wong</b>, <b>Daniel},
  booktitle = {Proceedings of the 15th IEEE International Conference on Networking, Architecture and Storage (NAS)},
  year      = {2021},
  note      = {},
}

[LCPC'21] LC-MEMENTO: A Memory Model for Accelerated Architectures.
Kiran Ranganath, Jesun Firoz, Joshua Suetterlein, Joseph Manzano, Andres Marquez, Mark Raugas, & Daniel Wong. In Languages and Compilers for Parallel Computing (LCPC), 2021.
link bibtex

@inproceedings{10.1007/978-3-030-99372-6_5,
  title     = {[LCPC'21] LC-MEMENTO: A Memory Model for Accelerated Architectures},
  author    = {Ranganath</u>, <br/><u>Kiran and Firoz, Jesun and Suetterlein, Joshua and Manzano, Joseph and Marquez, Andres and Raugas, Mark and Wong</b>, <b>Daniel},
  year      = {2021},
  booktitle = {Languages and Compilers for Parallel Computing (LCPC)},
  note      = {},
}

[RSDHA'21] Energy Efficient Task Graph Execution Using Compute Unit Masking In GPUs.
Marcus Chow, Kiran Ranganath, Robert Lerias, Mika Shanela Carodan, & Daniel Wong. In Workshop on Redefining Scalability for Diversely Heterogeneous Architectures (RSDHA), 2021.
link bibtex

@inproceedings{chow2021energy,
  title     = {[RSDHA'21] Energy Efficient Task Graph Execution Using Compute Unit Masking In GPUs},
  author    = {Chow</u>, <br/> <u>Marcus and Ranganath</u>, <u>Kiran and Lerias</u>, <u>Robert and Carodan</u>, <u>Mika Shanela and Wong</b>, <b>Daniel},
  booktitle = {Workshop on Redefining Scalability for Diversely Heterogeneous Architectures (RSDHA)},
  year      = {2021},
  note      = {},
}

2020 (3)

[MICRO'20] BOW: Breathing Operand Windows To Exploit Bypassing In GPUs.
Hodjat Asghari Esfeden, Amirali Abdolrashidi, Shafiur Rahman, Daniel Wong, & Nael Abu-Ghazaleh. In Proceedings of the 53rd IEEE/ACM International Symposium on Microarchitecture (MICRO), 2020. (Acceptance Rate: 19.4%)

[MICRO'20] BOW: Breathing Operand Windows To Exploit Bypassing In GPUs [pdf]

paper link bibtex

@inproceedings{esfeden2020bow,
  title     = {[MICRO'20] {BOW}: Breathing Operand Windows To Exploit Bypassing In GPUs},
  author    = {Esfeden, <br/> Hodjat Asghari and Abdolrashidi</u>, <u>Amirali and Rahman, Shafiur and Wong</b>, <b>Daniel and Abu-Ghazaleh, Nael},
  booktitle = {Proceedings of the 53rd IEEE/ACM International Symposium on Microarchitecture (MICRO)},
  year      = {2020},
  note      = {<small>(Acceptance Rate: 19.4\%)</small><br/>},
  url_Paper = {files/BOW-MICRO2020.pdf},
}

[IEEE CAL'20] GPU-NEST: Characterizing Energy Efficiency Of Multi-GPU Inference Servers.
Ali Jahanshahi, Hadi Zamani Sabzi, Chester Lau, & Daniel Wong. IEEE Computer Architecture Letters, Volume 19(Issue 2): 139–142. 2020.
link bibtex

@article{jahanshahi2020gpu,
  title     = {[IEEE CAL'20] {GPU-NEST}: Characterizing Energy Efficiency Of Multi-GPU Inference Servers},
  author    = {Jahanshahi</u>, <br/><u>Ali and Sabzi, Hadi Zamani and Lau</u>, <u>Chester and Wong</b>, <b>Daniel},
  journal   = {IEEE Computer Architecture Letters},
  volume    = {Volume 19},
  number    = {Issue 2},
  pages     = {139--142},
  year      = {2020},
  publisher = {IEEE},
  note      = {},
}

[FCCM'20] High-Performance Parallel Radix Sort On FPGA.
Bashar Romanous, Mohammadreza Rezvani, Junjie Huang, Daniel Wong, Evangelos E Papalexakis, Vassilis J Tsotras, & Walid Najjar. In Proceedings of the 28th IEEE International Symposium on Field-Programmable Custom Computing Machines (FCCM), 2020. (poster)

link bibtex

@inproceedings{romanous2020high,
  title     = {[FCCM'20] High-Performance Parallel Radix Sort On FPGA},
  author    = {Romanous, <br/> Bashar and Rezvani</u>, <u>Mohammadreza and Huang, Junjie and Wong</b>, <b>Daniel and Papalexakis, Evangelos E and Tsotras, Vassilis J and Najjar, Walid},
  booktitle = {Proceedings of the 28th IEEE International Symposium on Field-Programmable Custom Computing Machines (FCCM)},
  year      = {2020},
  note      = {<small>(poster)</small><br/>},
}

2019 (6)

[ASPLOS'19] CORF: Coalescing Operand Register File For GPUs.
Hodjat Asghari Esfeden, Farzad Khorasani, Hyeran Jeon, Daniel Wong, & Nael Abu-Ghazaleh. In Proceedings of the 24th International Conference on Architectural Support for Programming Languages and Operating Systems (ASPLOS), 2019. (Acceptance Rate: 21.1%)

[ASPLOS'19] CORF: Coalescing Operand Register File For GPUs [pdf]

paper link bibtex

@inproceedings{asghari2019corf,
  title     = {[ASPLOS'19] CORF: Coalescing Operand Register File For GPUs},
  author    = {Asghari Esfeden, <br/> Hodjat and Khorasani, Farzad and Jeon, Hyeran and Wong</b>, <b>Daniel and Abu-Ghazaleh, Nael},
  booktitle = {Proceedings of the 24th International Conference on Architectural Support for Programming Languages and Operating Systems (ASPLOS)},
  year      = {2019},
  note      = {<small>(Acceptance Rate: 21.1\%)</small><br/>},
  url_Paper = {files/CORF-ASPLOS19.pdf},
}

[HPCA'19] μDPM: Dynamic Power Management For The Microsecond Era.
Chih-Hsun Chou, Laxmi N Bhuyan, & Daniel Wong. In Proceedings of the 25th IEEE International Symposium on High Performance Computer Architecture (HPCA), 2019. (Acceptance Rate: 19.7%)

[HPCA'19] μDPM: Dynamic Power Management For The Microsecond Era [pdf]

paper

slides link bibtex

@inproceedings{chou2019mudpm,
  title      = {[HPCA'19] {$\mu$DPM}: Dynamic Power Management For The Microsecond Era},
  author     = {Chou, <br/> Chih-Hsun and Bhuyan, Laxmi N and Wong</b>, <b>Daniel},
  booktitle  = {Proceedings of the 25th IEEE International Symposium on High Performance Computer Architecture (HPCA)},
  year       = {2019},
  note       = {<small>(Acceptance Rate: 19.7\%)</small><br/>},
  url_Paper  = {files/uDPM-HPCA2019.pdf},
  url_Slides = {files/uDPM-HPCA2019-Presentation.pdf},
}

[IEEE CAL'19] Speeding Up Collective Communications Through Inter-Gpu Re-Routing.
Kiran Ranganath, AmirAli Abdolrashidi, Shuaiwen Leon Song, & Daniel Wong. IEEE Computer Architecture Letters, Volume 18(Issue 2): 128–131. 2019.

paper link bibtex

@article{ranganath2019speeding,
  title     = {[IEEE CAL'19] Speeding Up Collective Communications Through Inter-Gpu Re-Routing},
  author    = {Ranganath</u>, <br/> <u>Kiran and Abdolrashidi</u>, <u>AmirAli and Song, Shuaiwen Leon and Wong</b>, <b>Daniel},
  journal   = {IEEE Computer Architecture Letters},
  volume    = {Volume 18},
  number    = {Issue 2},
  pages     = {128--131},
  year      = {2019},
  publisher = {IEEE},
  url_Paper = {files/NVLink-CAL2019.pdf},
  note      = {},
}

[IEEE CAL'19] Locality-Aware GPU Register File.
Hyeran Jeon, Hodjat Asghari Esfeden, Nael Abu-Ghazaleh, Daniel Wong, & Sindhuja Elango. IEEE Computer Architecture Letters, Volume 18(Issue 2): 153–156. 2019.
link bibtex

@article{jeon2019locality,
  title     = {[IEEE CAL'19] Locality-Aware GPU Register File},
  author    = {Jeon, <br/> Hyeran and Esfeden, Hodjat Asghari and Abu-Ghazaleh, Nael and Wong</b>, <b>Daniel and Elango, Sindhuja},
  journal   = {IEEE Computer Architecture Letters},
  volume    = {Volume 18},
  number    = {Issue 2},
  pages     = {153--156},
  year      = {2019},
  publisher = {IEEE},
  note      = {},
}

[Applied Energy'19] Frequency Regulation Service Provision In Data Center With Computational Flexibility.
Wei Wang, Amirali Abdolrashidi, Nanpeng Yu, & Daniel Wong. Applied Energy, Volume 251. October 2019. (IF: 8.4)

[Applied Energy'19] Frequency Regulation Service Provision In Data Center With Computational Flexibility [pdf]

paper

[Applied Energy'19] Frequency Regulation Service Provision In Data Center With Computational Flexibility [link]

link link bibtex

@article{wang2019frequency,
  title     = {[Applied Energy'19] Frequency Regulation Service Provision In Data Center With Computational Flexibility},
  author    = {Wang, <br/> Wei and Abdolrashidi</u>, <u>Amirali and Yu, Nanpeng and Wong</b>, <b>Daniel},
  journal   = {Applied Energy},
  volume    = {Volume 251},
  year      = {2019},
  month     = {October},
  publisher = {Elsevier},
  note      = {<small>(IF: 8.4)</small><br/>},
  url_Paper = {files/2019-Data-Center-Frequency-Regulation.pdf},
  url_Link  = {https://www.sciencedirect.com/science/article/pii/S0306261919309663},
}

[SMACD'19] Long-Term Reliability Management For Multitasking GPGPUs.
Zeyu Sun, Taeyoung Kim, Marcus Chow, Shaoyi Peng, Han Zhou, Hyoseung Kim, Daniel Wong, & Sheldon X-D Tan. In Proceedings of the 16th International Conference on Synthesis, Modeling, Analysis and Simulation Methods and Applications to Circuit Design (SMACD), 2019.
link bibtex

@inproceedings{sun2019long,
  title     = {[SMACD'19] Long-Term Reliability Management For Multitasking GPGPUs},
  author    = {Sun, <br/> Zeyu and Kim, Taeyoung and Chow</u>, <u>Marcus and Peng, Shaoyi and Zhou, Han and Kim, Hyoseung and Wong</b>, <b>Daniel and Tan, Sheldon X-D},
  booktitle = {Proceedings of the 16th International Conference on Synthesis, Modeling, Analysis and Simulation Methods and Applications to Circuit Design (SMACD)},
  year      = {2019},
  note      = {},
}

2018 (2)

[IPDPS'18] Joint Server And Network Energy Saving In Data Centers For Latency-Sensitive Applications.
Liang Zhou, Chih-Hsun Chou, Laxmi N Bhuyan, KK Ramakrishnan, & Daniel Wong. In Proceedings of the 32nd IEEE International Parallel and Distributed Processing Symposium (IPDPS), 2018. (Acceptance Rate: 24.5%)

[IPDPS'18] Joint Server And Network Energy Saving In Data Centers For Latency-Sensitive Applications [pdf]

paper link bibtex

@inproceedings{zhou2018joint,
  title     = {[IPDPS'18] Joint Server And Network Energy Saving In Data Centers For Latency-Sensitive Applications},
  author    = {Zhou, <br/> Liang and Chou, Chih-Hsun and Bhuyan, Laxmi N and Ramakrishnan, KK and Wong</b>, <b>Daniel},
  booktitle = {Proceedings of the 32nd IEEE International Parallel and Distributed Processing Symposium (IPDPS)},
  year      = {2018},
  note      = {<small>(Acceptance Rate: 24.5\%)</small><br/>},
  url_Paper = {files/Eprons-IPDPS2018.pdf},
}

[ISLPED'18] Load-Triggered Warp Approximation On GPU.
Zhenhong Liu, Daniel Wong, & Nam Sung Kim. In Proceedings of the ACM/IEEE International Symposium on Low Power Electronics and Design (ISLPED), 2018. (Acceptance Rate: 23.3%)

link bibtex

@inproceedings{liu2018load,
  title     = {[ISLPED'18] Load-Triggered Warp Approximation On GPU},
  author    = {Liu, <br/> Zhenhong and Wong</b>, <b>Daniel and Kim, Nam Sung},
  booktitle = {Proceedings of the ACM/IEEE International Symposium on Low Power Electronics and Design (ISLPED)},
  year      = {2018},
  note      = {<small>(Acceptance Rate: 23.3\%)</small><br/>},
}

2017 (1)

[MICRO'17] Wireframe: Supporting Data-Dependent Parallelism Through Dependency Graph Execution In GPUs.
AmirAli Abdolrashidi, Devashree Tripathy, Mehmet Esat Belviranli, Laxmi Narayan Bhuyan, & Daniel Wong. In Proceedings of the 50th Annual IEEE/ACM International Symposium on Microarchitecture (MICRO), 2017. (Acceptance Rate: 18.6%)

[MICRO'17] Wireframe: Supporting Data-Dependent Parallelism Through Dependency Graph Execution In GPUs [pdf]

paper

slides link bibtex

@inproceedings{abdolrashidi2017wireframe,
  title      = {[MICRO'17] Wireframe: Supporting Data-Dependent Parallelism Through Dependency Graph Execution In GPUs},
  author     = {Abdolrashidi</u>, <br/> <u>AmirAli and Tripathy</u>, <u>Devashree and Belviranli, Mehmet Esat and Bhuyan, Laxmi Narayan and Wong</b>, <b>Daniel},
  booktitle  = {Proceedings of the 50th Annual IEEE/ACM International Symposium on Microarchitecture (MICRO)},
  year       = {2017},
  note       = {<small>(Acceptance Rate: 18.6\%)</small><br/>},
  url_Paper  = {files/Wireframe-MICRO17.pdf},
  url_Slides = {files/Wireframe-MICRO2017-Presentation.pdf},
}

2016 (6)

[ISCA'16] Peak Efficiency Aware Scheduling for Highly Energy Proportional Servers.
Daniel Wong. In Proceedings of the 43rd ACM/IEEE International Symposium on Computer Architecture (ISCA), 2016. (Acceptance Rate: 19.5%)

[ISCA'16] Peak Efficiency Aware Scheduling for Highly Energy Proportional Servers [pdf]

paper

slides link bibtex

@inproceedings{wong2016peak,
  title      = {[ISCA'16] Peak Efficiency Aware Scheduling for Highly Energy Proportional Servers},
  author     = {Wong</b>, <br/><b>Daniel},
  booktitle  = {Proceedings of the 43rd ACM/IEEE International Symposium on Computer Architecture (ISCA)},
  year       = {2016},
  note       = {<small>(Acceptance Rate: 19.5\%)</small><br/>},
  url_Paper  = {files/PEAS-ISCA2016.pdf},
  url_Slides = {files/PEAS-ISCA2016-Presentation.pdf},
}

[HPCA'16] Approximating Warps with Intra-Warp Operand Value Similarity.
Daniel Wong, Nam Sung Kim, & Murali Annavaram. In Proceedings of the 22nd IEEE International Symposium on High Performance Computer Architecture (HPCA), 2016. (Acceptance Rate: 22%)

[HPCA'16] Approximating Warps with Intra-Warp Operand Value Similarity [pdf]

paper

[HPCA'16] Approximating Warps with Intra-Warp Operand Value Similarity [pptx]

slides link bibtex

@inproceedings{wong2016approximating,
  title      = {[HPCA'16] Approximating Warps with Intra-Warp Operand Value Similarity},
  author     = {Wong</b>, <br/><b>Daniel and Kim, Nam Sung and Annavaram, Murali},
  booktitle  = {Proceedings of the 22nd IEEE International Symposium on High Performance Computer Architecture (HPCA)},
  year       = {2016},
  note       = {<small>(Acceptance Rate: 22\%)</small><br/>},
  url_Paper  = {files/Approx-HPCAv2.pdf},
  url_Slides = {files/Approx-HPCA2016.pptx},
}

[ICS'16] Origami: Folding Warps For Energy Efficient GPUs.
Mohammad Abdel-Majeed, Daniel Wong, Justin Kuang, & Murali Annavaram. In Proceedings of the ACM International Conference on Supercomputing (ICS), 2016. (Acceptance Rate: 24%)

[ICS'16] Origami: Folding Warps For Energy Efficient GPUs [pdf]

paper

slides link bibtex

@inproceedings{abdel2016origami,
  title      = {[ICS'16] Origami: Folding Warps For Energy Efficient GPUs},
  author     = {Abdel-Majeed, <br/>Mohammad and Wong</b>, <b>Daniel and Kuang, Justin and Annavaram, Murali},
  booktitle  = {Proceedings of the ACM International Conference on Supercomputing (ICS)},
  year       = {2016},
  note       = {<small>(Acceptance Rate: 24\%)</small><br/>},
  url_Paper  = {files/Origami-ICS2016.pdf},
  url_Slides = {files/Origami-ICS2016-Presentation.pdf},
}

[ISLPED'16] Dynsleep: Fine-Grained Power Management For A Latency-Critical Data Center Application.
Chih-Hsun Chou, Daniel Wong, & Laxmi N Bhuyan. In Proceedings of the International Symposium on Low Power Electronics and Design (ISLPED), 2016. (Acceptance Rate: 23%)

[ISLPED'16] Dynsleep: Fine-Grained Power Management For A Latency-Critical Data Center Application [pptx]

slides link bibtex

@inproceedings{chou2016dynsleep,
  title      = {[ISLPED'16] Dynsleep: Fine-Grained Power Management For A Latency-Critical Data Center Application},
  author     = {Chou, <br/>Chih-Hsun and Wong</b>, <b>Daniel and Bhuyan, Laxmi N},
  booktitle  = {Proceedings of the International Symposium on Low Power Electronics and Design (ISLPED)},
  year       = {2016},
  note       = {<small>(Acceptance Rate: 23\%)</small><br/>},
  url_Slides = {files/DynSleep-ISLPED2016-Presentation.pptx},
}

[DAC'16] Invited - Cross-Layer Modeling And Optimization For Electromigration Induced Reliability.
Taeyoung Kim, Zeyu Sun, Chase Cook, Hengyang Zhao, Ruiwen Li, Daniel Wong, & Sheldon X-D Tan. In Proceedings of the 53rd Annual Design Automation Conference (DAC), 2016.
link bibtex

@inproceedings{kim2016cross,
  title     = {[DAC'16] Invited - Cross-Layer Modeling And Optimization For Electromigration Induced Reliability},
  author    = {Kim, <br/> Taeyoung and Sun, Zeyu and Cook, Chase and Zhao, Hengyang and Li, Ruiwen and Wong</b>, <b>Daniel and Tan, Sheldon X-D},
  booktitle = {Proceedings of the 53rd Annual Design Automation Conference (DAC)},
  year      = {2016},
  note      = {},
}

[SBAC-PAD'16] STOMP: Statistical Techniques For Optimizing And Modeling Performance Of Blocked Sparse Matrix Vector Multiplication.
Steena Monteiro, Forrest Iandola, & Daniel Wong. In Proceedings of the 28th International Symposium on Computer Architecture and High Performance Computing (SBAC-PAD), 2016.
link bibtex

@inproceedings{monteiro2016stomp,
  title     = {[SBAC-PAD'16] STOMP: Statistical Techniques For Optimizing And Modeling Performance Of Blocked Sparse Matrix Vector Multiplication},
  author    = {Monteiro, <br/> Steena and Iandola, Forrest and Wong</b>, <b>Daniel},
  booktitle = {Proceedings of the 28th International Symposium on Computer Architecture and High Performance Computing (SBAC-PAD)},
  year      = {2016},
  note      = {},
}

2015 (1)

[IISWC'15] A Retrospective Look Back On The Road Towards Energy Proportionality.
Daniel Wong, Julia Chen, & Murali Annavaram. In Proceedings of the 2015 IEEE International Symposium on Workload Characterization (IISWC), 2015. (Short paper with presentation)

[IISWC'15] A Retrospective Look Back On The Road Towards Energy Proportionality [pdf]

paper link bibtex

@inproceedings{wong2015retrospective,
  title     = {[IISWC'15] A Retrospective Look Back On The Road Towards Energy Proportionality},
  author    = {Wong</b>, <br/><b>Daniel and Chen, Julia and Annavaram, Murali},
  booktitle = {Proceedings of the 2015 IEEE International Symposium on Workload Characterization (IISWC)},
  year      = {2015},
  note      = {<small>(Short paper with presentation)</small><br/>},
  url_Paper = {files/EP-IISWC2015.pdf},
}

2014 (1)

[HPCA'14] Implications of High Energy Proportional Servers on Cluster-Wide Energy Proportionality.
Daniel Wong, & Murali Annavaram. In Proceedings of the 20th IEEE International Symposium on High Performance Computer Architecture (HPCA), 2014. (Acceptance Rate: 25.6%)

[HPCA'14] Implications of High Energy Proportional Servers on Cluster-Wide Energy Proportionality [pdf]

paper

[HPCA'14] Implications of High Energy Proportional Servers on Cluster-Wide Energy Proportionality [pptx]

slides link bibtex

@inproceedings{wong2014implications,
  title      = {[HPCA'14] Implications of High Energy Proportional Servers on Cluster-Wide Energy Proportionality},
  author     = {Wong</b>, <br/><b>Daniel and Annavaram, Murali},
  booktitle  = {Proceedings of the 20th IEEE International Symposium on High Performance Computer Architecture (HPCA)},
  year       = {2014},
  note       = {<small>(Acceptance Rate: 25.6\%)</small><br/>},
  url_Paper  = {files/HighEP-HPCA2014.pdf},
  url_Slides = {files/HighEP-HPCA2014.pptx},
}

2013 (2)

[Top Picks'13] Scaling The Energy Proportionality Wall With Knightshift.
Daniel Wong, & Murali Annavaram. IEEE Micro's "Top Picks from the Computer Architecture Conferences of 2012", Volume 33(Issue 3): 28–37. 2013.
link bibtex

@article{wong2013scaling,
  title     = {[Top Picks'13] Scaling The Energy Proportionality Wall With Knightshift},
  author    = {Wong</b>, <br/><b>Daniel and Annavaram, Murali},
  journal   = {IEEE Micro's "Top Picks from the Computer Architecture Conferences of 2012"},
  volume    = {Volume 33},
  number    = {Issue 3},
  pages     = {28--37},
  year      = {2013},
  publisher = {IEEE},
  note      = {},
}

[MICRO'13] Warped Gates: Gating Aware Scheduling and Power Gating For GPGPUs.
Mohammad Abdel-Majeed*, Daniel Wong*, & Murali Annavaram. In Proceedings of the 46th IEEE/ACM International Symposium on Microarchitecture (MICRO), 2013. (Acceptance Rate: 16.3%)
* Authors contributed equally

[MICRO'13] Warped Gates: Gating Aware Scheduling and Power Gating For GPGPUs [pdf]

paper

[MICRO'13] Warped Gates: Gating Aware Scheduling and Power Gating For GPGPUs [pptx]

slides link bibtex

@inproceedings{abdel2013warped,
  title      = {[MICRO'13] Warped Gates: Gating Aware Scheduling and Power Gating For GPGPUs},
  author     = {Abdel-Majeed*, <br/>Mohammad and Wong</b>*, <b>Daniel and Annavaram, Murali},
  booktitle  = {Proceedings of the 46th IEEE/ACM International Symposium on Microarchitecture (MICRO)},
  year       = {2013},
  note       = {<small>(Acceptance Rate: 16.3\%) </small><br/> * Authors contributed equally<br/>},
  url_Paper  = {files/WarpedGates-MICRO2013.pdf},
  url_Slides = {files/WarpedGates-MICRO2013.pptx},
}

2012 (2)

[MICRO'12] KnightShift: Scaling the Energy Proportionality Wall through Server-Level Heterogeneity.
Daniel Wong, & Murali Annavaram. In Proceedings of the 45th IEEE/ACM International Symposium on Microarchitecture (MICRO), 2012. (Acceptance Rate: 17.5%)
Selected as 1 of 11 IEEE Micro Top Pick in Computer Architecture 2013

[MICRO'12] KnightShift: Scaling the Energy Proportionality Wall through Server-Level Heterogeneity [pdf]

paper

[MICRO'12] KnightShift: Scaling the Energy Proportionality Wall through Server-Level Heterogeneity [pptx]

slides link bibtex

@inproceedings{wong2012knightshift,
  title      = {[MICRO'12] KnightShift: Scaling the Energy Proportionality Wall through Server-Level Heterogeneity},
  author     = {Wong</b>, <br/><b>Daniel and Annavaram, Murali},
  booktitle  = {Proceedings of the 45th IEEE/ACM International Symposium on Microarchitecture (MICRO)},
  year       = {2012},
  note       = {<small>(Acceptance Rate: 17.5\%)</small> <br/>Selected as 1 of 11 IEEE Micro Top Pick in Computer Architecture 2013 <br/>},
  url_Paper  = {files/KnightShift-MICRO2012.pdf},
  url_Slides = {files/KnightShift-MICRO2012.pptx},
}

[WEED'12] Evaluating A Prototype KnightShift-enabled server.
Daniel Wong, & Murali Annavaram. In Workshop on Energy-Efficient Design (WEED), 2012.

[WEED'12] Evaluating A Prototype KnightShift-enabled server [pdf]

paper link bibtex

@inproceedings{wong2012evaluating,
  title     = {[WEED'12] Evaluating A Prototype KnightShift-enabled server},
  author    = {Wong</b>, <br/><b>Daniel and Annavaram, Murali},
  booktitle = {Workshop on Energy-Efficient Design (WEED)},
  year      = {2012},
  url_Paper = {files/KnightShift-WEED2012.pdf},
}

2010 (4)

[MICRO'10] Adaptive and Speculative Slack Simulations of CMPs on CMPs.
Jianwei Chen, Lakshmi Kumar Dabbiru, Daniel Wong, Murali Annavaram, & Michel Dubois. In Proceedings of the 43rd IEEE/ACM International Symposium on Microarchitecture (MICRO), 2010. (Acceptance Rate: 17.4%)

[MICRO'10] Adaptive and Speculative Slack Simulations of CMPs on CMPs [pdf]

paper link bibtex

@inproceedings{chen2010adaptive,
  author    = {Chen, <br/>Jianwei and Dabbiru, Lakshmi Kumar and Wong</b>, <b>Daniel and Annavaram, Murali and Dubois, Michel},
  title     = {[MICRO'10] Adaptive and Speculative Slack Simulations of CMPs on CMPs},
  booktitle = {Proceedings of the 43rd IEEE/ACM International Symposium on Microarchitecture (MICRO)},
  year      = {2010},
  note      = {<small>(Acceptance Rate: 17.4\%)</small><br/>},
  url_Paper = {files/SlackSim-MICRO2010.pdf},
}

[FDG'10] Implementing Games On Pinball Machines.
Daniel Wong, Darren Earl, Fred Zyda, Ryan Zink, Sven Koenig, Allen Pan, Selby Shlosberg, Jaspreet Singh, & Nathan Sturtevant. In Proceedings of the Fifth International Conference on the Foundations of Digital Games (FDG), 2010. (Acceptance Rate: 34%)

link bibtex

@inproceedings{wong2010implementing,
  author    = {Wong</b>, <br/><b>Daniel and Earl, Darren and Zyda, Fred and Zink, Ryan and Koenig, Sven and Pan, Allen and Shlosberg, Selby and Singh, Jaspreet and Sturtevant, Nathan},
  title     = {[FDG'10] Implementing Games On Pinball Machines},
  booktitle = {Proceedings of the Fifth International Conference on the Foundations of Digital Games (FDG)},
  year      = {2010},
  note      = {<small>(Acceptance Rate: 34\%)</small><br/>},
}

[AAAI Spring'10] Teaching Robotics And Computer Science With Pinball Machines.
Daniel Wong, Darren Earl, Fred Zyda, & Sven Koenig. In Papers of the 2010 AAAI Spring Symposium Series, 2010.
link bibtex

@inproceedings{wong2010teaching,
  author    = {Wong</b>, <br/><b>Daniel and Earl, Darren and Zyda, Fred and Koenig, Sven},
  title     = {[AAAI Spring'10] Teaching Robotics And Computer Science With Pinball Machines},
  booktitle = {Papers of the 2010 AAAI Spring Symposium Series},
  year      = {2010},
}

[EAAI'10] Teaching Artificial Intelligence and Robotics Via Games.
Daniel Wong, Ryan Zink, & Sven Koenig. In Proceedings of the First AAAI Symposium on Educational Advances in Artificial Intelligence (EAAI), 2010.
link bibtex

@inproceedings{Wong_Zink_Koenig_2010,
  author    = {Wong</b>, <br/><b>Daniel and Zink, Ryan and Koenig, Sven},
  title     = {[EAAI'10] Teaching Artificial Intelligence and Robotics Via Games},
  booktitle = {Proceedings of the First AAAI Symposium on Educational Advances in Artificial Intelligence (EAAI)},
  year      = {2010},
}

Non-Refereed Publications

Joseph Bungo, Daniel Wong, Bringing GPU Accelerated Computing and Deep Learning to the Classroom, Journal of Computational Science Education (JOCSE), Volume 12, Issue 2. Presented in Seventh SC Workshop on Best Practices for HPC Training and Education (BPHTE), 2020.

Daniel Wong, S. Lloyd, M. Gokhale, A Memory-mapped Approach to Checkpointing. Technical Report LLNL-TR-635611, Lawrence Livermore National Laboratory (LLNL), Livermore, CA, 2013.

I. Karlin, A. Bhatele, B. Chamberlain, J. Cohen, Z. Devito, M. Gokhale, R. Haque, R. Hornung, J. Keasler, D. Laney, E. Luke, S. Lloyd, J. McGraw, R. Neely, D. Richards, M. Schulz, C.H. Still, F. Wang, Daniel Wong, LULESH Programming Model and Performance Ports Overview. Technical Report LLNL-TR-608824, Lawrence Livermore National Laboratory (LLNL), Livermore, CA, 2012.

Daniel Wong, Murali Annavaram, Scalable System-level Active Low Power Mode with Bounded Latency. Technical Report CENG-2012-5, Department of Electrical Engineering, University of Southern California, Los Angeles (California), 2012.

Daniel Wong, Murali Annavaram, Enhancing Server Energy Efficiency by Shifting Light Burden to an Assistant. 2nd Annual Ming Hsieh Department of Electrical Engineering Research Festival, 2012. Honorable Mention Poster Award. Also presented at the Sixth USC-Tsinghua Symposium on Green Technology and Energy Informatics.

Daniel Wong, R. Zink and S. Koenig, Teaching Artificial Intelligence and Robotics via Games [Poster Abstract], Proceedings of the AAAI Symposium on Educational Advances in Artificial Intelligence (EAAI), 2010

Daniel Wong, M. Gokhale, Real-World Performance of Document-Similarity Web Attack Classifier In Embedded Hardware. LLNL Summer Intern Poster Symposium, 2010.

John O'Hollaren, Vairavan Laxman, Noah Olsman, Michael Benzimra, Daniel Wong, and Nielson Bernardo. SeaBee III. Technical report, University of Southern California Competition Robotics (USCR), University of Southern California, 2010.

Daniel Wong, D. Earl, F. Zyda and S. Koenig. Programming Pinball Machines for Fun and Education. Technical Report 08-901, Department of Computer Science, University of Southern California, Los Angeles (California), 2008.

Press

GPU Computing 101: Why University Educators Are Pulling NVIDIA Teaching Kits into Their Classrooms, Nvidia, https://blogs.nvidia.com/blog/2019/05/23/nvidia-teaching-kits/, 2019

Interview, Nvidia's Turing Chip Opens Door to New Virtual Reality Realm, ECT News Network, https://www.ectnews.com/story/85506.html, 2018

Daniel Wong, S. Koenig, PinHorse: Teaching Old Pinball Machines New Tricks, https://www.pinballnews.com/learn/pinhorse/, 2009