diff --git a/arch/aarch64/arch-mmu.hh b/arch/aarch64/arch-mmu.hh
index 53e09397ab8564984eae5ff71b88e57584514c04..33a92f591ef9251c5304d6c12f8bfe7369d58809 100644
--- a/arch/aarch64/arch-mmu.hh
+++ b/arch/aarch64/arch-mmu.hh
@@ -21,6 +21,8 @@
 
 namespace mmu {
 constexpr int max_phys_addr_size = 48;
+constexpr int device_range_start = 0x8000000;
+constexpr int device_range_stop = 0x10000000;
 
 class arch_pt_element {
 public:
diff --git a/arch/aarch64/mmu.cc b/arch/aarch64/mmu.cc
index ea30e325d7b0c4bc93ad3918a7542651a2d3256b..f8537b27e42d1fb57b5aa3bdf349713913d206c6 100644
--- a/arch/aarch64/mmu.cc
+++ b/arch/aarch64/mmu.cc
@@ -1,5 +1,6 @@
 #include <osv/mmu.hh>
 #include <osv/prio.hh>
+#include <osv/debug.h>
 
 namespace mmu {
 
@@ -42,10 +43,16 @@ pt_element make_pte(phys addr, bool large,
 
     arch_pt_element::set_user(&pte, false);
     arch_pt_element::set_accessed(&pte, true);
-    /* at the moment we hardcode memory attributes,
-       but the API would need to be adapted for device direct assignment */
     arch_pt_element::set_share(&pte, true);
-    arch_pt_element::set_attridx(&pte, 4);
+
+    if (addr >= mmu::device_range_start && addr < mmu::device_range_stop) {
+        /* we need to mark device memory as such, because the
+           semantics of the load/store instructions change */
+        debug_early_u64("make_pte: device memory at ", (u64)addr);
+        arch_pt_element::set_attridx(&pte, 0);
+    } else {
+        arch_pt_element::set_attridx(&pte, 4);
+    }
 
     return pte;
 }