//! A caching layer for block-based storage devices.
//! 
//! For many storage devices, calls to the backing medium are quite expensive. This layer intends to reduce those calls,
//! improving efficiency in exchange for additional memory usage. Note that this crate is intended to be used as a part of
//! `block_io`, but should work on its own.
//! 
//! # Limitations
//! Currently, the `BlockCache` struct is hardcoded to use a `StorageDevice` reference,
//! when in reality it should just use anything that implements traits like `BlockReader + BlockWriter`. 
//! 
//! The read and write functions currently only support reading/writing individual blocks from disk,
//! even when the caller might prefer to transfer a larger number of contiguous blocks at once.
//! This is inefficient; an optimized implementation should read/write multiple blocks per device access when possible.
//! 
//! Cached blocks are stored as vectors of bytes on the heap;
//! a better design might use something else, such as separately mapped regions.
//! Cached blocks cannot yet be dropped to relieve memory pressure. 
//! 
//! Note that this cache only holds a reference to the underlying block device.
//! As such, if any other system component writes to the underlying device directly,
//! the cache will return incorrect and potentially inconsistent results.
//! In the long run, the only way around this is to expose a `BlockCache`
//! instead of exposing a `StorageDevice` directly. The least disruptive way to implement that
//! might be a layer of indirection to an implementor of a `BlockReader`-like trait,
//! so that the underlying block reader can be swapped out when a cache is enabled.
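//! 
//! # Example
//! A minimal usage sketch. It is not compiled as a doc test because obtaining a
//! `StorageDeviceRef` is platform-specific; `get_storage_device()` below is a
//! hypothetical placeholder for however your system acquires one.
//! ```rust,ignore
//! use alloc::borrow::Cow;
//! 
//! // Hypothetical: obtain a reference to some backing storage device.
//! let device: StorageDeviceRef = get_storage_device();
//! let mut cache = BlockCache::new(device);
//! 
//! // Write block 0 through the cache. The current policy is write-through,
//! // so the block is also written out to the device immediately.
//! cache.write_block(0, Cow::Owned(vec![0u8; 512]))?; // assumes 512-byte blocks
//! 
//! // Subsequent reads of block 0 are served from the cache.
//! let contents: &[u8] = cache.read_block(0)?;
//! ```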

#![no_std]

#[macro_use] extern crate alloc;
extern crate hashbrown;
extern crate storage_device;

use alloc::borrow::{Cow, ToOwned};
use alloc::vec::Vec;
use hashbrown::{
    HashMap,
    hash_map::Entry,
};
use storage_device::{StorageDevice, StorageDeviceRef};

/// A cache for blocks read from or written to a storage device.
pub struct BlockCache {
    /// The cache of blocks (sectors) read from or written to the storage device:
    /// a map from block number to the cached block's contents and state.
    cache: InternalCache,
    /// The underlying storage device from where the blocks are read/written.
    storage_device: StorageDeviceRef,
}

impl BlockCache {
    /// Creates a new `BlockCache` backed by the given storage device.
    pub fn new(storage_device: StorageDeviceRef) -> BlockCache {
        BlockCache {
            cache: HashMap::new(),
            storage_device,
        }
    }

    /// Flushes the given block to the backing storage device.
    /// If `block_num` is `None`, all `Modified` blocks in the cache
    /// will be written back to the storage device.
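    ///
    /// # Example
    /// A minimal sketch (ignored as a doc test; assumes `cache` is an existing `BlockCache`):
    /// ```rust,ignore
    /// cache.flush(Some(0))?; // flush only block 0, if it is currently cached
    /// cache.flush(None)?;    // flush every `Modified` block in the cache
    /// ```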
    pub fn flush(&mut self, block_num: Option<usize>) -> Result<(), &'static str> {
        let mut locked_device = self.storage_device.lock();
        if let Some(bn) = block_num {
            // Flush just one block
            if let Some(cached_block) = self.cache.get_mut(&bn) {
                Self::flush_block(&mut *locked_device, bn, cached_block)?;
            }
            // If the block wasn't in the cache, do nothing.
        }
        else {
            // Flush all blocks
            for (bn, cached_block) in self.cache.iter_mut() {
                Self::flush_block(&mut *locked_device, *bn, cached_block)?;
            }
        }
        Ok(())
    }

    /// Reads the given `block` from this cache, first checking the cache
    /// in order to avoid accessing the storage device.
    /// If a valid copy of that block exists in the cache, a reference to it is returned directly.
    /// If not, the block is read from the storage device into the cache,
    /// and a reference to the newly cached copy is returned.
    pub fn read_block(&mut self, block: usize) -> Result<&[u8], &'static str> {
        let mut locked_device = self.storage_device.lock();
        match self.cache.entry(block) {
            Entry::Occupied(occ) => {
                // An existing entry in the cache can be used directly (without going to the backing store)
                // if it's in the `Modified` or `Shared` state.
                // But if it's in the `Invalid` state, we have to re-read the block from the storage device.
                let cached_block = occ.into_mut();
                match cached_block.state {
                    CacheState::Modified | CacheState::Shared => Ok(&cached_block.block),
                    CacheState::Invalid => {
                        locked_device.read_blocks(&mut cached_block.block, block)?;
                        cached_block.state = CacheState::Shared;
                        Ok(&cached_block.block)
                    }
                }
            }
            Entry::Vacant(vacant) => {
                // A vacant entry will be read from the backing storage device,
                // so it will always start out in the `Shared` state.
                let mut v = vec![0; locked_device.block_size()];
                locked_device.read_blocks(&mut v, block)?;
                let cb = CachedBlock {
                    block: v,
                    state: CacheState::Shared,
                };
                let cached_block = vacant.insert(cb);
                Ok(&cached_block.block)
            }
        }
    }

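    /// Writes the given `buffer_to_write` into the cache as the new contents of block `block_num`.
    /// The block is marked `Modified` and, under the current write-through policy,
    /// is immediately flushed to the backing storage device.
    ///
    /// # Example
    /// A sketch (ignored as a doc test; assumes `cache` is an existing `BlockCache`
    /// whose device uses 512-byte blocks):
    /// ```rust,ignore
    /// use alloc::borrow::Cow;
    /// 
    /// // `Cow` lets callers pass either a borrowed slice or an owned vector.
    /// let zeros = [0u8; 512];
    /// cache.write_block(3, Cow::Borrowed(&zeros[..]))?;  // copied into the cache
    /// cache.write_block(4, Cow::Owned(vec![0u8; 512]))?; // moved into the cache
    /// ```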
    pub fn write_block(&mut self, block_num: usize, buffer_to_write: Cow<[u8]>) -> Result<(), &'static str> {
        let mut locked_device = self.storage_device.lock();

        // Take ownership of the buffer's contents, copying them only if they were borrowed.
        let owned_buffer: Vec<u8> = match buffer_to_write {
            Cow::Borrowed(slice_ref) => slice_ref.to_owned(),
            Cow::Owned(vec_owned) => vec_owned,
        };

        let mut new_cached_block = CachedBlock {
            block: owned_buffer,
            state: CacheState::Modified,
        };
        // We currently use a write-through policy, so flush the block immediately.
        Self::flush_block(&mut *locked_device, block_num, &mut new_cached_block)?;
        self.cache.insert(block_num, new_cached_block);

        Ok(())
    }

    /// An internal function that writes out the given `cached_block`
    /// to the given locked `StorageDevice` if the cached block is in the `Modified` state.
    fn flush_block(locked_device: &mut dyn StorageDevice, block_num: usize, cached_block: &mut CachedBlock) -> Result<(), &'static str> {
        // We only need to actually write out blocks in the `Modified` state.
        match cached_block.state {
            CacheState::Shared | CacheState::Invalid => { },
            CacheState::Modified => {
                locked_device.write_blocks(&cached_block.block, block_num)?;
                cached_block.state = CacheState::Shared;
            }
        }
        Ok(())
    }
}

/// A block from a storage device stored in a cache.
/// This currently includes the actual owned cached content as a vector of bytes on the heap,
/// in addition to the `CacheState` of the cached item.
/// 
/// TODO: allow non-dirty blocks to be freed (reclaimed) upon memory pressure. 
#[derive(Debug)]
struct CachedBlock {
    block: Vec<u8>,
    state: CacheState,
}

type InternalCache = HashMap<usize, CachedBlock>;


/// The states of an item in the cache, following the MSI cache coherence protocol.
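///
/// State transitions as implemented in this crate:
/// * `read_block` inserts a freshly read block as `Shared`,
///   and re-reads an `Invalid` block from the device, moving it to `Shared`.
/// * `write_block` inserts or overwrites a block as `Modified`.
/// * `flush_block` writes a `Modified` block out to the device, moving it to `Shared`.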
#[derive(Debug)]
#[allow(dead_code)]
enum CacheState {
    /// Dirty: the cached item has been modified more recently than the backing store,
    /// so it must be flushed at a future time to guarantee data correctness and consistency.
    /// A `Modified` cached item **cannot** be safely dropped from the cache.
    /// A `Modified` cached item can be safely read from or overwritten without going to the backing store.  
    Modified,
    /// Clean: the cached item and the backing store are in sync; they have the same value.
    /// A `Shared` cached item can be safely dropped from the cache.
    Shared,
    /// The cached item is out-of-date and should not be read from,
    /// as the backing storage has a more recent copy than the cache.
    /// Therefore, if a read of an `Invalid` cached item is requested,
    /// it must be re-read from the backing storage.
    /// An `Invalid` item can still be overwritten in the cache without going to the backing store. 
    /// An `Invalid` item can be safely dropped from the cache.
    Invalid,  
}