//! Support for the Performance Monitoring Unit 
//! 
//! We have support for PMU version 2. Each successive PMU version includes the features provided by the previous versions.
//! 
//! Version 1 Support:
//! To configure an architectural performance monitoring event, we program the performance event select registers (IA32_PERFEVTSELx MSRs).
//! The result of the performance monitoring event is reported in a general purpose Performance Monitoring Counter (PMC) (IA32_PMCx MSR).
//! There is one PMC for each performance event select register, and one PMU per logical core. 
//! 
//! Version 2 Support:
//! Three of the architectural events are counted using fixed-function counters (IA32_FIXED_CTR0 through IA32_FIXED_CTR2), 
//! each controlled by a field in the shared IA32_FIXED_CTR_CTRL register.
//! Three more MSRs are provided to simplify event programming. They are: 
//! * IA32_PERF_GLOBAL_CTRL: 
//!     allows software to enable/disable event counting of any combination of fixed-function PMCs or any general-purpose PMCs via a single WRMSR.
//! * IA32_PERF_GLOBAL_STATUS: 
//!     allows software to query counter overflow conditions on any combination of fixed-function PMCs or general-purpose PMCs via a single RDMSR.
//! * IA32_PERF_GLOBAL_OVF_CTRL: 
//!     allows software to clear counter overflow conditions on any combination of fixed-function PMCs or general-purpose PMCs via a single WRMSR.
//! 
//! We support 2 ways to use the PMU. One is to measure the number of events that take place over a region of code.
//! The second is Event Based Sampling, where after a specified number of events occur, an interrupt fires and we store the instruction pointer 
//! and the ID of the task running at that point.
//! 
//! Currently we support a maximum core ID of 255, and up to 8 general purpose counters per core. 
//! A core ID greater than 255 is not supported in Theseus in general since the ID has to fit within a u8.
//! 
//! If the core ID limit is changed and we need to update the PMU data structures to support more cores then: 
//! - Increase WORDS_IN_BITMAP and CORES_SUPPORTED_BY_PMU as required. For example, 256 cores are currently supported, so the bitmap contains 4 64-bit words, one bit per core. 
//! - Add additional AtomicU64 variables to the initialization of the CORES_SAMPLING and RESULTS_READY bitmaps. 
//! 
//! If the general purpose PMC limit is exceeded then: 
//! - Update PMCS_SUPPORTED_BY_PMU to the new PMC limit.
//! - Change the element type in the PMCS_AVAILABLE vector to be larger than AtomicU8 so that there is one bit per counter.
//! - Update INIT_VAL_PMCS_AVAILABLE to the new maximum value for the per-core bitmap.
//! 
//! Monitoring without interrupts is almost free (around 0.3% performance penalty) - source: "These are Not Your Grand Daddy's CPU Performance Counters" Blackhat USA, 2015
//! 
//! # Example
//! ```
//! pmu_x86::init();
//! 
//! let counter_freq = 0xFFFFF;
//! let num_samples = 500;
//! let sampler = pmu_x86::start_samples(pmu_x86::EventType::UnhaltedReferenceCycles, counter_freq, None, num_samples);
//! 
//! if sampler.is_ok() {
//! 
//!     // wait some time here
//! 
//!     if let Ok(samples) = pmu_x86::retrieve_samples() {
//!         pmu_x86::print_samples(&samples);
//!     }
//! }
//! ```
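//!
//! For the other use case, counting events over a region of code, a `Counter` can be created,
//! started, and ended around the code of interest. The following is a minimal sketch using only
//! items defined in this crate; most error handling is elided.
//! ```
//! pmu_x86::init();
//! 
//! if let Ok(mut counter) = pmu_x86::Counter::new(pmu_x86::EventType::InstructionsRetired) {
//!     let _ = counter.start();
//! 
//!     // code to be measured goes here
//! 
//!     if let Ok(count) = counter.end() {
//!         // `count` is the number of instructions retired while the counter was running
//!     }
//! }
//! ```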
//!
//! # Note
//! Currently, the PMU-based sampler will only capture samples on the same core as it was initialized and started from. 
//! So, if you run `pmu_x86::init()` and `pmu_x86::start_samples()` on CPU core 2, it will only sample events on core 2.

#![no_std]

extern crate spin;
#[macro_use] extern crate lazy_static;
extern crate x86_64;
extern crate msr;
extern crate raw_cpuid;
extern crate task;
extern crate memory;
extern crate sync_irq;
extern crate alloc;
extern crate apic;
extern crate cpu;
#[macro_use] extern crate log;
extern crate mod_mgmt;
extern crate bit_field;

use msr::*;
use x86_64::{VirtAddr, registers::model_specific::Msr, structures::idt::InterruptStackFrame};
use raw_cpuid::CpuId as X86CpuIdInstr;
use spin::Once;
use sync_irq::IrqSafeMutex;
use alloc::{
    collections::{BTreeMap, BTreeSet},
    string::{String, ToString},
    vec::Vec,
};
use bit_field::BitField;
use core::{
    convert::{TryFrom, TryInto},
    sync::atomic::{
        Ordering, 
        AtomicU64, 
        AtomicU8
    },
};


pub mod stat;

/// The minimum version ID a PMU can have, as retrieved by the `CPUID` instruction.
/// Anything lower than this means PMU is not supported.
const MIN_PMU_VERSION: u8 = 1;
/// Sets bits 32-34 in the global_ctrl MSR to enable the three fixed-function counters.
const ENABLE_FIXED_PERFORMANCE_COUNTERS: u64 = 0x7 << 32;
/// Sets bits 0-3 in the global_ctrl MSR to enable general purpose PMC0 through PMC3.
const ENABLE_GENERAL_PERFORMANCE_COUNTERS: u64 = 0xF;
/// Sets the enable field for each of the three fixed counters in the fixed_ctr_ctrl MSR so they count at all privilege levels (OS and user). 
const ENABLE_FIXED_COUNTERS_FOR_ALL_PRIVILEGE_LEVELS: u64 = 0x333;

/// read from the fixed counter 0 to retrieve the instructions retired
const FIXED_FUNC_0_RDPMC: u32 = 1 << 30;
/// read from the fixed counter 1 to retrieve the clock cycles
const FIXED_FUNC_1_RDPMC: u32 = (1 << 30) + 1;
/// read from the fixed counter 2 to retrieve reference cycles
const FIXED_FUNC_2_RDPMC: u32 = (1 << 30) + 2;
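// Note: when bit 30 of the `rdpmc` counter selector (passed in ECX) is set, the low bits select
// a fixed-function counter rather than a general-purpose PMC; see `rdpmc()` at the bottom of this file.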

/// Set bit 22 to enable the general PMC to start counting
const PMC_ENABLE: u64 = 0x01 << 22;
/// Set bit 20 in the event select register to enable interrupts on a counter overflow
const INTERRUPT_ENABLE: u64 = 0x01 << 20;
/// Value to write to the overflow control MSR to clear it
const CLEAR_PERF_STATUS_MSR: u64 = 0x0000_0000_0000_000F;
/// The number of words in the CORES_SAMPLING and RESULTS_READY bitmaps, so that information for 256 cores can be recorded
const WORDS_IN_BITMAP: usize = 4;
/// The number of cores the current PMU implementation supports
const CORES_SUPPORTED_BY_PMU: usize = 256;
/// The number of general purpose PMCs the current PMU implementation supports
const PMCS_SUPPORTED_BY_PMU: u8 = 8;
/// The initial value for the bitmaps in PMCS_AVAILABLE 
const INIT_VAL_PMCS_AVAILABLE: u8 = core::u8::MAX;

/// Stores an 8-bit bitmap for each core to show whether the PMCs for that core are available (1) or in use (0).
/// This restricts us to 8 general purpose PMCs per core.
static PMCS_AVAILABLE: Once<Vec<AtomicU8>> = Once::new();
/// Bitmap to store the cores where the PMU is currently being used for sampling. It records information for 256 cores.
static CORES_SAMPLING: [AtomicU64; WORDS_IN_BITMAP] = [AtomicU64::new(0), AtomicU64::new(0), AtomicU64::new(0), AtomicU64::new(0)];
/// Bitmap to store the cores which have sampling results ready to be retrieved. It records information for 256 cores.
static RESULTS_READY: [AtomicU64; WORDS_IN_BITMAP] = [AtomicU64::new(0), AtomicU64::new(0), AtomicU64::new(0), AtomicU64::new(0)];

lazy_static! {
    /// PMU version supported by the current hardware. The default is zero, since retrieving the performance monitoring information only fails if no PMU is available.
    static ref PMU_VERSION: u8 = X86CpuIdInstr::new().get_performance_monitoring_info().map(|pmi| pmi.version_id()).unwrap_or(0);
    /// The number of general purpose PMCs that can be used. The default is zero, since retrieving the performance monitoring information only fails if no PMU is available.
    static ref NUM_PMC: u8 = X86CpuIdInstr::new().get_performance_monitoring_info().map(|pmi| pmi.number_of_counters()).unwrap_or(0);
    /// The number of fixed function counters. The default is zero, since retrieving the performance monitoring information only fails if no PMU is available.
    static ref NUM_FIXED_FUNC_COUNTERS: u8 = X86CpuIdInstr::new().get_performance_monitoring_info().map(|pmi| pmi.fixed_function_counters()).unwrap_or(0);
}

/// Set to store the cores that the PMU has already been initialized on
static CORES_INITIALIZED: IrqSafeMutex<BTreeSet<u8>> = IrqSafeMutex::new(BTreeSet::new());
/// The sampling information for each core
static SAMPLING_INFO: IrqSafeMutex<BTreeMap<u8, SampledEvents>> =  IrqSafeMutex::new(BTreeMap::new());

/// Used to select the event type to count. Event types are described in the Intel SDM 18.2.1 for PMU Version 1.
/// The discriminant value for each event type is the value written to the event select register for a general purpose PMC.
pub enum EventType{
    /// This event counts the number of instructions at retirement. For instructions that consist of multiple micro-ops,
    /// this event counts the retirement of the last micro-op of the instruction.
    /// This is counted by IA32_FIXED_CTR0.
    InstructionsRetired = (0x03 << 16) | 0xC0,
    /// This event counts core clock cycles when the clock signal on a specific core is running (not halted).
    /// This is counted by IA32_FIXED_CTR1. 
    UnhaltedCoreCycles = (0x03 << 16) | 0x3C,
    /// This event counts reference clock cycles at a fixed frequency while the clock signal on the core is running. 
    /// The event counts at a fixed frequency, irrespective of core frequency changes due to performance state transitions. 
    /// Current implementations use the TSC clock.
    /// This is counted by IA32_FIXED_CTR2. 
    UnhaltedReferenceCycles = (0x03 << 16) | (0x01 << 8) | 0x3C,
    /// This event counts requests originating from the core that reference a cache line in the last level on-die cache.
    /// The event count includes speculation and cache line fills due to the first-level cache hardware prefetcher, but
    /// may exclude cache line fills due to other hardware-prefetchers.
    LastLevelCacheReferences = (0x03 << 16) | (0x4F << 8) | 0x2E,
    /// This event counts each cache miss condition for references to the last level on-die cache. The event count may
    /// include speculation and cache line fills due to the first-level cache hardware prefetcher, but may exclude cache
    /// line fills due to other hardware-prefetchers.
    LastLevelCacheMisses = (0x03 << 16) | (0x41 << 8) | 0x2E,
    /// This event counts branch instructions at retirement. It counts the retirement of the last micro-op of a branch instruction.
    BranchInstructionsRetired = (0x03 << 16) | 0xC4,
    /// This event counts mispredicted branch instructions at retirement. It counts the retirement of the last micro-op
    /// of a branch instruction in the architectural path of execution and experienced misprediction in the branch
    /// prediction hardware.
    BranchMissesRetired = (0x03 << 16) | 0xC5,
}
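
// Note on the discriminant encoding above: each `EventType` value is a complete value for an
// IA32_PERFEVTSELx register (minus the enable and interrupt bits, which are OR'd in separately).
// Bits 0-7 hold the event code, bits 8-15 the unit mask (umask), bit 16 (USR) enables counting in
// user mode, and bit 17 (OS) enables counting in kernel mode, so `(0x03 << 16)` counts events at
// all privilege levels. For example, `LastLevelCacheMisses` selects event 0x2E with umask 0x41.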

fn num_general_purpose_counters() -> u8 {
    *NUM_PMC
}

fn get_pmcs_available() -> Result<&'static Vec<AtomicU8>, &'static str>{
    PMCS_AVAILABLE.get().ok_or("pmu_x86: The variable storing the available counters for each core hasn't been initialized")
}

fn get_pmcs_available_for_core(core_id: u8) -> Result<&'static AtomicU8, &'static str>{
    let pmc = PMCS_AVAILABLE.get().ok_or("pmu_x86: The variable storing the available counters for each core hasn't been initialized")?;
    Ok(&pmc[core_id as usize])
}

/// Returns the maximum core id for this machine
fn max_core_id() -> Result<u8, &'static str> {
    let lapics = apic::get_lapics();
    let apic_id = lapics.iter()
        .max_by_key(|(apic_id, _)| apic_id.value())
        .map(|(apic_id, _)| *apic_id)
        .ok_or("pmu_x86: Could not find a maximum core id")?;
    let core_id_as_u8 = apic_id.value()
        .try_into()
        .unwrap_or_else(|_| panic!("couldn't convert ApicId {apic_id} into a u8"));
    Ok(core_id_as_u8)
}

/// Returns true if there are core ids that are larger than can be handled by the current PMU data structures
fn greater_core_id_than_expected() -> Result<bool, &'static str> {
    let max_core = max_core_id()? as usize;
    Ok(max_core >= CORES_SUPPORTED_BY_PMU)
}

/// Returns true if there are more general purpose PMCs than can be handled by the current PMU data structures
fn more_pmcs_than_expected(num_pmc: u8) -> Result<bool, &'static str> {
    Ok(num_pmc > PMCS_SUPPORTED_BY_PMU)
}


/// Initialization function that enables the PMU if one is available.
/// We initialize the 3 fixed PMCs and general purpose PMCs. Calling this initialization function again
/// on a core that has already been initialized will do nothing.
/// 
/// Currently we support a maximum core ID of 255, and up to 8 general purpose counters per core. 
/// A core ID greater than 255 is not supported in Theseus in general since the ID has to fit within a u8.
/// 
/// If the core ID limit is changed and we need to update the PMU data structures to support more cores then: 
/// - Increase WORDS_IN_BITMAP and CORES_SUPPORTED_BY_PMU as required. For example, 256 cores are currently supported, so the bitmap contains 4 64-bit words, one bit per core. 
/// - Add additional AtomicU64 variables to the initialization of the CORES_SAMPLING and RESULTS_READY bitmaps. 
/// 
/// If the general purpose PMC limit is exceeded then: 
/// - Update PMCS_SUPPORTED_BY_PMU to the new PMC limit.
/// - Change the element type in the PMCS_AVAILABLE vector to be larger than AtomicU8 so that there is one bit per counter.
/// - Update INIT_VAL_PMCS_AVAILABLE to the new maximum value for the per-core bitmap.
/// 
/// # Warning
/// This function should only be called after all the cores have been booted up.
pub fn init() -> Result<(), &'static str> {
    let mut cores_initialized = CORES_INITIALIZED.lock();
    let core_id = cpu::current_cpu().into_u8();

    if cores_initialized.contains(&core_id) {
        warn!("PMU has already been intitialized on core {}", core_id);
        return Ok(());
    } 
    else {

        if *PMU_VERSION >= MIN_PMU_VERSION {
                
            if greater_core_id_than_expected()? {
                return Err("pmu_x86: There are larger core ids in this machine than can be handled by the existing structures which store information about the PMU");
            }

            if more_pmcs_than_expected(*NUM_PMC)? {
                return Err("pmu_x86: There are more general purpose PMCs in this machine than can be handled by the existing structures which store information about the PMU");
            }

            PMCS_AVAILABLE.call_once(|| {
                // initialize the PMCS_AVAILABLE bitmap 
                let core_capacity = max_core_id().expect("cores have not been booted up and so cannot retrieve a max core id") as usize + 1;
                let mut pmcs_available = Vec::with_capacity(core_capacity);
                for _ in 0..core_capacity {
                    pmcs_available.push(AtomicU8::new(INIT_VAL_PMCS_AVAILABLE));
                } 
                trace!("PMU initialized for the first time: version: {} with fixed counters: {} and general counters: {}", *PMU_VERSION, *NUM_FIXED_FUNC_COUNTERS, *NUM_PMC);
                pmcs_available
            });
        }
        else {
            error!("This machine does not support a PMU");
            return Err("This machine does not support a PMU");
        }
        
        init_registers();
        cores_initialized.insert(core_id);
        trace!("PMU initialized on core {}", core_id);
    }
    
    Ok(())
}

/// Part of the initialization routine which actually does the work of setting up the registers.
/// This must be called for every core that wants to use the PMU.
fn init_registers() {
    unsafe {
        // disables all the performance counters
        Msr::new(IA32_PERF_GLOBAL_CTRL).write(0);
        // clear the general purpose PMCs
        Msr::new(IA32_PMC0).write(0);
        Msr::new(IA32_PMC1).write(0);
        Msr::new(IA32_PMC2).write(0);
        Msr::new(IA32_PMC3).write(0);
        // clear the fixed event counters
        Msr::new(IA32_FIXED_CTR0).write(0);
        Msr::new(IA32_FIXED_CTR1).write(0);
        Msr::new(IA32_FIXED_CTR2).write(0);
        // sets fixed function counters to count events at all privilege levels
        Msr::new(IA32_FIXED_CTR_CTRL).write(ENABLE_FIXED_COUNTERS_FOR_ALL_PRIVILEGE_LEVELS);
        // enables all counters: each counter has another enable bit in other MSRs so these should likely never be cleared once first set
        Msr::new(IA32_PERF_GLOBAL_CTRL).write(ENABLE_FIXED_PERFORMANCE_COUNTERS | ENABLE_GENERAL_PERFORMANCE_COUNTERS);
    }
}

/// A logical counter object to correspond to a physical PMC
pub struct Counter {
    /// Initial value stored in the counter before counting starts
    start_count: u64,
    /// value passed to rdpmc instruction to read counter value
    msr_mask: u32,
    /// General PMC register number. It is -1 for a fixed counter.
    pmc: i32,
    /// Core this PMC is a part of.
    core: u8,
}

impl Counter {
    /// Creates a Counter object and assigns a physical counter for it. 
    /// If it's a general PMC, writes the UMASK and Event Code to the relevant MSR, but leaves enable bit clear.  
    pub fn new(event: EventType) -> Result<Counter, &'static str> {
        // ensures PMU available and initialized
        check_pmu_availability()?;
        
        match event {
            EventType::InstructionsRetired => create_fixed_counter(FIXED_FUNC_0_RDPMC),
            EventType::UnhaltedCoreCycles => create_fixed_counter(FIXED_FUNC_1_RDPMC),
            EventType::UnhaltedReferenceCycles => create_fixed_counter(FIXED_FUNC_2_RDPMC),
            _ => create_general_counter(event as u64)	
        }
    }
    
    /// Starts the count.
    pub fn start(&mut self) -> Result<(), &'static str> {
        let num_pmc = num_general_purpose_counters();

        // for a fixed value counter (that's already enabled), simply saves the current value as the starting count
        if self.msr_mask > num_pmc as u32 {
            self.start_count = rdpmc(self.msr_mask);
        } 
        // for a general PMC, it enables the counter to start counting from 0
        else {
            self.start_count = 0;
            unsafe { 
                let umask = Msr::new(IA32_PERFEVTSEL0 + self.pmc as u32).read();
                Msr::new(IA32_PERFEVTSEL0 + self.pmc as u32).write(umask | PMC_ENABLE);
            }
        }
        Ok(())
    }
    
    /// Allows user to get count since start without stopping/releasing the counter.
    pub fn get_count_since_start(&self) -> Result<u64, &'static str> {
        let num_pmc = num_general_purpose_counters();

        //checks to make sure the counter hasn't already been released
        if self.msr_mask < num_pmc as u32 && counter_is_available(self.core, self.msr_mask as u8)? {
            return Err("Counter used for this event was marked as free, value stored is likely inaccurate.");
        }
        
        Ok(rdpmc(self.msr_mask) - self.start_count)
    }
    
    /// Stops counting, releases the counter, and returns the count of events since the counter was initialized.
    /// This will consume the counter object since after freeing the counter, the counter should not be accessed.
    pub fn end(self) -> Result<u64, &'static str> {
        let end_val = rdpmc(self.msr_mask);
        let start_count = self.start_count;
        drop(self);
        Ok(end_val - start_count)
    }

    /// Lightweight function, with no checks, that returns the counter's change in value since it was started.
    pub fn diff(&self) -> u64 {
        rdpmc(self.msr_mask) - self.start_count
    }
}

impl Drop for Counter {
    fn drop(&mut self) {
        let num_pmc = num_general_purpose_counters(); 

        // A programmable counter would be claimed at this point, so free it now so it can be used again.
        // Otherwise the counter is a fixed function counter and nothing needs to be done.
        if self.msr_mask < num_pmc as u32 {
            // clears event counting settings and counter 
            unsafe{
                Msr::new(IA32_PERFEVTSEL0 + self.msr_mask).write(0);
                Msr::new(IA32_PMC0 + self.msr_mask).write(0);
            }
            free_counter(self.core, self.msr_mask as u8); 
        }
    }

}

/// Returns true if the counter is not in use
fn counter_is_available(core_id: u8, counter: u8) -> Result<bool, &'static str> {
    let pmcs = get_pmcs_available_for_core(core_id)?;
    Ok(pmcs.load(Ordering::SeqCst).get_bit(counter as usize))
}

/// Sets the counter bit to indicate it is available
fn free_counter(core_id: u8, counter: u8) {
    let pmcs = get_pmcs_available_for_core(core_id).expect("Trying to free a PMU counter when the PMU is not initialized");

    pmcs.fetch_update(
        Ordering::SeqCst,
        Ordering::SeqCst,
        |mut x| {
            if !x.get_bit(counter as usize) {
                x.set_bit(counter as usize, true);
                Some(x)
            }
            else {
                None
            }
        }
    ).unwrap_or_else(|x| { 
        warn!("The counter you are trying to free has been previously freed"); 
        x
    });
}

/// Clears the counter bit to show it's in use
fn claim_counter(core_id: u8, counter: u8) -> Result<(), &'static str> {
    let pmcs = get_pmcs_available_for_core(core_id)?;

    pmcs.fetch_update(
        Ordering::SeqCst,
        Ordering::SeqCst,
        |mut x| {
            if x.get_bit(counter as usize) {
                x.set_bit(counter as usize, false);
                Some(x)
            }
            else {
                None
            }
        }
    ).map_err(|_e| "pmu_x86: Could not claim counter because it is already in use")?;

    Ok(())
}

/// Frees all counters and makes them available to be used.
/// Essentially sets the PMU to its initial state.
pub fn reset_pmu() -> Result<(), &'static str> {
    for pmc in get_pmcs_available()?.iter() {
        pmc.store(INIT_VAL_PMCS_AVAILABLE, Ordering::SeqCst);
    }
    for core in CORES_SAMPLING.iter() {
        core.store(0, Ordering::SeqCst);
    }
    for result in RESULTS_READY.iter() {
        result.store(0, Ordering::SeqCst);
    }
    SAMPLING_INFO.lock().clear();

    Ok(())
}

/// Returns the word position and the bit in the word for the given bit number,
/// which will be used to access the bitmap. 
/// For example, bit 70 maps to word 1, bit 6.
fn find_word_and_offset_from_bit(bit_num: u8) -> (usize, usize) {
    let total_bits_in_word = core::mem::size_of::<u64>() * 8;
    let word_num = bit_num as usize / total_bits_in_word;
    let bit_in_word = bit_num as usize % total_bits_in_word;
    (word_num, bit_in_word)
}

/// Adds the core to the list of those where PMU sampling is underway
/// by setting the core_id bit
fn add_core_to_sampling_list(core_id: u8) -> Result<(), &'static str> {
    let (word_num, bit_in_word) = find_word_and_offset_from_bit(core_id);

    CORES_SAMPLING[word_num].fetch_update(
        Ordering::SeqCst,
        Ordering::SeqCst,
        |mut x| {
            if !x.get_bit(bit_in_word) {
                x.set_bit(bit_in_word, true);
                Some(x)
            }
            else{
                None
            }
        }
    ).map_err(|_e| "pmu_x86: could not add core to sampling list since sampling is already started on this core")?;

    Ok(())
}

/// Removes the core from the list of those where PMU sampling is underway
/// by clearing the core_id bit
fn remove_core_from_sampling_list(core_id: u8) -> Result<(), &'static str> {
    let (word_num, bit_in_word) = find_word_and_offset_from_bit(core_id);

    CORES_SAMPLING[word_num].fetch_update(
        Ordering::SeqCst,
        Ordering::SeqCst,
        |mut x| {
            if x.get_bit(bit_in_word) {
                x.set_bit(bit_in_word, false);
                Some(x)
            }
            else {
                None
            }
        }
    ).map_err(|_e| "pmu_x86: could not remove core from sampling list since sampling has already finished on this core")?;

    Ok(())
}

fn core_is_currently_sampling(core_id: u8) -> bool {
    let (word_num, bit_in_word) = find_word_and_offset_from_bit(core_id);
    CORES_SAMPLING[word_num].load(Ordering::SeqCst).get_bit(bit_in_word)
}

/// Notify that sampling results are ready to be retrieved by setting the core_id bit
fn notify_sampling_results_are_ready(core_id: u8) -> Result<(), &'static str> {
    let (word_num, bit_in_word) = find_word_and_offset_from_bit(core_id);

    RESULTS_READY[word_num].fetch_update(
        Ordering::SeqCst,
        Ordering::SeqCst,
        |mut x| {
            if !x.get_bit(bit_in_word) {
                x.set_bit(bit_in_word, true);
                Some(x)
            }
            else {
                None
            }
        }
    ).map_err(|_e|"pmu_x86: could not add core to results ready list as sampling results have already been added")?;

    Ok(())
}

/// Notify that sampling results have been retrieved by clearing the core_id bit
fn sampling_results_have_been_retrieved(core_id: u8) -> Result<(), &'static str> {
    let (word_num, bit_in_word) = find_word_and_offset_from_bit(core_id);

    RESULTS_READY[word_num].fetch_update(
        Ordering::SeqCst,
        Ordering::SeqCst,
        |mut x| {
            if x.get_bit(bit_in_word) {
                x.set_bit(bit_in_word, false);
                Some(x)
            }
            else {
                None
            }
        }
    ).map_err(|_e| "pmu_x86: could not remove core from results ready list as sampling results have already been retrieved")?;

    Ok(())
}

fn sampling_results_are_ready(core_id: u8) -> bool {
    let (word_num, bit_in_word) = find_word_and_offset_from_bit(core_id);
    RESULTS_READY[word_num].load(Ordering::SeqCst).get_bit(bit_in_word)
}

/// Creates a counter object for a general purpose PMC given the event type.
fn create_general_counter(event_mask: u64) -> Result<Counter, &'static str> {
    programmable_start(event_mask)
}

/// Iterates through the programmable counters and claims the first free one. Returns an `Err` if none are free.
fn programmable_start(event_mask: u64) -> Result<Counter, &'static str> {
    let my_core = cpu::current_cpu().into_u8();
    let num_pmc = num_general_purpose_counters();

    for pmc in 0..num_pmc {
        // If this counter on this core is currently in use, check the next counter.
        if !counter_is_available(my_core, pmc)? {
            continue;
        }
        // Claim the counter in the PMCS_AVAILABLE bitmap and write the values to initialize the counter (except for the enable bit).
        claim_counter(my_core, pmc)?;

        unsafe{
            Msr::new(IA32_PMC0 + (pmc as u32)).write(0);
            Msr::new(IA32_PERFEVTSEL0 + (pmc as u32)).write(event_mask);
        }
        return Ok(Counter {
            start_count: 0, 
            msr_mask: pmc as u32, 
            pmc: pmc as i32, 
            core: my_core
        });
    }
    Err("All programmable counters currently in use.")
}

/// Creates a counter object for a fixed hardware counter
fn create_fixed_counter(msr_mask: u32) -> Result<Counter, &'static str> {
    let my_core = cpu::current_cpu().into_u8();
    let count = rdpmc(msr_mask);
    
    Ok(Counter {
        start_count: count, 
        msr_mask, 
        pmc: -1, 
        core: my_core
    })
}

/// Checks that the PMU has been initialized on this core. If it hasn't been,
/// the version ID indicates whether the PMU merely needs to be initialized or whether the machine has no PMU at all. 
fn check_pmu_availability() -> Result<(), &'static str>  {
    let core_id = cpu::current_cpu().into_u8();
    if !CORES_INITIALIZED.lock().contains(&core_id) {
        if *PMU_VERSION >= MIN_PMU_VERSION {
            error!("PMU version {} is available. It still needs to be initialized on this core", *PMU_VERSION);
            return Err("PMU is available. It still needs to be initialized on this core");
        }
        else {
            error!("This machine does not support a PMU");
            return Err("This machine does not support a PMU");
        }
    }

    Ok(())
}



/// The information stored for each core when event based sampling is in progress
struct SampledEvents{
    start_value: usize,
    task_id: usize,
    sample_count: u32,
    ip_list: Vec<VirtAddr>,
    task_id_list: Vec<usize>,
}

impl SampledEvents {
    pub fn with_capacity(capacity: usize) -> SampledEvents {
        SampledEvents {
            start_value: 0,
            task_id: 0,
            sample_count: 0,
            ip_list: Vec::with_capacity(capacity),
            task_id_list: Vec::with_capacity(capacity),
        }
    }
}

/// Starts the interrupt-driven sampling process using the PMU. 
/// It loads the counter with a starting value such that an overflow will occur after `event_per_sample` events. 
/// That overflow triggers an interrupt in which information about the currently running task is sampled.
/// 
/// # Arguments
/// * `event_type`: determines type of event that sampling interval is based on
/// * `event_per_sample`: how many of those events should occur between each sample 
/// * `task_id`: sample only a specific task by passing its task ID, or sample all tasks by passing `None` 
/// * `sample_count`: specifies how many samples should be recorded. 
/// 
/// # Note
/// The function clears any previous values from sampling stored for this core, so values must be retrieved before starting a new sampling process.
/// Currently, sampling can only be performed on the core this function is called on. 
/// 
pub fn start_samples(event_type: EventType, event_per_sample: u32, task_id: Option<usize>, sample_count: u32) -> Result<(),&'static str> {        
    check_pmu_availability()?;

    // perform checks to ensure that counter is ready to use and that previous results are not being unintentionally discarded
    let my_core_id = cpu::current_cpu().into_u8();

    trace!("start_samples: the core id is : {}", my_core_id);

    // If counter 0 is currently in use (false), then return 
    // PMC0 is always used for sampling, but is not reserved for it.
    if !counter_is_available(my_core_id, 0)? {
        return Err("PMU counter 0 is currently in use and can't be used for sampling. End all other PMU tasks and try again");
    }

    // set the counter as in use
    claim_counter(my_core_id, 0)?;

    // check to make sure that sampling is not already in progress on this core
    // this should never happen since the counter would not be available for use
    if core_is_currently_sampling(my_core_id) {
        return Err("Sampling is already being performed on this CPU.");
    }
    else {
        // check if there are previous sampling results that haven't been retrieved
        if sampling_results_are_ready(my_core_id) {
            return Err("Sample results from previous test have not yet been retrieved. Please use retrieve_samples() function or 
            set RESULTS_READY AtomicBool for this cpu as false to indicate intent to discard sample results.");
        }
        add_core_to_sampling_list(my_core_id)?;
    }
    
    let mut sampled_events = SampledEvents::with_capacity(sample_count as usize);

    // if a task_id was requested, sets the value for the interrupt handler to later read
    if let Some(task_id_num) = task_id {
        sampled_events.task_id = task_id_num;
    }

    sampled_events.sample_count = sample_count;

    if event_per_sample == 0 {
        return Err("Number of events per sample invalid: must be nonzero");
    }
    // This check can never trigger since `event_per_sample` is a `u32`
    // and is therefore by definition in the range `u32::MIN..=u32::MAX`.
    // We'll check anyways, just in case `event_per_sample`'s type is changed.
    #[allow(clippy::useless_conversion)]
    if u32::try_from(event_per_sample).is_err() {
        return Err("Number of events per sample invalid: must be within unsigned 32 bit");
    }

    // Load the counter with (u32::MAX - event_per_sample) so that it overflows,
    // and thus triggers the sampling interrupt, after `event_per_sample` more events occur.
    let start_value = core::u32::MAX - event_per_sample;
    sampled_events.start_value = start_value as usize;

    // insert this core's sampled events into the sampling info map
    SAMPLING_INFO.lock().insert(my_core_id, sampled_events);

    // selects the appropriate mask for the event type and starts the counter
    let event_mask = event_type as u64;

    unsafe{
        Msr::new(IA32_PMC0).write(start_value as u64);
        Msr::new(IA32_PERFEVTSEL0).write(event_mask | PMC_ENABLE | INTERRUPT_ENABLE);
    }

    Ok(())

}

/// Function to manually stop the sampling interrupts. Marks the stored instruction pointers and task IDs as ready to retrieve. 
fn stop_samples(core_id: u8, samples: &mut SampledEvents) -> Result<(), &'static str> {
    // immediately stops counting and clears the counter
    unsafe{
        Msr::new(IA32_PERFEVTSEL0).write(0);
        Msr::new(IA32_PMC0).write(0);
        Msr::new(IA32_PERF_GLOBAL_OVF_CTRL).write(CLEAR_PERF_STATUS_MSR);
    }

    // clears values so that even if exception is somehow triggered, it stops at the next iteration
    samples.start_value = 0;
    samples.task_id = 0; 
    samples.sample_count = 0;

    // marks core as no longer sampling and results as ready
    remove_core_from_sampling_list(core_id)?;
    notify_sampling_results_are_ready(core_id)?;
    free_counter(core_id, 0);

    trace!("Stopped taking samples with the PMU");
    
    Ok(())
}

/// Stores the instruction pointers and corresponding task IDs from the samples
pub struct SampleResults {
    pub instruction_pointers: Vec<memory::VirtualAddress>,
    pub task_ids:  Vec<usize>,
}

/// Returns the samples that were stored during sampling in the form of a SampleResults object. 
/// If samples are not yet finished, forces them to stop.  
pub fn retrieve_samples() -> Result<SampleResults, &'static str> {
    let my_core_id = cpu::current_cpu().into_u8();

    let mut sampling_info = SAMPLING_INFO.lock();
    let samples = sampling_info.get_mut(&my_core_id).ok_or("pmu_x86::retrieve_samples: could not retrieve sampling information for this core")?;

    // the interrupt handler might have stopped samples already so this check is required
    if core_is_currently_sampling(my_core_id) {
        stop_samples(my_core_id, samples)?;
    }
    
    sampling_results_have_been_retrieved(my_core_id)?;

    let mut instruction_pointers = Vec::with_capacity(samples.ip_list.len());
    instruction_pointers.extend(samples.ip_list.iter().map(|va| memory::VirtualAddress::new_canonical(va.as_u64() as usize)));

    Ok(SampleResults { instruction_pointers, task_ids: samples.task_id_list.clone() })   
}

/// Simple function to print values from SampleResults in a form that the script "post-mortem pmu analysis.py" can parse. 
pub fn print_samples(sample_results: &SampleResults) {
    debug!("Printing Samples:");
    for results in sample_results.instruction_pointers.iter().zip(sample_results.task_ids.iter()) {
        debug!("{:x} {}", results.0, results.1);
    }
}

/// Finds the corresponding function for each sampled instruction pointer and calculates the percentage of samples in which each function occurred.
pub fn find_function_names_from_samples(sample_results: &SampleResults) -> Result<(), &'static str> {
    let namespace = task::with_current_task(|f| f.get_namespace().clone())
        .map_err(|_| "pmu_x86::get_function_names_from_samples: couldn't get current task")?;
    debug!("Analyze Samples:");

    let mut sections: BTreeMap<String, usize> = BTreeMap::new();
    let total_samples = sample_results.instruction_pointers.len();

    for ip in sample_results.instruction_pointers.iter() {
        let (section_ref, _offset) = namespace.get_section_containing_address(*ip, false)
            .ok_or("Can't find section containing sampled instruction pointer")?;
        let section_name = section_ref.name_without_hash().to_string();

        sections.entry(section_name).and_modify(|e| {*e += 1}).or_insert(1);
    }

    for (section, occurrences) in sections.iter() {
        let percentage = *occurrences as f32 / total_samples as f32 * 100.0;
        debug!("{:?}  {}% \n", section, percentage);
    }

    Ok(())
}

/// This function is designed to be invoked from an interrupt handler 
/// when a sampling interrupt has (or may have) occurred. 
///
/// It takes a sample by logging the instruction pointer and task ID at the point
/// at which the sampling interrupt occurred. 
/// The counter is then either reset to its starting value 
/// (if there are more samples that need to be taken)
/// or disabled entirely if the final sample has been taken. 
///
/// # Argument
/// * `stack_frame`: the stack frame that was pushed onto the stack automatically 
///    by the CPU and passed into the interrupt/exception handler. 
///    This is used to determine during which instruction the sampling interrupt occurred.
///
/// # Return
/// * Returns `Ok(true)` if a PMU sample occurred and was handled. 
/// * Returns `Ok(false)` if PMU isn't supported, or if PMU wasn't yet initialized, 
///   or if there was not a pending sampling interrupt. 
/// * Returns an `Err` if PMU is supported and initialized and a sample was pending, 
///   but an error occurred while logging the sample.
///
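///
/// # Example
/// A minimal sketch of an interrupt handler body that forwards a performance monitoring
/// interrupt (PMI) to this function. The handler name is hypothetical, and registering it
/// for the PMI vector plus sending the end-of-interrupt are assumed to happen elsewhere.
/// ```
/// use x86_64::structures::idt::InterruptStackFrame;
///
/// fn my_pmi_handler_body(stack_frame: &InterruptStackFrame) {
///     match pmu_x86::handle_sample(stack_frame) {
///         Ok(true)  => {} // a sample was logged and the counter was reset or stopped
///         Ok(false) => {} // no PMU, PMU not initialized on this core, or no pending overflow
///         Err(e)    => log::error!("PMU sampling error: {}", e),
///     }
/// }
/// ```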
pub fn handle_sample(stack_frame: &InterruptStackFrame) -> Result<bool, &'static str> {
    // Check that PMU hardware exists and is supported on this machine.
    if *PMU_VERSION < MIN_PMU_VERSION {
        return Ok(false);
    }
    // Check that a PMU sampling event is currently pending.
    if unsafe { Msr::new(IA32_PERF_GLOBAL_STAUS).read() } == 0 {
        return Ok(false);
    }

    unsafe { Msr::new(IA32_PERF_GLOBAL_OVF_CTRL).write(CLEAR_PERF_STATUS_MSR); }

    let my_core_id = cpu::current_cpu().into_u8();

    let mut sampling_info = SAMPLING_INFO.lock();
    let samples = sampling_info.get_mut(&my_core_id)
        .ok_or("pmu_x86::handle_sample: Could not retrieve sampling information for this core")?;

    let current_count = samples.sample_count;
    // if all samples have already been taken, calls the function to turn off the counter
    if current_count == 0 {
        stop_samples(my_core_id, samples)?; 
        return Ok(true);
    }

    samples.sample_count = current_count - 1;

    // if the running task is the requested one or if one isn't requested, records the IP
    task::with_current_task(|taskref| {
        let requested_task_id = samples.task_id;
        let task_id = taskref.id;
        if (requested_task_id == 0) || (requested_task_id == task_id) {
            samples.ip_list.push(stack_frame.instruction_pointer);
            samples.task_id_list.push(task_id);
        }
    }).unwrap_or_else(|_| {
        samples.ip_list.push(stack_frame.instruction_pointer);
        samples.task_id_list.push(0);
    });

    // stops the counter, clears the overflow status, reloads the start value, and restarts the counter
    unsafe {
        let event_select = Msr::new(IA32_PERFEVTSEL0).read();
        Msr::new(IA32_PERFEVTSEL0).write(0);
        Msr::new(IA32_PERF_GLOBAL_OVF_CTRL).write(CLEAR_PERF_STATUS_MSR);
        Msr::new(IA32_PMC0).write(samples.start_value as u64);
        Msr::new(IA32_PERFEVTSEL0).write(event_select);
    }

    if let Some(my_apic) = apic::get_my_apic() {
        my_apic.write().clear_pmi_mask();
    }
    else {
        error!("Error in Performance Monitoring! Reference to the local APIC could not be retrieved.");
    }

    Ok(true)
}


/// Reads the given performance monitoring counter via the `rdpmc` instruction.
/// The `msr` argument is the counter selector placed in ECX: a general purpose PMC index,
/// or one of the `FIXED_FUNC_*_RDPMC` values for a fixed-function counter.
fn rdpmc(msr: u32) -> u64 {
    let (high, low): (u32, u32);
    unsafe {
        core::arch::asm!(
            "rdpmc",
            in("ecx") msr,
            out("eax") low, out("edx") high,
            options(nomem, nostack, preserves_flags),
        );
    }
    ((high as u64) << 32) | (low as u64)
}