Nom 是一个基于解析器组合子(parser combinator)的二进制解析框架。它提供了许多特性。

Nom 围绕形如 impl Fn(Input) -> IResult<Remain, Output, Error> 的解析器工作:解析器接收输入,成功时返回剩余的输入和解析出的结果,失败时返回错误。

大致来说,它的工作方式是:你告诉 nom 要按哪种模式去匹配字节,它会尝试从输入的开头解析出尽可能多的内容,并将其余的输入返回给你。

相比 serde,它更注重高速的二进制解析,并不提供 encode/serialize 的功能。它本身只提供了一套约定和一些工具,帮助我们编写解析器。

小试

最近本人在尝试写一个系统监视器,了解到 Linux 提供了 /proc/stat 文件,它能给出内核的运行统计信息。我们尝试解析一下它吧!

首先是一个数字解析器。顺带一提,nom 相当精简,很多与具体解析相关的东西(比如这里把一串数字转换成整数)都需要我们自己动手实现。

/// Parses one or more ASCII decimal digits at the start of `input` as a `u64`.
///
/// On success returns the remaining input together with the parsed value.
/// Fails when `input` does not start with a digit, or when the digit run
/// does not fit into a `u64`.
fn parse_u64(input: &[u8]) -> IResult<&[u8], u64> {
    map_res(digit1, |digits: &[u8]| {
        // `digit1` only yields ASCII digits, so the checked UTF-8 conversion
        // is not expected to fail; using it keeps this function free of `unsafe`.
        core::str::from_utf8(digits)
            .ok()
            .and_then(|text| text.parse::<u64>().ok())
            .ok_or(())
    })(input)
}

然后是一个分割器:它取出分隔符之前的内容,并跳过紧随其后的连续分隔符。在按空格等分隔符切分字段的场合十分有用。

/// Builds a parser that cuts the input at the delimiter byte `n`.
///
/// The returned parser yields everything before the first occurrence of `n`
/// as its output, then skips that occurrence and any run of `n` bytes that
/// directly follows it. The remaining input starts at the first byte that is
/// not `n`. When `n` does not occur, the whole input is the output and the
/// remainder is empty.
fn split(n: u8)->impl Fn(&[u8])->IResult<&[u8], &[u8]> {
    move |input| {
        // Everything up to (but not including) the delimiter.
        let (rest, head) = take_till(|byte| byte == n)(input)?;
        // Drop the delimiter run so the next parser starts on real data.
        let (rest, _) = take_while(|byte| byte == n)(rest)?;
        Ok((rest, head))
    }
}

随后就可以照着官方文档(proc_stat(5) 手册页,链接见代码开头的注释)来实现了。完整代码如下:

// ref: https://man7.org/linux/man-pages/man5/proc_stat.5.html

/// The subset of `/proc/stat` that this parser extracts.
///
/// Only the fields listed here are read; every other line of the file is
/// skipped by the parser.
#[derive(Debug)]
pub struct ProcStat {
    /// Counters from the first `cpu` line found in the input.
    pub total: CPUStat,
    /// Counters from the consecutive `cpu` lines that directly follow the
    /// first one, in file order.
    pub per_cpu: Vec<CPUStat>,
    /// Count of interrupts serviced since boot time. Only the total (the
    /// first number on the `intr` line) is parsed.
    pub intr: u64,
    /// The number of context switches that the system underwent.
    pub ctxt: u64,
    /// Boot time, in seconds since the Epoch, 1970-01-01 00:00:00 +0000 (UTC).
    pub btime: u64,
    /// Number of forks since boot.
    pub processes: u64,
}

/// The ten time counters found on one `cpu` line of `/proc/stat`.
///
/// The values are stored exactly as they appear in the file; see
/// proc_stat(5) for the unit the kernel reports them in.
#[derive(Debug, Clone, PartialEq, Eq, Default)]
pub struct CPUStat {
    /// Time spent in user mode.
    pub user: u64,
    /// Time spent in user mode with low priority (nice).
    pub nice: u64,
    /// Time spent in system mode.
    pub system: u64,
    /// Time spent in the idle task.
    pub idle: u64,
    /// Time waiting for I/O to complete (may be lower than the actual value).
    pub iowait: u64,
    /// Time servicing interrupts.
    pub irq: u64,
    /// Time servicing softirqs.
    pub softirq: u64,
    /// Stolen time, which is the time spent in other operating systems when
    /// running in a virtualized environment.
    pub steal: u64,
    /// Time spent running a virtual CPU for guest operating systems under the
    /// control of the Linux kernel.
    pub guest: u64,
    /// Time spent running a niced guest.
    pub guest_nice: u64,
}

use nom::{
    bytes::complete::{tag, take_till, take_while}, 
    character::{complete::digit1, is_space}, 
    combinator::map_res, 
    IResult,
};

/// Parses one or more ASCII decimal digits at the start of `input` as a `u64`.
///
/// On success returns the remaining input together with the parsed value.
/// Fails when `input` does not start with a digit, or when the digit run
/// does not fit into a `u64`.
fn parse_u64(input: &[u8]) -> IResult<&[u8], u64> {
    map_res(digit1, |digits: &[u8]| {
        // `digit1` only yields ASCII digits, so the checked UTF-8 conversion
        // is not expected to fail; using it keeps this function free of `unsafe`.
        core::str::from_utf8(digits)
            .ok()
            .and_then(|text| text.parse::<u64>().ok())
            .ok_or(())
    })(input)
}

/// Builds a parser that cuts the input at the delimiter byte `n`.
///
/// The returned parser yields everything before the first occurrence of `n`
/// as its output, then skips that occurrence and any run of `n` bytes that
/// directly follows it. The remaining input starts at the first byte that is
/// not `n`. When `n` does not occur, the whole input is the output and the
/// remainder is empty.
fn split(n: u8)->impl Fn(&[u8])->IResult<&[u8], &[u8]> {
    move |input| {
        // Everything up to (but not including) the delimiter.
        let (rest, head) = take_till(|byte| byte == n)(input)?;
        // Drop the delimiter run so the next parser starts on real data.
        let (rest, _) = take_while(|byte| byte == n)(rest)?;
        Ok((rest, head))
    }
}

/// Takes the next line off the front of `input`.
///
/// The output is the bytes before the first `\n` (the newline itself is not
/// included). The remaining input starts after that newline and any further
/// newlines that directly follow it.
fn take_line(input: &[u8]) -> IResult<&[u8], &[u8]> {
    let next_line = split(b'\n');
    next_line(input)
}

/// Parses one `cpu` line (`cpu ...` or `cpuN ...`) into a [`CPUStat`].
///
/// `input` must start with the literal `cpu`. Whatever follows up to the
/// first space (the CPU index, if any) is skipped, then ten whitespace
/// separated numbers are read in the order `user, nice, system, idle,
/// iowait, irq, softirq, steal, guest, guest_nice`. All ten are required:
/// a missing or non-numeric field makes the parser fail. Bytes after the
/// last counter are returned as the remaining input.
fn cpu_stat(input: &[u8])->IResult<&[u8], CPUStat> {
    // Check the `cpu` prefix, then skip the optional index and the spaces after it.
    let (input, _) = tag("cpu")(input)?;
    let (mut rest, _) = split(b' ')(input)?;

    // Read the ten counters in file order; each one is followed by optional
    // spaces/tabs that are consumed before the next number.
    let mut counters = [0u64; 10];
    for slot in counters.iter_mut() {
        let (after_number, value) = parse_u64(rest)?;
        let (after_space, _) = take_while(is_space)(after_number)?;
        *slot = value;
        rest = after_space;
    }

    let [user, nice, system, idle, iowait, irq, softirq, steal, guest, guest_nice] = counters;
    Ok((
        rest,
        CPUStat {
            user,
            nice,
            system,
            idle,
            iowait,
            irq,
            softirq,
            steal,
            guest,
            guest_nice,
        },
    ))
}

/// Builds a parser for a `<name> <number>` entry.
///
/// The returned parser requires the input to start with `name`, skips any
/// spaces or tabs after it, and parses the following decimal number as a
/// `u64`. Anything after that number is left in the remaining input.
fn proc_field<'a>(name: &'a str) -> impl Fn(&[u8])->IResult<&[u8], u64>+'a {
    move |input| {
        let (input, _) = tag(name)(input)?;
        let (input, _) = take_while(is_space)(input)?;
        parse_u64(input)
    }
}

/// Parses the contents of `/proc/stat` into a [`ProcStat`].
///
/// The input is scanned in order: the first `cpu` line becomes `total`, the
/// `cpu` lines directly after it become `per_cpu`, and then the `intr`,
/// `ctxt`, `btime` and `processes` entries are looked up one after another.
/// Lines that do not match the entry currently being looked for are skipped.
/// Returns an error when one of the entries cannot be found before the input
/// is exhausted.
pub fn proc_stat(input: &[u8]) -> IResult<&[u8], ProcStat> {
    /// Applies `f` to successive lines until one parses. Returns the parsed
    /// value and the input after that line; the rest of the matching line is
    /// discarded. If no line matches, returns the error of the last line tried.
    fn try_field<O, F: Fn(&[u8])->IResult<&[u8], O>>(input: &[u8], f: F) -> IResult<&[u8], O> {
        let (mut rest, mut line) = take_line(input)?;
        loop {
            match f(line) {
                Ok((_, value)) => return Ok((rest, value)),
                // Nothing left to try: report the failure of the last line.
                Err(err) if rest.is_empty() => return Err(err),
                Err(_) => {}
            }
            (rest, line) = take_line(rest)?;
        }
    }

    /// Applies `f` at the start of each consecutive line for as long as it
    /// succeeds, collecting the values. Stops (without error) at the first
    /// line that `f` rejects and leaves that line in the remaining input.
    fn try_field_many<O, F: Fn(&[u8])->IResult<&[u8], O>>(mut input: &[u8], f: F) -> IResult<&[u8], Vec<O>> {
        let mut values = Vec::new();
        while let Ok((_, value)) = f(input) {
            // Advance past the whole line, not just the part `f` consumed.
            (input, _) = take_line(input)?;
            values.push(value);
        }
        Ok((input, values))
    }

    let (input, total) = try_field(input, cpu_stat)?;
    let (input, per_cpu) = try_field_many(input, cpu_stat)?;
    let (input, intr) = try_field(input, proc_field("intr"))?;
    let (input, ctxt) = try_field(input, proc_field("ctxt"))?;
    let (input, btime) = try_field(input, proc_field("btime"))?;
    let (input, processes) = try_field(input, proc_field("processes"))?;

    Ok((
        input,
        ProcStat {
            total,
            per_cpu,
            intr,
            ctxt,
            btime,
            processes,
        },
    ))
}

/// Reads `/proc/stat`, parses it and pretty-prints the result.
///
/// Panics with a descriptive message when the file cannot be read or parsed.
fn main() {
    let raw = std::fs::read("/proc/stat").expect("failed to read /proc/stat");
    let (_, stat) = proc_stat(&raw).expect("failed to parse /proc/stat");
    println!("{stat:#?}");
}