61

Scrape your Dev.to pageviews with Rust

 5 years ago
source link: https://www.tuicool.com/articles/hit/3aame2n
Go to the source link to view the article. There you can see the images, any updated content, and better typesetting. If the link is broken, please click the button below to view the snapshot taken at that time.

Here's a quick 'n' dirty way to dump your new-fangled post analytics to a CSV using Rust. You have to save the page source to src/page.html before building, because the program embeds that file at compile time. Y'know, for graphs and stuff. Who doesn't like graphs?

This ain't polished - It was my "one-hour-before-my-day-job-starts" project today. Snag the regex for your own real version, or improve this one and show me!

extern crate chrono;
extern crate csv;
#[macro_use]
extern crate lazy_static;
extern crate regex;
extern crate select;
extern crate serde;
#[macro_use]
extern crate serde_derive;

use chrono::prelude::*;
use regex::Regex;
use select::{
    document::Document,
    predicate::{Class, Name},
};
use std::{
    error::Error,
    fs::{File, OpenOptions},
};

lazy_static! {
    // Local wall-clock time, initialised lazily on first use and then fixed,
    // so every record produced in one run carries the same timestamp.
    static ref NOW: DateTime<Local> = Local::now();
    // Matches text holding three integers separated by `//` and captures them
    // as groups 1, 2 and 3. At least one character must precede the first
    // number and follow the last one. `scrape_page` reads the groups as
    // views, reactions and comments, in that order.
    static ref STAT_RE: Regex = Regex::new(".+?([0-9]+).+//.?([0-9]+).+//.?([0-9]+).+").unwrap();
}

/// One row of the CSV output: the stats scraped for a single post.
///
/// Fields are serialized by `write_entries` in declaration order.
#[derive(Debug, Serialize)]
struct Record {
    /// Timestamp of the scrape run; `scrape_page` fills it with an RFC 2822 string.
    time: String,
    /// Post title, taken by `scrape_page` from the post's `<h2>` text.
    title: String,
    /// First number captured from the stats text.
    views: i32,
    /// Second number captured from the stats text.
    reactions: i32,
    /// Third number captured from the stats text.
    comments: i32,
}

impl Record {
    fn new(time: String, title: String, views: i32, reactions: i32, comments: i32) -> Self {
        Self {
            time,
            title,
            views,
            reactions,
            comments,
        }
    }
}

fn write_entries(rs: Vec<Record>, f: File) -> Result<(), Box<Error>> {
    let mut wtr = csv::Writer::from_writer(f);
    for r in rs {
        wtr.serialize(r)?;
    }
    wtr.flush()?;
    Ok(())
}

fn scrape_page(doc: Document) -> Vec<Record> {
    let mut ret = Vec::new();
    for node in doc.find(Class("dashboard-pageviews-indicator")) {
        let text = node.text();
        if STAT_RE.is_match(&text) {
            let title = node
                .parent()
                .unwrap()
                .parent()
                .unwrap()
                .find(Name("a"))
                .next()
                .unwrap()
                .find(Name("h2"))
                .next()
                .unwrap()
                .text();
            for cap in STAT_RE.captures_iter(&text) {
                let r = Record::new(
                    NOW.to_rfc2822(),
                    title.clone(),
                    cap[1].parse::<i32>().unwrap(),
                    cap[2].parse::<i32>().unwrap(),
                    cap[3].parse::<i32>().unwrap(),
                );
                ret.push(r);
            }
        }
    }
    ret
}

/// Entry point: parses the HTML embedded from `page.html`, opens `stats.csv`
/// for appending (creating it if needed), and writes the scraped records.
///
/// Panics if the CSV file cannot be opened or the records cannot be written.
fn main() {
    // The page is embedded at compile time, so a new snapshot needs a rebuild.
    let page = Document::from(include_str!("page.html"));

    let mut open_opts = OpenOptions::new();
    open_opts.write(true).create(true).append(true);
    let csv_file = open_opts.open("stats.csv").unwrap();

    write_entries(scrape_page(page), csv_file).unwrap();
}

About Joyk


Aggregate valuable and interesting links.
Joyk means Joy of geeK