aboutsummaryrefslogtreecommitdiffstats
path: root/src/lib.rs
blob: 26be932adc7bac1407f42deeb877f5c59b6894ca (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
// A program to scan emails for phishing links.
// Copyright (C) 2019  Harald Eilertsen
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program.  If not, see <https://www.gnu.org/licenses/>.

use chrono::{ DateTime, Utc };
use serde::{ Deserialize, Deserializer };
use serde_json;
use std::io::{ self, BufRead };
use std::net::IpAddr;
use std::result::Result;
use std::str::FromStr;

/// Container for a collection of [`Phish`] entries.
///
/// `load_phistank` builds this by deserializing its input directly into the
/// `phishes` field and wrapping the result.
#[derive(Deserialize)]
pub struct PhishTank {
    /// The individual phish records, in the order they were deserialized.
    pub phishes: Vec<Phish>,
}

/// A single entry of the PhishTank dataset.
///
/// Two kinds of fields need custom handling because of how the dataset
/// encodes them: `phish_id` arrives as a number inside a string, and the
/// `verified` / `online` flags arrive as the strings "yes" and "no".
#[derive(Deserialize)]
pub struct Phish {
    /// Numeric id of the entry; parsed from a string by `deserialize_number`.
    #[serde(deserialize_with = "deserialize_number")]
    pub phish_id: u64,
    /// NOTE(review): presumably the reported phishing URL, kept as an
    /// unparsed string — confirm against the dataset documentation.
    pub url: String,
    /// NOTE(review): presumably a link to the PhishTank page describing this
    /// entry — confirm against the dataset documentation.
    pub phish_detail_url: String,
    /// Timestamp in UTC. NOTE(review): presumably when the entry was
    /// submitted — confirm.
    pub submission_time: DateTime<Utc>,

    /// `true` when the input says "yes", `false` when it says "no"
    /// (converted by `deserialize_yesno`; any other value is an error).
    #[serde(deserialize_with = "deserialize_yesno")]
    pub verified: bool,
    /// Optional UTC timestamp. NOTE(review): presumably only set for
    /// verified entries — confirm.
    pub verified_time: Option<DateTime<Utc>>,

    /// `true` when the input says "yes", `false` when it says "no"
    /// (converted by `deserialize_yesno`; any other value is an error).
    #[serde(deserialize_with = "deserialize_yesno")]
    pub online: bool,
    /// NOTE(review): presumably the brand or organisation being
    /// impersonated — confirm against the dataset documentation.
    pub target: String,
    /// Zero or more detail records attached to this entry.
    pub details: Vec<PhishDetails>,
}

/// One detail record attached to a [`Phish`] entry (see `Phish::details`).
///
/// NOTE(review): the field meanings below are inferred from their names;
/// confirm against the PhishTank dataset documentation.
#[derive(Deserialize)]
pub struct PhishDetails {
    /// IP address, parsed into an `IpAddr` (IPv4 or IPv6).
    pub ip_address: IpAddr,
    /// Kept as a plain string; presumably CIDR notation — not parsed here.
    pub cidr_block: String,
    /// Kept as a plain string; presumably the network announcing the block.
    pub announcing_network: String,
    /// Kept as a plain string; presumably the Regional Internet Registry.
    pub rir: String,
    /// Kept as a plain string; presumably a country name or code.
    pub country: String,
    /// Timestamp in UTC associated with this detail record.
    pub detail_time: DateTime<Utc>,
}

pub fn load_phistank<R: BufRead>(input: R) -> Result<PhishTank, io::Error> {
    let phishes = serde_json::from_reader(input)?;
    Ok(PhishTank { phishes })
}

// Helper function to deserialize a number represented as a string.
//
// The PhishTank dataset represents numbers this way, so the default
// json deserializer implementation will only deserialize to a string.
//
fn deserialize_number<'de, D>(d: D) -> Result<u64, D::Error>
    where D: Deserializer<'de>
{
    let s = String::deserialize(d)?;
    u64::from_str(&s).map_err(serde::de::Error::custom)
}

// Helper function to deserialize "yes/no" into a bool.
//
// The PhishTank dataset uses the strings "yes" and "no" to represent
// boolean values. This function helps us deserialize back into a proper
// bool.
//
fn deserialize_yesno<'de, D>(d: D) -> Result<bool, D::Error>
    where D: Deserializer<'de>
{
    match String::deserialize(d)?.as_ref() {
        "yes" => Ok(true),
        "no" => Ok(false),
        _ => Err(serde::de::Error::custom("Expected \"yes\" or \"no\"."))
    }
}