aboutsummaryrefslogblamecommitdiffstats
path: root/src/lib.rs
blob: 8f5660186401f29af7e3c720b5f53fb63b4677ea (plain) (tree)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
















                                                                         
                                         
               
                               
                     

                        
 





                            

                                                     


                                       


                                                    
                                             


                                                    













                                   
                                                                            


                                                  
 






                                                                    


























                                                                         
// A program to scan emails for phishing links.
// Copyright (C) 2019  Harald Eilertsen
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program.  If not, see <https://www.gnu.org/licenses/>.

use chrono::{ DateTime, Utc };
use serde::{ Deserialize, Deserializer };
use serde_json;
use std::io::{ self, BufRead };
use std::net::IpAddr;
use std::result::Result;
use std::str::FromStr;

/// In-memory collection of phishing records.
///
/// Instances are produced by `load_phistank`, which fills `phishes` from the
/// deserialized input, and queried with `PhishTank::is_phish`.
pub struct PhishTank {
    /// All records that were read from the input.
    pub phishes: Vec<Phish>,
}

/// One record of the PhishTank dataset.
///
/// The struct is deserialized with serde. Two kinds of fields need custom
/// handling because the dataset encodes them as strings: numbers (see
/// `deserialize_number`) and booleans written as "yes"/"no" (see
/// `deserialize_yesno`).
#[derive(Deserialize)]
pub struct Phish {
    /// Numeric identifier of the record; stored as a string in the input and
    /// parsed into a `u64` by `deserialize_number`.
    #[serde(deserialize_with = "deserialize_number")]
    pub phish_id: u64,
    /// The URL of the phish. This is the value `PhishTank::is_phish` compares
    /// against.
    pub url: String,
    /// NOTE(review): presumably a link to the record's page on PhishTank;
    /// kept as an unparsed string. Confirm against the dataset documentation.
    pub phish_detail_url: String,
    /// NOTE(review): presumably the time the record was submitted; confirm
    /// against the dataset documentation.
    pub submission_time: DateTime<Utc>,

    /// "yes"/"no" in the input, converted to a `bool` by `deserialize_yesno`.
    #[serde(deserialize_with = "deserialize_yesno")]
    pub verified: bool,
    /// Optional timestamp; `None` when the input carries no value for it.
    pub verified_time: Option<DateTime<Utc>>,

    /// "yes"/"no" in the input, converted to a `bool` by `deserialize_yesno`.
    #[serde(deserialize_with = "deserialize_yesno")]
    pub online: bool,
    /// NOTE(review): presumably the name of the impersonated brand or
    /// organisation; confirm against the dataset documentation.
    pub target: String,
    /// The `PhishDetails` entries attached to this record.
    pub details: Vec<PhishDetails>,
}

/// One element of the `details` list of a `Phish` record.
///
/// Apart from `ip_address` and `detail_time`, all fields are kept as the
/// plain strings found in the input; no validation is applied to them here.
#[derive(Deserialize)]
pub struct PhishDetails {
    /// IP address, parsed into `std::net::IpAddr` during deserialization.
    pub ip_address: IpAddr,
    /// NOTE(review): presumably the CIDR block containing `ip_address`;
    /// stored unparsed. Confirm against the dataset documentation.
    pub cidr_block: String,
    /// NOTE(review): presumably identifies the network announcing the block;
    /// stored unparsed. Confirm against the dataset documentation.
    pub announcing_network: String,
    /// NOTE(review): presumably the regional internet registry; stored
    /// unparsed. Confirm against the dataset documentation.
    pub rir: String,
    /// NOTE(review): presumably a country code or name; stored unparsed.
    /// Confirm against the dataset documentation.
    pub country: String,
    /// Timestamp associated with this detail entry.
    pub detail_time: DateTime<Utc>,
}

/// Reads a PhishTank dump in JSON format from `input`.
///
/// The whole input is deserialized as a list of `Phish` records, which is
/// then wrapped in a `PhishTank`.
///
/// # Errors
///
/// Any failure reported by `serde_json` (I/O problems as well as malformed or
/// unexpected JSON) is converted into an `io::Error` and returned.
pub fn load_phistank<R: BufRead>(input: R) -> Result<PhishTank, io::Error> {
    serde_json::from_reader(input)
        .map(|phishes| PhishTank { phishes })
        .map_err(io::Error::from)
}


impl PhishTank {
    /// Returns `true` if `url` is equal to the `url` field of any loaded
    /// record, and `false` otherwise (including when no records are loaded).
    ///
    /// The comparison is exact string equality; the URL is not normalised in
    /// any way before comparing. The records are scanned one by one.
    pub fn is_phish(&self, url: &str) -> bool {
        // `any` expresses the membership test directly and stops at the first
        // match, just like the former `find(..).is_some()`.
        self.phishes.iter().any(|phish| phish.url == url)
    }
}

// Helper function to deserialize a number represented as a string.
//
// The PhishTank dataset represents numbers this way, so the default
// json deserializer implementation will only deserialize to a string.
//
fn deserialize_number<'de, D>(d: D) -> Result<u64, D::Error>
    where D: Deserializer<'de>
{
    let s = String::deserialize(d)?;
    u64::from_str(&s).map_err(serde::de::Error::custom)
}

// Helper function to deserialize "yes/no" into a bool.
//
// The PhishTank dataset uses the strings "yes" and "no" to represent
// boolean values. This function helps us deserialize back into a proper
// bool.
//
fn deserialize_yesno<'de, D>(d: D) -> Result<bool, D::Error>
    where D: Deserializer<'de>
{
    match String::deserialize(d)?.as_ref() {
        "yes" => Ok(true),
        "no" => Ok(false),
        _ => Err(serde::de::Error::custom("Expected \"yes\" or \"no\"."))
    }
}