From 5c258209dc3f0c175d6338a3c658d7a8e01aedc0 Mon Sep 17 00:00:00 2001 From: Harald Eilertsen Date: Wed, 8 May 2019 11:53:14 +0200 Subject: Help deserializer turn numbers and bools into their proper types. The PhishTank dataset encodes the numeric `phish_id` as a string, and boolean values as "yes" or "no". The default json deserializer from serde isn't able to deserialize these values into their proper types, so we have to help it a bit. --- src/lib.rs | 47 +++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 41 insertions(+), 6 deletions(-) (limited to 'src') diff --git a/src/lib.rs b/src/lib.rs index 45add28..26be932 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -15,10 +15,12 @@ // along with this program. If not, see . use chrono::{ DateTime, Utc }; -use serde::Deserialize; +use serde::{ Deserialize, Deserializer }; use serde_json; -use std::io::{ BufRead, Result }; +use std::io::{ self, BufRead }; use std::net::IpAddr; +use std::result::Result; +use std::str::FromStr; #[derive(Deserialize)] pub struct PhishTank { @@ -27,13 +29,18 @@ pub struct PhishTank { #[derive(Deserialize)] pub struct Phish { - pub phish_id: String, + #[serde(deserialize_with = "deserialize_number")] + pub phish_id: u64, pub url: String, pub phish_detail_url: String, pub submission_time: DateTime<Utc>, - pub verified: String, + + #[serde(deserialize_with = "deserialize_yesno")] + pub verified: bool, pub verified_time: Option<DateTime<Utc>>, - pub online: String, + + #[serde(deserialize_with = "deserialize_yesno")] + pub online: bool, pub target: String, pub details: Vec<PhishDetails>, } @@ -48,7 +55,35 @@ pub struct PhishDetails { pub detail_time: DateTime<Utc>, } -pub fn load_phistank<R: BufRead>(input: R) -> Result<PhishTank> { +pub fn load_phistank<R: BufRead>(input: R) -> Result<PhishTank, io::Error> { let phishes = serde_json::from_reader(input)?; Ok(PhishTank { phishes }) } + +// Helper function to deserialize a number represented as a string.
+// +// The PhishTank dataset represents numbers this way, so the default +// json deserializer implementation will only deserialize to a string. +// +fn deserialize_number<'de, D>(d: D) -> Result<u64, D::Error> + where D: Deserializer<'de> +{ + let s = String::deserialize(d)?; + u64::from_str(&s).map_err(serde::de::Error::custom) +} + +// Helper function to deserialize "yes/no" into a bool. +// +// The PhishTank dataset uses the strings "yes" and "no" to represent +// boolean values. This function helps us deserialize back into a proper +// bool. +// +fn deserialize_yesno<'de, D>(d: D) -> Result<bool, D::Error> + where D: Deserializer<'de> +{ + match String::deserialize(d)?.as_ref() { + "yes" => Ok(true), + "no" => Ok(false), + _ => Err(serde::de::Error::custom("Expected \"yes\" or \"no\".")) + } +} -- cgit v1.2.3