diff options
author | Harald Eilertsen <haraldei@anduin.net> | 2019-05-08 11:53:14 +0200 |
---|---|---|
committer | Harald Eilertsen <haraldei@anduin.net> | 2019-05-08 11:53:14 +0200 |
commit | 5c258209dc3f0c175d6338a3c658d7a8e01aedc0 (patch) | |
tree | ebdc76a19144b0eb5c44728a587ef3cae5822be9 /src/lib.rs | |
parent | d1fdebb68c2818b224b092d4c265394f3fbdcb5d (diff) | |
download | phisher-5c258209dc3f0c175d6338a3c658d7a8e01aedc0.tar.gz phisher-5c258209dc3f0c175d6338a3c658d7a8e01aedc0.tar.bz2 phisher-5c258209dc3f0c175d6338a3c658d7a8e01aedc0.zip |
Help deserializer turn numbers and bools into their proper types.
The PhishTank dataset encodes the numeric `phish_id` as a string,
and boolean values as "yes" or "no". The default json deserializer
from serde isn't able to deserialize these values into their proper
types, so we have to help it a bit.
Diffstat (limited to 'src/lib.rs')
-rw-r--r-- | src/lib.rs | 47 |
1 files changed, 41 insertions, 6 deletions
@@ -15,10 +15,12 @@ // along with this program. If not, see <https://www.gnu.org/licenses/>. use chrono::{ DateTime, Utc }; -use serde::Deserialize; +use serde::{ Deserialize, Deserializer }; use serde_json; -use std::io::{ BufRead, Result }; +use std::io::{ self, BufRead }; use std::net::IpAddr; +use std::result::Result; +use std::str::FromStr; #[derive(Deserialize)] pub struct PhishTank { @@ -27,13 +29,18 @@ pub struct PhishTank { #[derive(Deserialize)] pub struct Phish { - pub phish_id: String, + #[serde(deserialize_with = "deserialize_number")] + pub phish_id: u64, pub url: String, pub phish_detail_url: String, pub submission_time: DateTime<Utc>, - pub verified: String, + + #[serde(deserialize_with = "deserialize_yesno")] + pub verified: bool, pub verified_time: Option<DateTime<Utc>>, - pub online: String, + + #[serde(deserialize_with = "deserialize_yesno")] + pub online: bool, pub target: String, pub details: Vec<PhishDetails>, } @@ -48,7 +55,35 @@ pub struct PhishDetails { pub detail_time: DateTime<Utc>, } -pub fn load_phistank<R: BufRead>(input: R) -> Result<PhishTank> { +pub fn load_phistank<R: BufRead>(input: R) -> Result<PhishTank, io::Error> { let phishes = serde_json::from_reader(input)?; Ok(PhishTank { phishes }) } + +// Helper function to deserialize a number represented as a string. +// +// The PhishTank dataset represents numbers this way, so the default +// json deserializer implementation will only deserialize to a string. +// +fn deserialize_number<'de, D>(d: D) -> Result<u64, D::Error> + where D: Deserializer<'de> +{ + let s = String::deserialize(d)?; + u64::from_str(&s).map_err(serde::de::Error::custom) +} + +// Helper function to deserialize "yes/no" into a bool. +// +// The PhishTank dataset uses the strings "yes" and "no" to represent +// boolean values. This function helps us deserialize back into a proper +// bool. 
+// +fn deserialize_yesno<'de, D>(d: D) -> Result<bool, D::Error> + where D: Deserializer<'de> +{ + match String::deserialize(d)?.as_ref() { + "yes" => Ok(true), + "no" => Ok(false), + _ => Err(serde::de::Error::custom("Expected \"yes\" or \"no\".")) + } +} |