diff options
Diffstat (limited to 'mubi-to-csv')
-rwxr-xr-x | mubi-to-csv | 29 |
1 files changed, 29 insertions, 0 deletions
#!/usr/bin/env ruby
# frozen_string_literal: true

# Copyright (C) 2020 Harald Eilertsen
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

# Converts a saved HTML page of film tiles into CSV on standard output.
#
# Usage: mubi-to-csv FILE
#
# One row is written per `.film-tile` element, with the columns
# title, director, year, date (ISO 8601) and rating ("N/5").

require 'date'
require 'time'

# Column header emitted as the first line of output.
CSV_HEADER = 'title,director,year,date,rating'

# Wraps a value in double quotes for use as a CSV field.
#
# Embedded double quotes are doubled, so a title such as
# `The "Best" Film` stays a single, well-formed field.
#
# @param value [#to_s] the raw field value
# @return [String] the quoted field
def csv_quote(value)
  %("#{value.to_s.gsub('"', '""')}")
end

# Splits the text of a `.director-year` element into its two parts.
#
# The text is split at the LAST comma, so director lists that contain
# commas themselves ("A, B, 2018") keep all names together. When the text
# contains no comma, the director is empty and the whole text is the year.
#
# @param text [String] raw element text, possibly spread over several lines
# @return [Array(String, String)] director and year, both stripped
def split_director_year(text)
  # Runs of two or more whitespace characters (markup indentation) are
  # removed outright, then any remaining single newlines.
  flattened = text.strip.gsub(/\s{2,}/, '').delete("\n")
  director, _, year = flattened.rpartition(',')
  [director.strip, year.strip]
end

# Formats a Unix timestamp as an ISO 8601 calendar date in the local zone.
#
# @param timestamp [String, Integer, nil] seconds since the epoch
# @return [String] e.g. "2020-01-01", or "" when the timestamp is missing
#   or is not an integer (rather than silently reporting 1970-01-01)
def viewing_date(timestamp)
  seconds = Integer(timestamp.to_s.strip, 10, exception: false)
  seconds ? Time.at(seconds).to_date.iso8601 : ''
end

# Builds one CSV line (without trailing newline) for a film.
#
# Title and director are always quoted; year, date and rating are written
# bare. A missing rating or timestamp yields an empty field.
#
# @param title [String]
# @param director [String]
# @param year [String]
# @param timestamp [String, Integer, nil] Unix time of the viewing
# @param rating [String, nil] rating value, written as "<rating>/5"
# @return [String] the CSV line
def csv_row(title:, director:, year:, timestamp:, rating:)
  rating_field = rating.nil? || rating.to_s.empty? ? '' : "#{rating}/5"
  [
    csv_quote(title),
    csv_quote(director),
    year,
    viewing_date(timestamp),
    rating_field
  ].join(',')
end

# Parses the HTML file at +path+ and writes the header plus one CSV row per
# `.film-tile` element to +out+.
#
# @param path [String] path of the saved HTML page
# @param out [#puts] destination for the CSV lines
# @return [void]
# @raise [SystemCallError] if the file cannot be read
def mubi_to_csv(path, out: $stdout)
  # Loaded lazily so the formatting helpers above work without the gem.
  require 'nokogiri'

  # File.read, unlike IO.read, never treats a leading "|" as a command.
  # The file is read before the header is printed so that an unreadable
  # file does not leave a partial CSV behind.
  doc = Nokogiri::HTML(File.read(path))

  out.puts CSV_HEADER
  doc.search('.film-tile').each do |tile|
    director, year = split_director_year(tile.search('.director-year').text)

    # NodeSet#attr yields nil when no matching element exists, hence the
    # safe navigation for both optional attributes.
    out.puts csv_row(
      title: tile.search('.film-title').text.strip,
      director: director,
      year: year,
      timestamp: tile.search('.view-log__viewing-info').attr('data-timestamp')&.text,
      rating: tile.search('.film-actions__rating').attr('rating')&.text
    )
  end
end

if $PROGRAM_NAME == __FILE__
  if ARGV.empty?
    warn "usage: #{File.basename($PROGRAM_NAME)} FILE"
  else
    mubi_to_csv(ARGV[0])
  end
end