# frozen_string_literal: true

# lib/seventy_eights/application.rb
#
# Introduced in commit 34a69da36e1a4dd862a360d5bbef749f0f329d2e
# ("First commit.", Harald Eilertsen, 2014-12-11): a simple Ruby app to
# download the 78's from http://78records.cdbpdx.com/, because clicking on
# close to 6000 files is a bit too much.

require_relative 'record'
require 'nokogiri'
require 'open-uri'

module SeventyEights
  # Reads a listing page, collects one Record per table row that carries a
  # link, and downloads every linked track into a target directory.
  class Application
    # User/password pair sent as HTTP basic authentication with every request
    # unless the caller supplies its own pair.
    DEFAULT_CREDENTIALS = %w[New090908 654321].freeze

    # @param site_url [String] address of the page listing the records
    # @param target_dir [String] directory the downloaded tracks are written to
    # @param credentials [Array<String>] [user, password] for HTTP basic auth
    def initialize(site_url, target_dir, credentials: DEFAULT_CREDENTIALS)
      @site = site_url
      @target_dir = target_dir
      @credentials = credentials
      @records = []
    end

    # Fetches the listing, extracts the records and downloads their tracks,
    # in that order.
    def run
      read_site
      parse
      download_files
    end

    private

    # Fetches the listing page and stores the parsed document in @doc.
    def read_site
      puts "Opening #{@site}..."
      # URI.open replaces the open-uri Kernel#open patch (removed in Ruby 3.0);
      # the block form closes the response once Nokogiri has consumed it.
      @doc = URI.open(@site, http_basic_authentication: @credentials) do |page|
        Nokogiri::HTML(page)
      end
    end

    # Walks every table row of @doc and appends a Record to @records for each
    # row that has at least four cells and a non-empty link in the first cell.
    def parse
      puts 'Parsing...'
      @doc.xpath('//tr').each do |row|
        columns = row.xpath('td')
        next if columns.size < 4

        link = columns[0].xpath('a/@href').text
        next if link.empty?

        title = columns[0].xpath('a/b').text
        artist = columns[1].text
        label = columns[2].text
        catalog = columns[3].text
        @records << Record.new(title, artist, label, catalog, URI.parse(link))
      end
    end

    # Downloads every collected record, skipping those whose output file is
    # already present in the target directory.
    # NOTE(review): relies on Record#to_filename, #title, #artist and #url,
    # which are defined in record.rb (not visible here).
    def download_files
      puts "Downloading #{@records.size} tracks..."
      @records.each.with_index(1) do |record, index|
        outfile = File.join(@target_dir, record.to_filename)
        if File.exist?(outfile)
          puts "Skipping #{index}: #{record.title} by #{record.artist}..."
        else
          puts "#{index}: #{record.title} by #{record.artist}..."
          download(record.url, outfile)
        end
      end
    end

    # Fetches +url+ and writes the response body to +outfile+.
    def download(url, outfile)
      URI.open(url, http_basic_authentication: @credentials) do |track|
        # Binary write: the payload is audio, so no newline or encoding
        # translation may be applied.
        File.binwrite(outfile, track.read)
      end
    end
  end
end