-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathrakefile
More file actions
127 lines (110 loc) · 3.73 KB
/
rakefile
File metadata and controls
127 lines (110 loc) · 3.73 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
namespace :db do
# Applies the Sequel migrations stored in the `migrate` directory to the
# database named by the DATABASE_URL environment variable (loaded via Dotenv).
#
# Usage:
#   rake db:migrate        # migrate to the latest version
#   rake db:migrate[42]    # migrate to version 42
#
# Raises ArgumentError when the version is not a base-10 integer and KeyError
# when DATABASE_URL is not set.
desc 'Run migrations'
task :migrate, [:version] do |_task, args|
  # Validate before loading anything or touching the database. String#to_i
  # would silently turn a typo such as "abc" into 0, and the migrator would
  # then be asked to migrate to version 0. Base 10 is forced so that a
  # zero-padded version like "0012" is not read as octal.
  target = args[:version] && Integer(args[:version].to_s, 10)

  require 'sequel'
  require 'dotenv'
  Dotenv.load
  Sequel.extension :migration

  # A local rather than a constant: assigning a constant inside the task body
  # leaks a global and warns if the body ever runs twice in one process.
  db = Sequel.connect(ENV.fetch('DATABASE_URL'))

  if target
    puts "Migrating to version #{args[:version]}"
    Sequel::Migrator.run(db, 'migrate', target: target)
  else
    puts 'Migrating to latest'
    Sequel::Migrator.run(db, 'migrate')
  end
end
# Runs the complete scrape by calling NetaScraper.scrape_all_mlas followed by
# NetaScraper.scrape_mps. The scraper is loaded only when the task runs.
desc 'Scrape all MLAs and MPs'
task :scrape do
  require_relative 'lib/neta_scraper.rb'

  puts 'Started scraping...'
  NetaScraper.scrape_all_mlas
  NetaScraper.scrape_mps
  puts 'Done scraping'
end
# Scrapes MP data for the requested years, or for every year returned by
# NetaScraper.mp_urls when no year is given. A year that already has MP rows
# is skipped so that running the task again does not create duplicates.
#
# Usage:
#   rake db:scrape_mps              # all known years
#   rake db:scrape_mps[2019,2024]   # only the listed years
desc 'Scrape MPs for specific years (e.g., rake db:scrape_mps[2019,2024])'
task :scrape_mps, [:years] do |_task, args|
  require_relative 'lib/neta_scraper.rb'
  require_relative 'models.rb'

  years_to_scrape =
    if args[:years]
      # Rake splits the bracketed argument list on unescaped commas, so
      # `[2019,2024]` arrives as years="2019" with "2024" in args.extras.
      # Merge both; still split on commas for the escaped `[2019\,2024]` form.
      [args[:years], *args.extras]
        .flat_map { |value| value.to_s.split(',') }
        .map(&:strip)
        .reject(&:empty?)
    else
      # Scrape all available years if none specified
      puts "No years specified, scraping all available..."
      nil
    end

  puts "Starting to scrape MP data for: #{years_to_scrape || 'all available years'}"

  urls = NetaScraper.mp_urls

  # Report requested years that have no known URL instead of ignoring them
  # silently (same wording as db:rescrape_mps).
  (years_to_scrape || []).reject { |year| urls.key?(year) }.each do |year|
    puts " - Year #{year} not found in known URLs, skipping..."
  end

  urls.each do |year, url|
    next unless years_to_scrape.nil? || years_to_scrape.include?(year)

    puts "Checking year #{year}..."

    # Check if already scraped to avoid duplicates
    existing = MP.filter(year: year).count
    if existing.positive?
      puts " - Year #{year} already has #{existing} records, skipping..."
      puts " - To re-scrape, delete existing records first"
      next
    end

    puts " - Scraping #{year}..."
    NetaScraper.scrape_mp_year(year, url)
  end

  puts 'Done scraping MPs!'
end
# Deletes the stored MP rows for each requested year and scrapes that year
# again. At least one year is required, and the user must type "yes" at the
# prompt before anything is deleted.
#
# Usage:
#   rake db:rescrape_mps[2024]
#   rake db:rescrape_mps[2019,2024]
desc 'Re-scrape MPs (deletes existing data first) for specific years (e.g., rake db:rescrape_mps[2024])'
task :rescrape_mps, [:years] do |_task, args|
  require_relative 'lib/neta_scraper.rb'
  require_relative 'models.rb'

  # Rake splits the bracketed argument list on unescaped commas, so
  # `[2019,2024]` arrives as years="2019" with "2024" in args.extras. Merge
  # both; still split on commas for the escaped `[2019\,2024]` form.
  years_to_scrape = [args[:years], *args.extras]
                    .compact
                    .flat_map { |value| value.to_s.split(',') }
                    .map(&:strip)
                    .reject(&:empty?)
                    .uniq

  if years_to_scrape.empty?
    puts "ERROR: Please specify years to re-scrape"
    puts "Usage: rake db:rescrape_mps[2024] or rake db:rescrape_mps[2019,2024]"
    exit 1
  end

  puts "WARNING: This will DELETE existing data for: #{years_to_scrape.join(', ')}"
  print "Are you sure? (yes/no): "

  # gets returns nil at EOF (e.g. stdin closed in a non-interactive run);
  # treat that as "no" rather than crashing with NoMethodError.
  confirm = STDIN.gets.to_s.strip
  unless confirm.downcase == 'yes'
    puts "Aborted."
    exit 0
  end

  urls = NetaScraper.mp_urls

  years_to_scrape.each do |year|
    unless urls.key?(year)
      puts " - Year #{year} not found in known URLs, skipping..."
      next
    end

    puts "Deleting existing MP data for #{year}..."
    deleted = MP.filter(year: year).delete
    puts " - Deleted #{deleted} records"

    puts " - Scraping #{year}..."
    NetaScraper.scrape_mp_year(year, urls[year])
  end

  puts 'Done re-scraping MPs!'
end
# Delegates to DbStats.run. The helper file is required inside the task so it
# is loaded only when this task is actually invoked.
desc 'Show database statistics'
task(:stats) do
  require_relative 'lib/db_stats'

  DbStats.run
end
# Starts an interactive IRB session after loading models.rb, printing a short
# usage banner first.
desc 'Open database console (IRB)'
task(:console) do
  require_relative 'models.rb'

  puts "Loading console with MP and MLA models...",
       "Type 'MP' or 'MLA' to query, 'exit' to quit"

  require 'irb'

  # NOTE(review): ARGV is emptied before IRB starts, presumably so that IRB
  # does not interpret rake's own command-line arguments — confirm.
  ARGV.clear
  IRB.start
end
end
# Defines the :spec task, which runs every spec/**/*_spec.rb file with the
# documentation formatter.
#
# The require is guarded because an unconditional require aborts the whole
# Rakefile with LoadError wherever RSpec is not installed, which would also
# take down the db:* tasks. Without RSpec, :spec is simply not defined.
begin
  require 'rspec/core/rake_task'

  RSpec::Core::RakeTask.new(:spec) do |t|
    t.pattern = Dir.glob('spec/**/*_spec.rb')
    t.rspec_opts = '--format documentation'
  end
rescue LoadError
  # RSpec is not available here; skip defining the :spec task.
end
# This is only for testing.
#
# Prints "Hi <name>" for the given task argument, e.g. `rake hi[Ada]`.
# `desc` must come before `task`: called inside the task body it runs only
# when the task executes, leaving :hi undocumented and handing its
# description to whichever task is defined next.
desc 'Say hi'
task :hi, [:name] do |_task, args|
  puts "Hi #{args[:name]}"
end