
 3 years ago
source link: http://maskray.me/blog/2011-12-24-net-lesson
Go to the source link to view the article. You can view the picture content, updated content and better typesetting reading experience. If the link is broken, please click the button below to view the snapshot at that time.


用网页来看网络学堂的公告很费时间,就打算像 rss2email 那样,把消息制作成邮件。
用 Ruby 的 mechanize 来和网站交互,读取“课程公告”和“课程文件”中的消息,
把链接 sha1 后判断是否生成过提示邮件,没有则跟踪链接,用 w3m 输出成纯文本,
生成的邮件用 sendmail 投递。

# net_lesson: log in to learn.tsinghua.edu.cn, walk every course, and send one
# e-mail (via sendmail) for each course notice / course file link that has not
# been reported before.
#
# State is kept in ~/.net_lesson:
#   passwd    - first line is "<userid> <passwd>"
#   feeds.dat - concatenated 20-byte binary SHA1 digests of links already mailed
#
# NOTE(review): the copy of this script under review had lost its block
# terminators, the login form submission, the mail-header emission and the
# w3m `else` branch. They are restored here from the surrounding logic;
# confirm against a pristine copy before relying on the details.
require 'base64'
require 'date'
require 'digest/sha1'
require 'etc'
require 'mechanize'
require 'set'

# Size in bytes of one raw SHA1 digest record in feeds.dat.
DIGEST_SIZE = 20

agent = Mechanize.new
agent.max_history = 1

conf_dir = File.expand_path('~/.net_lesson')
passwd_path = File.join(conf_dir, 'passwd')
feeds_path = File.join(conf_dir, 'feeds.dat')

unless File.directory?(conf_dir) && File.file?(passwd_path)
  abort 'echo [userid] [passwd] > ~/.net_lesson/passwd'
end

# Only the first line is read; an empty or one-token file is a setup error.
userid, passwd = File.open(passwd_path, 'r', &:gets).to_s.split
abort 'echo [userid] [passwd] > ~/.net_lesson/passwd' unless userid && passwd

# Etc.getlogin returns nil when there is no login name for the process
# (e.g. when started from cron); fall back to the passwd entry of our uid.
recipient = Etc.getlogin || Etc.getpwuid(Process.uid).name

# Digests of links that were already mailed. feeds.dat does not exist before
# the first successful run, so its absence simply means "nothing seen yet".
# IO#read(n) returns nil at end of file, which ends the loop.
feeds = Set.new
if File.file?(feeds_path)
  File.open(feeds_path, 'r:binary') do |f|
    while (digest = f.read(DIGEST_SIZE))
      feeds.add digest
    end
  end
end
new_feeds = []
puts "loaded #{feeds.size} feeds" unless feeds.empty?

page = agent.get('http://learn.tsinghua.edu.cn/')
form = page.form('form1')
form.field_with(name: 'userid').value = userid
form.field_with(name: 'userpass').value = passwd
form.submit
puts 'login'

page = agent.get('http://learn.tsinghua.edu.cn/MultiLanguage/lesson/student/MyCourse.jsp?language=cn')
page.links_with(href: /course_locate.jsp/).each do |lesson|
  # Drop all whitespace and anything from the first "(" onwards.
  lesson_name = lesson.text.gsub(/\s/, '').sub(/\(.*/, '')
  lesson_page = lesson.click
  puts "checking #{lesson_name}"

  # Two sections are scanned per course; 'download.jsp' entries are treated
  # as files, the other section's entries as notices.
  ['getnoteid_student.jsp', 'download.jsp'].each do |uri|
    download = uri == 'download.jsp'
    section = lesson_page.link_with(href: /#{uri}/)
    next unless section # this course has no such section link

    listing = section.click
    listing.links_with(href: /note_reply|filePath/).each do |note|
      h = Digest::SHA1.digest note.href
      next if feeds.member? h

      title = note.text.strip
      puts " found #{title}"

      # The last two cells following the link's cell supply the author
      # (second to last) and the date (last).
      cells = note.node.xpath('../following-sibling::td')
      author = (download ? 'file ' : '') + cells[-2].text
      time = Date.parse(cells[-1].text).strftime '%a, %d %b %Y 00:00:00 +0800'
      bar = download ? "[#{lesson_name}]" : "*#{lesson_name}*"

      IO.popen(['/usr/sbin/sendmail', recipient], 'w') do |mail|
        mail.puts "From: #{author} <#{recipient}>",
                  "Subject: =?utf-8?B?#{Base64.strict_encode64("#{bar} #{title}")}?=",
                  "Date: #{time}",
                  'User-Agent: net_lesson',
                  'Content-Type: text/plain; charset="utf-8"',
                  'Content-Transfer-Encoding: binary',
                  '' # blank line separates headers from body

        if download
          mail.puts note.text
        else
          # Follow the notice link and let w3m render the HTML as plain text.
          IO.popen(['w3m', '-dump', '-T', 'text/html'], 'r+') do |w3m|
            w3m.write note.click.body
            w3m.close_write
            mail.puts w3m.read
          end
        end

        # Percent-encode CJK ideographs byte by byte so the href can be merged
        # into a URI (same result as the removed URI.escape(str, regexp)).
        escaped_href = note.href.gsub(/[\u4E00-\u9FFF]/) do |ch|
          ch.bytes.map { |b| format('%%%02X', b) }.join
        end
        mail.puts "\nURI: #{listing.uri.merge(escaped_href)}"
      end

      new_feeds << h
      feeds.add h # do not mail the same link twice within one run
    end
  end
end

unless new_feeds.empty?
  puts "appending to feeds.dat"
  File.open(feeds_path, 'a:binary') { |f| f.write(new_feeds.join) }
end



#encoding: utf-8
# Scrape two Tsinghua notice pages and write each as an Atom feed under /tmp:
#   /tmp/教务通知.rss - rows of the oars.tsinghua.edu.cn "infobytime" view
#   /tmp/重要通知.rss - links on the info.tsinghua.edu.cn pre-login notice page
#
# NOTE(review): the copy of this script under review had lost its closing
# `}` / `end` lines; the nesting below is restored from the logic.
require 'open-uri'
require 'nokogiri'
require 'rss'
require 'time' # Time.parse

OARS_BASE = URI('http://oars.tsinghua.edu.cn')
OARS_INDEX = 'http://oars.tsinghua.edu.cn/zzh/30630.nsf/infobytime?openview'
INFO_NOTICES = 'http://info.tsinghua.edu.cn/html/view/notice_beforelogin.htm'

# Download +url+ and return the response body as a String.
# Uses OpenURI::OpenRead#read rather than Kernel#open, which no longer
# accepts URLs on Ruby 3.0+.
def fetch(url)
  URI.parse(url).read
end

# Build an Atom feed titled +title+, yield the maker so the caller can add
# items, and write the result to /tmp/<title>.rss.
def write_atom_feed(title)
  feed = RSS::Maker.make('atom') do |maker|
    maker.channel.author = ''
    maker.channel.about = ''
    maker.channel.updated = Time.now.to_s
    maker.channel.title = title
    yield maker
  end
  File.write "/tmp/#{title}.rss", feed.to_s
end

# The index page only contains a script that redirects via
# location.replace("..."); pull the target path out of the first script.
script = Nokogiri.parse(fetch(OARS_INDEX)).at_xpath('//script/text()')
target = script && script.text[/(?<= location\.replace\(")[^"]*(?=")/]
abort "no location.replace redirect found at #{OARS_INDEX}" unless target

# The page is transcoded from GBK; undecodable bytes are replaced so one bad
# character does not abort the whole run.
listing = fetch("http://oars.tsinghua.edu.cn#{target}")
listing = listing.force_encoding('gbk').encode('utf-8', invalid: :replace, undef: :replace)
doc = Nokogiri.parse(listing)

write_atom_feed('教务通知') do |maker|
  doc.xpath('//tr[contains(@valign, "top")]').each do |tr|
    tds = tr.search('td')
    next if tds.size != 4

    # The item link is taken from the second <a> in the third cell.
    anchor = tds[2].search('a')[1]
    next unless anchor

    maker.items.new_item do |item|
      item.link = (OARS_BASE + anchor.attr('href')).to_s
      item.title = tds[2].text
      item.updated = Time.parse tds[3].text
    end
  end
end

write_atom_feed('重要通知') do |maker|
  # This page carries no per-item date in what is scraped here, so every item
  # is stamped with the time of this run.
  now = Time.now
  page = URI.parse(INFO_NOTICES).open { |io| Nokogiri::HTML(io) }
  page.xpath('//td').each do |td|
    next if td.children.size != 2

    anchor = td.search('a')[0]
    next unless anchor

    maker.items.new_item do |item|
      # NOTE(review): hrefs are resolved against oars.tsinghua.edu.cn although
      # the page comes from info.tsinghua.edu.cn - harmless for absolute
      # hrefs, but confirm this is intended for relative ones.
      item.link = (OARS_BASE + anchor.attr('href')).to_s
      item.title = anchor.text
      item.updated = now
    end
  end
end


% fcrontab -l
@ 1h /home/ray/bin/教务通知.rb 2>> /tmp/stderr


Share Comments

About Joyk

Aggregate valuable and interesting links.
Joyk means Joy of geeK