require 'yajl'
require 'em-http'
require 'em-http/middleware/json_response'
# Highest event id handled so far; the poller skips events whose id is at or
# below this value.
$latest_event_id = 0

# Root URL of the GitHub API; the poller appends "/events" to it.
$base_api_url = "https://内部github链接"

# Running totals reported when the crawler is terminated.
$push_event_count = $push_event_commit_count = 0
$create_event_repo_count = $create_event_branch_count = $create_event_tag_count = 0
# Runs the crawler inside an EventMachine reactor: every 12 seconds it fetches
# `$base_api_url + "/events"`, counts the events not seen before into the
# `$..._count` globals, and prints the totals when SIGINT or SIGTERM arrives.
EM.run do
  # Signal handler: report the totals gathered so far, then stop the reactor.
  # Kept as a plain proc (not a lambda) because Signal.trap passes the signal
  # number to the handler and a zero-arity lambda would reject it.
  stop = proc do
    puts "Terminating event crawler"
    puts "Following events occurs during monitor:"
    puts "Push #{$push_event_count}"
    puts "Commit #{$push_event_commit_count}"
    puts "Repo creation #{$create_event_repo_count}"
    puts "Branch creation #{$create_event_branch_count}"
    puts "Tag creation #{$create_event_tag_count}"
    EM.stop
  end
  Signal.trap("INT", &stop)
  Signal.trap("TERM", &stop)

  # One polling round: request the events page and fold new events into the
  # global counters once the response arrives.
  process = proc do
    req = EM::HttpRequest.new($base_api_url + "/events").get

    # Surface transport failures instead of dropping them silently; the
    # periodic timer will simply try again on its next tick.
    req.errback do
      warn "Failed to fetch events: #{req.error}"
    end

    req.callback do
      latest_page = Yajl::Parser.parse(req.response)

      # An API error is delivered as a JSON object rather than an event list;
      # skip this round instead of crashing the reactor on it.
      unless latest_page.is_a?(Array)
        warn "Unexpected events response, skipping this poll"
        next
      end

      new_events = latest_page.reject { |e| e['id'].to_i <= $latest_event_id }

      # Advance the cursor monotonically. Including the current value keeps it
      # an Integer when the page is empty and prevents it from moving backwards.
      page_ids = latest_page.map { |e| e['id'].to_i }
      $latest_event_id = [$latest_event_id, *page_ids].max

      new_events.each do |event|
        payload = event['payload'] || {}

        case event['type']
        when "PushEvent"
          $push_event_count += 1
          # `to_i` turns a missing size into 0 instead of raising TypeError.
          $push_event_commit_count += payload['size'].to_i
        when "CreateEvent"
          # The kind of object created is carried in the payload's 'ref_type'
          # field; the payload itself is a Hash and never equals a String.
          case payload['ref_type']
          when "repository" then $create_event_repo_count += 1
          when "branch" then $create_event_branch_count += 1
          when "tag" then $create_event_tag_count += 1
          end
        end
      end

      puts "Found #{new_events.size} new events"

      # A page with this many new events triggers an extra poll after 1.5
      # seconds rather than waiting for the next periodic tick (presumably to
      # catch up with a busy feed).
      EM.add_timer(1.5, &process) if new_events.size >= 25
    end
  end

  EM.add_periodic_timer(12, &process)
end
2012/07/24
一个简单的github event爬虫
最近在考虑用 Cacti 对现有的 application 做一些简单的监控。公司内部有一套 GitHub 供 developer 使用,针对 GitHub 的监控主要侧重于 push、commit、create 这几类行为。
今天迈出第一步,照猫画虎地用 Ruby 的 EventMachine 写了一个抓取 GitHub event 的小爬虫,数据处理部分以后再慢慢设计。
代码如下:
订阅:
博文评论 (Atom)
so good !!!!!!!!!!! bro.
回复删除