# encoding: utf-8
#
# Scrapes the store list from www.starbucks.co.jp and writes it out as a
# POI (point of interest) CSV file, "starbucks_poi.csv".
#
# Command line options:
#   -v  verbose mode
#   -j  output japanese
#   -f  download info. instead of existing yaml file

# $KCODE only has an effect on Ruby 1.8; later interpreters ignore it and
# print a warning, so only set it where it matters.
$KCODE = 'u' if RUBY_VERSION < '1.9'

require 'rubygems'
require 'hpricot'
require 'open-uri'
require 'nkf'
require 'uri'
require 'timeout'
require 'yaml'
require 'optparse'

# One store: id, name, position, address, phone number and opening hours.
#
# Instances are serialized to the YAML cache, so the instance variable
# names (including the camelCase @storeId) must stay as they are.
class Starbucks
  attr_accessor :storeId, :name, :address, :lat, :lng, :tel, :open_at

  # Substitutions applied by #open_eng, in this exact order. Order matters:
  # multi-character words containing a weekday character (e.g. "定休日",
  # "曜日", "祝日") must be rewritten before the single weekday characters.
  OPEN_HOURS_TRANSLATIONS = [
    [/\s/, ""],
    ["定休日", "Reg.Holiday"],
    ["不定休", "unfixed"],
    [/(曜日)|曜/, ""],
    [/(祝日)|祝/, "Holiday"],
    ["〜", "-"],
    ["・", ""],
    ["は", ": "],
    ["ドライブスルー", "DriveThru "],
    ["/", "/ "],
    ["月", "Mon."],
    ["火", "Tue."],
    ["水", "Wed."],
    ["木", "Thu."],
    ["金", "Fri."],
    ["土", "Sat."],
    ["日", "Sun."]
  ].freeze

  # id:: store id (Integer)
  # name:: store name
  # lat, lng:: latitude / longitude in degrees
  # addr:: address
  # tel:: phone number
  # op:: opening hours text as shown on the site
  def initialize(id, name, lat, lng, addr, tel, op)
    @storeId = id
    @name = name
    @lat = lat
    @lng = lng
    @address = addr
    @tel = tel
    @open_at = op
  end

  # Opening hours with whitespace removed and the Japanese terms replaced
  # by English abbreviations.
  def open_eng
    OPEN_HOURS_TRANSLATIONS.inject(@open_at) do |text, (pattern, replacement)|
      text.gsub(pattern, replacement)
    end
  end

  # Opening hours in the original Japanese with all whitespace removed.
  def open_jpn
    @open_at.gsub(/\s/, "")
  end

  # CSV-like summary line: id, longitude, latitude, name, address.
  def to_s
    "%d,%8.5f,%8.5f,%s,%s" % [@storeId, @lng, @lat, @name, @address]
  end

  # POI line with English opening hours.
  def to_poi
    "%8.5f,%8.5f,\"Starbucks\", \"%s\"" % [@lng, @lat, open_eng]
  end

  # POI line with the store name and Japanese opening hours.
  def to_poi_j
    "%8.5f,%8.5f,\"Starbucks %s\", \"%s\"" % [@lng, @lat, @name, open_jpn]
  end
end

# Downloads store data (or reuses the YAML cache) and writes the POI file.
class StarbucksDownloader
  # YAML cache of previously downloaded stores.
  CACHE_FILE = 'starbucks.yaml'
  # Output file written by #dump_poi.
  POI_FILE = 'starbucks_poi.csv'
  # Stores listed per result page; also the step of the storelist parameter.
  PAGE_SIZE = 10
  # Store ids at or above this value are dropped.
  # NOTE(review): the reason for the cut-off is not visible here — confirm.
  STORE_ID_LIMIT = 9000

  # lang:: :english or anything else (treated as Japanese output)
  # dl:: true to force a download even if the YAML cache exists
  # verbose:: true to print progress messages
  def initialize(lang, dl, verbose)
    @lang = lang
    @dl = dl
    @verbose = verbose
  end

  # Fetches uri, converts the body to UTF-8 and parses it with Hpricot.
  # Each attempt is limited to 30 seconds; on timeout it waits 5 seconds and
  # tries again, giving up (re-raising Timeout::Error) after 5 attempts.
  def openURI(uri)
    retries = 5
    begin
      # Timeout.timeout instead of the bare top-level timeout(), which newer
      # Rubies removed. URI#read (open-uri) closes the stream once read.
      Timeout.timeout(30) do
        Hpricot.parse(NKF.nkf('-w', URI.parse(uri).read))
      end
    rescue Timeout::Error
      retries -= 1
      raise unless retries > 0
      sleep 5
      retry
    end
  end

  # Returns the number of stores reported by the search result page
  # (0 if no leading number is found).
  def get_total_no
    doc = openURI("http://www.starbucks.co.jp/search/result_store.php")
    result = (doc/"span.S").inner_text
    n = result[/^(\d+)/, 1].to_i
    puts "Number of Starbucks(at glance) : #{n}" if @verbose
    n
  end

  # Walks every result page and returns the store ids found in its links.
  def download_store_ids
    # Pages needed to list every store; always fetch at least the first page.
    # Ceiling division avoids requesting an extra page past the end when the
    # total is an exact multiple of the page size.
    n_of_pages = [(get_total_no + PAGE_SIZE - 1) / PAGE_SIZE, 1].max
    store_ids = []
    n_of_pages.times do |page|
      puts "Processing page #{page + 1}" if @verbose
      uri = "http://www.starbucks.co.jp/search/result_store.php?SearchString=&DriveThrowgh=&Terrace=&HoleBean=&TakeOut=&storelist=#{page * PAGE_SIZE + 1}"
      (openURI(uri)/:a).each do |a|
        # to_s guards against anchors without an href attribute.
        id_text = a[:href].to_s[/storeId=(\d+)/, 1]
        next unless id_text
        id = id_text.to_i
        if id < STORE_ID_LIMIT
          store_ids << id
        elsif @verbose
          puts "Dropped #{id}"
        end
      end
    end
    puts "Number of Starbucks : #{store_ids.size}"
    store_ids
  end

  # Returns a copy of str with "?" removed and newlines replaced by "/".
  # The argument itself is left untouched.
  def strip(str)
    str.delete("?").tr("\n", "/")
  end

  # Converts a position from the Japanese geodetic system to the world
  # geodetic system using a linear approximation.
  # ln:: longitude in degrees
  # la:: latitude in degrees
  # Returns [longitude, latitude] in degrees.
  def conv(ln, la)
    lng = ln - la * 0.000046038 - ln * 0.000083043 + 0.010040
    lat = la - la * 0.00010695 + ln * 0.000017464 + 0.0046017
    [lng, lat]
  end

  # Downloads the detail page of store id and returns it as a Starbucks.
  # Raises if the page does not contain the expected table rows.
  def download_store(id)
    uri = "http://www.starbucks.co.jp/search/map/result.php?storeId=#{id}&lang=ja"
    doc = openURI(uri)
    html = doc.to_original_html
    # reqX / reqY hold longitude / latitude in arc seconds; divide by 3600
    # to get degrees before converting the datum.
    lng = html[/reqX\s*=\s*(\d+\.\d+)/, 1].to_f / 3600.0
    lat = html[/reqY\s*=\s*(\d+\.\d+)/, 1].to_f / 3600.0
    lng, lat = conv(lng, lat)
    name = field_text(doc, '店舗名')
    addr = field_text(doc, '住所')
    tel = field_text(doc, '電話番号')
    op = field_text(doc, '営業時間')
    Starbucks.new(id, name, lat, lng, addr, tel, op)
  end

  # Downloads every store; a store whose download fails is reported and
  # skipped so that one bad page does not abort the whole run.
  def download_stores
    stores = []
    download_store_ids.each do |id|
      begin
        store = download_store(id)
      rescue StandardError => e
        puts "Error in getting store #{id}. Skip..."
        puts "  (#{e.class}: #{e.message})" if @verbose
        next
      end
      stores << store
      puts store.to_s if @verbose
    end
    stores
  end

  # Returns the stores from the YAML cache when it exists and no download
  # was forced; otherwise downloads them and refreshes the cache.
  def get_stores
    return load_cached_stores if File.exist?(CACHE_FILE) && !@dl

    stores = download_stores
    dump_yaml(stores)
    stores
  end

  # Writes stores to the YAML cache file.
  def dump_yaml(stores)
    # Block form so the file is flushed and closed right away.
    File.open(CACHE_FILE, 'w') { |f| YAML.dump(stores, f) }
  end

  # Writes the POI file, one Shift_JIS encoded line per store, in English or
  # Japanese depending on the language given to the constructor.
  def dump_poi
    stores = get_stores
    formatter = @lang == :english ? :to_poi : :to_poi_j
    File.open(POI_FILE, 'w') do |f|
      stores.each { |s| f.puts NKF.nkf('-s', s.send(formatter)) }
    end
  end

  private

  # Text of the table cell that follows the header cell containing label.
  def field_text(doc, label)
    strip(doc.at("th/[text()*='#{label}']").parent.next_sibling.to_plain_text)
  end

  # Loads the Starbucks objects stored in the cache file.
  # The cache contains custom objects, which the safe loader used by newer
  # YAML versions rejects, so the unsafe loader is used where it exists.
  # NOTE: the cache is written by #dump_yaml; never point this at a file
  # from an untrusted source.
  def load_cached_stores
    if YAML.respond_to?(:unsafe_load_file)
      YAML.unsafe_load_file(CACHE_FILE)
    else
      YAML.load_file(CACHE_FILE)
    end
  end
end

lang = :english
dl = false
verbose = false

OptionParser.new do |opt|
  opt.on('-v', 'verbose mode') { verbose = true }
  opt.on('-j', 'output japanese') { lang = :japanese }
  opt.on('-f', 'download info. instead of existing yaml file') { dl = true }
end.parse!(ARGV)

sd = StarbucksDownloader.new(lang, dl, verbose)
sd.dump_poi