Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Empty file.
21 changes: 21 additions & 0 deletions blog/aduana/bwin_aduana/bwin_aduana/items.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# -*- coding: utf-8 -*-

# Define here the models for your scraped items
#
# See documentation in:
# http://doc.scrapy.org/en/latest/topics/items.html

from scrapy import Item, Field

class PlayerItem(Item):
    """Scraped betting option: a participant's name and the quoted odds."""

    # Display name of the option/participant.
    name = Field()
    # Odds text as scraped from the page (not parsed into a number here).
    odds = Field()

def serialize_players(players):
    """Convert an iterable of player items into a list of plain dicts.

    Used as the ``serializer`` of ``EventItem.players`` so nested items are
    exported as ordinary dictionaries.

    Args:
        players: iterable of mapping-like objects (anything ``dict()`` accepts).

    Returns:
        A new ``list`` with one ``dict`` per player, in input order.
    """
    # Build a concrete list: on Python 3 ``map`` yields a lazy iterator,
    # which exporters (e.g. JSON) cannot serialize.
    return [dict(player) for player in players]

class EventItem(Item):
    """Scraped betting event: league, schedule and the list of players."""

    # League the event belongs to.
    league = Field()
    # Time and date are kept as separate scraped text fields.
    time = Field()
    date = Field()
    # Nested player items; exported through serialize_players.
    players = Field(serializer=serialize_players)
11 changes: 11 additions & 0 deletions blog/aduana/bwin_aduana/bwin_aduana/pipelines.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# -*- coding: utf-8 -*-

# Define your item pipelines here
#
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: http://doc.scrapy.org/en/latest/topics/item-pipeline.html


class BwinAduanaPipeline(object):
    """Pass-through item pipeline: hands every item back unchanged."""

    def process_item(self, item, spider):
        """Return ``item`` as received; ``spider`` is not used."""
        return item
17 changes: 17 additions & 0 deletions blog/aduana/bwin_aduana/bwin_aduana/settings.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# -*- coding: utf-8 -*-

# Scrapy settings for bwin_aduana project
#
# For simplicity, this file contains only the most important settings by
# default. All the other settings are documented here:
#
# http://doc.scrapy.org/en/latest/topics/settings.html
#

# Name of the bot implemented by this Scrapy project.
BOT_NAME = 'bwin_aduana'

# The same package is used both for newly generated spiders and as the
# only module searched for existing spiders.
NEWSPIDER_MODULE = 'bwin_aduana.spiders'
SPIDER_MODULES = [NEWSPIDER_MODULE]

# Crawl responsibly by identifying yourself (and your website) on the user-agent
#USER_AGENT = 'bwin_aduana (+http://www.yourdomain.com)'
4 changes: 4 additions & 0 deletions blog/aduana/bwin_aduana/bwin_aduana/spiders/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# This package will contain the spiders of your Scrapy project
#
# Please refer to the documentation for information on how to create and manage
# your spiders.
42 changes: 42 additions & 0 deletions blog/aduana/bwin_aduana/bwin_aduana/spiders/bwin.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# -*- coding: utf-8 -*-
import scrapy
from scrapy.http import FormRequest

from bwin_aduana.items import PlayerItem, EventItem

class BwinSpider(scrapy.Spider):
    """Spider that scrapes events and player odds from sports.bwin.com.

    A single form request is sent to the multi-league index page; the
    response is split into league sections, each section into events, and
    each event into its players.
    """

    name = "bwin"
    allowed_domains = ["bwin.com", "sports.bwin.com"]

    def start_requests(self):
        """Return the single initial form request, handled by ``parse``."""
        return [
            FormRequest(
                'https://sports.bwin.com/en/sports/indexmultileague',
                formdata={'sportId': '5', 'page': '1'},
                callback=self.parse,
            )
        ]

    def get_players(self, text):
        """Build one ``PlayerItem`` per ``table.options td`` cell in ``text``.

        Args:
            text: selector for a single event node.

        Returns:
            List of ``PlayerItem``; ``odds``/``name`` are ``None`` when the
            corresponding element is missing from a cell.
        """
        players = []
        for td in text.css('table.options td'):
            player = PlayerItem()
            player['odds'] = td.css('.odds::text').extract_first()
            player['name'] = td.css('.option-name::text').extract_first()
            players.append(player)
        return players

    def get_events(self, text):
        """Build one ``EventItem`` per ``ul li`` node inside ``text``.

        Args:
            text: selector for a single league section.

        Returns:
            List of ``EventItem`` with league, time, date and players set.
        """
        # The league name is shared by every event in this section. Use
        # extract_first() so it is a single value (or None), consistent
        # with the time/date fields below.
        league = text.xpath(
            'h2//a[@class="league-link"]/text()').extract_first()
        events = []
        for li in text.css('ul li'):
            event = EventItem()
            # Previously the league was extracted but never stored.
            event['league'] = league
            event['time'] = li.xpath('h6//span[1]/text()').extract_first()
            event['date'] = li.xpath('h6//span[2]/text()').extract_first()
            event['players'] = self.get_players(li)
            events.append(event)
        return events

    def parse(self, response):
        """Collect the events of every league section found in ``response``.

        Returns:
            Flat list of ``EventItem`` across all matched league nodes.
        """
        leagues = response.xpath(
            '//div[@id="bet-offer"]//div[@id="international-highlights"]'
            '//div//ul//li')
        events = []
        for league in leagues:
            events.extend(self.get_events(league))

        return events
11 changes: 11 additions & 0 deletions blog/aduana/bwin_aduana/scrapy.cfg
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Automatically created by: scrapy startproject
#
# For more information about the [deploy] section see:
# http://doc.scrapy.org/en/latest/topics/scrapyd.html

[settings]
default = bwin_aduana.settings

[deploy]
#url = http://localhost:6800/
project = bwin_aduana