[cvs] / lkrtweb / utils / scrape-functional.py  

cvs: lkrtweb/utils/scrape-functional.py

File: [cvs] / lkrtweb / utils / scrape-functional.py (download) (as text)
Revision: 1.1, Sun Apr 8 00:27:37 2007 UTC (6 years, 1 month ago) by tobias
Branch: MAIN
CVS Tags: HEAD
add functional data scraper util

#!/usr/bin/env python 

import re
from  urllib2 import urlopen
from lkrtweb.lkrt.models import KernelVersion, ClientMachine, Status, TestType, FunctionalTest

def geturl(url):
	"""Fetch *url* and return the whole response body as read by urlopen.

	The response handle is closed even when reading fails, so an error
	part-way through a download does not leak the connection.
	"""
	f = urlopen(url)
	try:
		return f.read()
	finally:
		f.close()

class StringMatcher:
	"""Cursor over a string that applies compiled regexes from the current position.

	Every successful search()/match() moves the cursor (cur_pos) to just
	past the matched text; a failed attempt leaves the cursor untouched.
	"""

	def __init__(self, str):
		# 'str' shadows the builtin, but the parameter name is kept so
		# that existing keyword callers keep working.
		self.cur_pos = 0
		self.str = str

	def _advance(self, m):
		"""Move the cursor past match object *m* (if any) and return *m*."""
		if m is not None:
			self.cur_pos = m.end()
		return m

	def search(self, reobj):
		"""Find the first match of compiled regex *reobj* at or after the cursor.

		Returns the match object, or None when nothing matches.
		"""
		return self._advance(reobj.search(self.str, self.cur_pos))

	def match(self, reobj):
		"""Match compiled regex *reobj* starting exactly at the cursor.

		Returns the match object, or None when the text at the cursor
		does not match.
		"""
		return self._advance(reobj.match(self.str, self.cur_pos))

def parse_job_page(db_kversion, host, url):
	"""Scrape one functional-results page and save every test row it lists.

	db_kversion -- kernel version record stored on each saved FunctionalTest
	host        -- name used to look up the ClientMachine record
	url         -- page location, appended to http://test.kernel.org/functional/

	NOTE(review): the ClientMachine and Status lookups use objects.get(),
	so an unknown host or status name presumably raises -- confirm against
	the model layer.
	"""
	db_client = ClientMachine.objects.get(name=host)

	# fetch the specified results page
	jp = StringMatcher(geturl("http://test.kernel.org/functional/" + url))

	# parse it and save the data to the DB; each func_test match is one row:
	# group 1 = test type name, 2 = test id, 3 = status name, 4 = reason
	m = jp.search(func_test)
	while m is not None:
		db_status = Status.objects.get(name=m.group(3))
		# only the record is needed, not the "created" flag
		db_type, _created = TestType.objects.get_or_create(name=m.group(1))

		db_ftest = FunctionalTest(
			id=m.group(2),
			type=db_type,
			kversion=db_kversion,
			client=db_client,
			status=db_status,
			reason=m.group(4),
		)
		db_ftest.save()

		m = jp.search(func_test)
		
# Patterns for walking the results index page and the per-job pages.
# Raw strings keep the regex escapes (\s, \w, \d, \., \+, \/) out of the
# string-literal escape processing; the pattern text itself is unchanged.

# start of a kernel row; group 1 is the base version
version = re.compile(r'<td>([a-zA-Z0-9.-]+)\s')
# one applied patch; group 1 is the patch location after the leading "../"
patch = re.compile(r'<br>\+<a href="\.\.\/(.+)">.+</a>\s')
# end of the version cell
end_version = re.compile(r'</td>\s')
# result cell holding only &nbsp (no link to a job page)
nonlink = re.compile(r'<td>&nbsp</td>\s')
# result cell with a link; group 1 is the job page location, group 2 the link text
link = re.compile(r'<td.+\s.+"(.+)">(\w+)</a>\s</td>\s')
# one row of a job page; groups: 1 test type, 2 test id, 3 status, 4 reason
func_test = re.compile(r'<td>(\w+)</td>\s<td><.+>(\d+)</a></td>\s<td><.+>(\w+)</a></td>\s<td><.+>(.+)</a></td>\s</tr>')

# Walk the results index: for every kernel version row, record the version
# and its patches, then scrape the job page linked from each host column.
main = StringMatcher(geturl("http://test.kernel.org/functional/index.html"))

# one results column per machine, in the order the columns are parsed below
hosts = [ 'elm3b6', 'moe', 'elm3b132', 'elm3b133', 'gekko-lp1', 'pSeries-101', 'bl6-13', 'elm3b239' ]

# set when a result cell does not look like either expected form, so the
# per-host loop can stop the whole scrape instead of crashing on a None match
parse_failed = False

# single-argument print(...) behaves the same as the print statement here
while not parse_failed:
	# get the base version and create a record in the DB
	m = main.search(version)
	if m is None:
		print("no more kernels found, exiting")
		break

	db_kversion = KernelVersion(base=m.group(1))
	db_kversion.save()

	# find the patches and add them to the kernel version (also in the DB)
	m = main.match(patch)
	while m is not None:
		db_kversion.add_patch_at_url(m.group(1))
		m = main.match(patch)

	m = main.search(end_version)
	if m is None:
		print("unexpected error: no end_version found")
		break

	print("Fetching results for %s" % db_kversion.printable)

	# now get the actual results, page by page:
	# one cell per machine, listed (in order) in the list 'hosts' above
	for host in hosts:
		if main.match(nonlink) is not None:
			# cell without a link: nothing to fetch for this host
			continue
		m = main.match(link)
		if m is None:
			# neither cell form matched: the page layout is not what
			# this scraper expects, so stop rather than misattribute data
			print("unexpected error: no result cell found for host %s" % host)
			parse_failed = True
			break
		parse_job_page(db_kversion, host, m.group(1))
		

Tobias McNulty

Powered by ViewCVS 1.0-dev
(Powered by ViewCVS)

ViewCVS and CVS Help