Changes

Jump to navigation Jump to search
no edit summary
[http://www.chambredesrepresentants.ma/ar/%D8%A7%D9%84%D8%AA%D8%B4%D8%B1%D9%8A%D8%B9/%D9%85%D8%B4%D8%A7%D8%B1%D9%8A%D8%B9-%D8%A7%D9%84%D9%82%D9%88%D8%A7%D9%86%D9%8A%D9%86 Monarchy Proposed Bills]
 
The data that needs to be extracted from this site includes the PDFs of all the bill pages, as well as any interior PDFs linked on each page. Each bill-page PDF should be named after its URL, and each interior PDF should be named after its bill number.
===Moroccan House of Representatives Proposed Bills===
[http://www.chambredesrepresentants.ma/ar/%D8%A7%D9%84%D8%AA%D8%B4%D8%B1%D9%8A%D8%B9/%D9%84%D8%A7%D8%A6%D8%AD%D8%A9-%D9%85%D9%82%D8%AA%D8%B1%D8%AD%D8%A7%D8%AA-%D8%A7%D9%84%D9%82%D9%88%D8%A7%D9%86%D9%8A%D9%86 House Proposed Bills]
 
See Monarchy proposed bills for instructions.
===Moroccan Legislature Ratified Bills===
[http://www.chambredesrepresentants.ma/ar/%D8%A7%D9%84%D8%AA%D8%B4%D8%B1%D9%8A%D8%B9/%D8%A7%D9%84%D9%86%D8%B5%D9%88%D8%B5-%D8%A7%D9%84%D8%AA%D9%8A-%D8%B5%D8%A7%D8%AF%D9%82-%D8%B9%D9%84%D9%8A%D9%87%D8%A7-%D9%85%D8%AC%D9%84%D8%B3-%D8%A7%D9%84%D9%86%D9%88%D8%A7%D8%A8?field_legislature_tid=All&field_nature_loi_tid=All&page=27 Ratified Bills]
 
See Monarchy proposed bills for instructions.
===Moroccan Legislature Written Questions===
[http://www.chambredesrepresentants.ma/ar/%D8%A7%D9%84%D8%A3%D8%B3%D9%80%D8%A6%D9%84%D8%A9-%D8%A7%D9%84%D9%83%D8%AA%D8%A7%D8%A8%D9%8A%D8%A9 Written Questions]
 
 
===Example Code===
 
#General Bill Download
 
from selenium import webdriver
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.keys import Keys
import time
import urllib
import string
import re
 
#launch Google Chrome Browser; this module-level driver is the one used by
#switch_window() and by the download script below
driver = webdriver.Chrome()
 
def switch_window():
    """Focus the module-level driver on the last entry of its window handles.

    The download script calls this after every shift-click and after every
    ``driver.close()``; it assumes the last handle is the window it wants
    to work in next (the newly opened one, or the one left after a close).
    """
    last_handle = driver.window_handles[-1]
    driver.switch_to_window(last_handle)
 
 
 
def _shift_click(element):
    """Click *element* while SHIFT is held down.

    The script relies on a shift-click opening the link target in a new
    browser window, which switch_window() then focuses.  SHIFT is released
    even if the click raises, so later clicks are not affected.
    """
    ActionChains(driver).key_down(Keys.SHIFT).perform()
    try:
        element.click()
    finally:
        ActionChains(driver).key_up(Keys.SHIFT).perform()


def _tag_from_url(url):
    """Build the file name for a bill page from its URL.

    The URL is percent-decoded to unicode, split on "/", and every part from
    index 4 onwards is concatenated in reverse order (last part first) with
    no separator -- the original "build arabic tag backwards" behaviour.
    """
    unicode_url = urllib.unquote(str(url)).decode('utf8')
    url_parts = unicode_url.split("/")
    return "".join(reversed(url_parts[4:]))


try:
    #Visit desired website
    #(the URL must not contain whitespace; a stray space inside the
    #percent-encoded path changes the path that is requested)
    driver.get('http://www.chambredesrepresentants.ma/ar/%D8%A7%D9%84%D8%AA%D8%B4%D8%B1%D9%8A%D8%B9/%D9%84%D8%A7%D8%A6%D8%AD%D8%A9-%D9%85%D9%82%D8%AA%D8%B1%D8%AD%D8%A7%D8%AA-%D8%A7%D9%84%D9%82%D9%88%D8%A7%D9%86%D9%8A%D9%86?body_value=&field_og_commission_target_id=All')

    bills_list = driver.find_elements_by_xpath("//li/h3/a")
    for bill_link in bills_list:
        #Open the bill page in a new window and name its pdf after the URL
        _shift_click(bill_link)
        switch_window()
        tag = _tag_from_url(driver.current_url)

        #Navigate to pdf of website
        change_button = driver.find_elements_by_xpath("//a [@class='pdf' and @rel='nofollow']")[0]
        _shift_click(change_button)
        switch_window()

        #Saves file at the pdf window's URL to current directory
        urllib.urlretrieve(driver.current_url, tag)

        #Close the pdf window and return to the bill page
        driver.close()
        switch_window()

        #finds interior pdfs on the page (an empty list simply skips the loop)
        pdfs_on_page = driver.find_elements_by_xpath("//div/div/div/article/div/ul/li/a")
        for element in pdfs_on_page:
            #click on pdf
            _shift_click(element)
            switch_window()

            url = driver.current_url
            pdf_tag = str(url).split("/")[-1]

            #leaves link if it is not a pdf; the dot is escaped so that only
            #a literal ".pdf" counts
            if re.search(r"\.pdf", pdf_tag):
                #saves interior pdf
                urllib.urlretrieve(url, pdf_tag)

            #The opened window is closed whether or not it was a pdf, so the
            #next click happens on the bill page again.
            driver.close()
            switch_window()

        #Close the bill page and return to the bill list
        driver.close()
        switch_window()

    print("download complete")
finally:
    #close browser, even when a download step failed part-way through
    driver.quit()
===Further Inquiries===

Navigation menu