from datetime import datetime, timedelta
from airflow.utils.dates import days_ago
from airflow import DAG
from airflow.operators.python import PythonOperator
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
# Defaults handed to every task created in this DAG.
# NOTE(review): 'start_date' here is dynamic (days_ago(0)) while the DAG
# below is given a fixed start_date of datetime(2021, 1, 1) -- confirm which
# one is intended; the value is kept as-is to preserve current behaviour.
default_args = {
    'owner': 'Jasmine Qian',
    'start_date': days_ago(0),
    # The closing quote was missing on this address, which made the whole
    # module fail to parse.
    'email': ['jaxxxxx@xxx.com'],
    'retries': 0,
    'retry_delay': timedelta(minutes=2),
}

# DAG that groups the Selenium demo tasks; catchup is disabled so past
# intervals since the 2021 start date are not back-filled.
dag = DAG(
    'Python_selenium',
    default_args=default_args,
    tags=['python', 'selenium'],
    start_date=datetime(2021, 1, 1),
    catchup=False,
)
def login():
    """Load the 360doc article in headless Chrome and print its body text.

    Prints the requested URL, the URL the browser ended up on, the text of
    the page's <body> element, and finally a success marker. Returns None.
    Any Selenium error propagates to the caller after the browser has been
    shut down.
    """
    # Imported locally so this function does not rely on the module's
    # import block providing the locator constants.
    from selenium.webdriver.common.by import By

    url = "http://www.360doc.com/content/19/0217/09/33525635_815480537.shtml"
    print(url)

    chrome_options = Options()
    chrome_options.add_argument("--no-sandbox")  # linux only
    chrome_options.add_argument("--headless")

    driver = webdriver.Chrome(options=chrome_options)
    try:
        # get() returns None; the address actually loaded is exposed on
        # current_url once navigation has finished.
        driver.get(url)
        actual_url = driver.current_url
        print(actual_url)

        # find_element(By..., ...) replaces the removed
        # find_element_by_tag_name helper.
        body_content = driver.find_element(By.TAG_NAME, 'body').text
        print(body_content)
    finally:
        # quit() ends the whole driver session, and runs even when the
        # page load or lookup raises, so no browser process is left behind.
        driver.quit()

    print("Succeed@@")
def connet_google():
    """Load the Google home page in headless Chrome and print its body text.

    Prints the target URL, the text of the page's <body> element, and a
    success marker. Returns None. Any Selenium error propagates to the
    caller after the browser has been shut down.
    """
    # Imported locally so this function does not rely on the module's
    # import block providing the locator constants.
    from selenium.webdriver.common.by import By

    url = "http://www.google.com"
    print(url)

    chrome_options = Options()
    chrome_options.add_argument("--no-sandbox")  # linux only
    chrome_options.add_argument("--headless")

    driver = webdriver.Chrome(options=chrome_options)
    try:
        # The original never navigated anywhere, so the body it read was
        # that of the browser's blank start page rather than `url`.
        driver.get(url)

        # find_element(By..., ...) replaces the removed
        # find_element_by_tag_name helper.
        body_content = driver.find_element(By.TAG_NAME, 'body').text
        print(body_content)
    finally:
        # quit() ends the whole driver session, and runs even when the
        # page load or lookup raises, so no browser process is left behind.
        driver.quit()

    print("Succeed@@")
# Task wrapping login().
user_login = PythonOperator(
    dag=dag,
    python_callable=login,
    task_id='login',
)

# Task wrapping connet_google(). The function object is captured as
# python_callable before the name is rebound, so from here on
# `connet_google` refers to the operator, not the function.
connet_google = PythonOperator(
    dag=dag,
    python_callable=connet_google,
    task_id='google',
)

# Run the Google task after the login task.
user_login.set_downstream(connet_google)

if __name__ == "__main__":
    # Expose this DAG's command-line interface when run as a script.
    dag.cli()
Original: https://www.cnblogs.com/qianjinyan/p/15534088.html
Author: 巴黎爱工作
Title: Airflow sample to start Google Chrome browser
原创文章受到原创版权保护。转载请注明出处:https://www.johngo689.com/553066/
转载文章受原作者版权保护。转载请注明原作者出处!