-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathUltra_google_crawler.py
More file actions
94 lines (50 loc) · 2.16 KB
/
Copy pathUltra_google_crawler.py
File metadata and controls
94 lines (50 loc) · 2.16 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
#!/usr/bin/env python
# coding: utf-8
# In[12]:
from lib2to3.pgen2 import driver
from selenium import webdriver
import urllib.request
from time import sleep
# Download the file found at a URL.
def downImage(url, name, dest_dir='C:\\Users\\USER\\Sad\\'):
    """Download ``url`` and save it as ``<name>.jpg`` inside ``dest_dir``.

    Args:
        url: Address of the resource to fetch. Anything accepted by
            ``urllib.request.urlretrieve`` works.
        name: File name without extension; ``.jpg`` is always appended,
            regardless of the actual content type of the download.
        dest_dir: Folder that receives the file. It is created when missing.
            Defaults to the folder the script has always written to.

    Returns:
        The path of the file that was written.

    Raises:
        OSError: If the folder cannot be created, the file cannot be written,
            or the download fails (``urllib.error.URLError`` is an
            ``OSError``).
        ValueError: If ``url`` has no recognisable scheme.
    """
    # Imported locally so this function does not depend on a file-level import.
    import os

    # urlretrieve does not create missing folders; do it up front.
    os.makedirs(dest_dir, exist_ok=True)
    target = os.path.join(dest_dir, name + '.jpg')
    urllib.request.urlretrieve(url, target)
    return target
#search_term = '우는 사진'
#url= f"https://www.google.com/search?q={search_term}&tbm=isch&ved="
# Google Images result page that is crawled (query: "crying scene").
url = "https://www.google.com/search?q=crying+scene&tbm=isch&ved=2ahUKEwjz56aA9f3qAhUMfZQKHeM9AEQQ2-cCegQIABAA&oq=crying+scene&gs_lcp=CgNpbWcQAzIECAAQEzIECAAQEzIECAAQEzIECAAQEzIECAAQEzIECAAQEzIECAAQEzIECAAQEzIECAAQEzIGCAAQHhATOgIIADoFCAAQsQM6BAgAEB5QuUhY_mpgu2xoAnAAeACAAawBiAGSCpIBBDEzLjGYAQCgAQGqAQtnd3Mtd2l6LWltZ8ABAQ&sclient=img&ei=i2wnX7P2Noz60QTj-4CgBA&bih=792&biw=1519&rlz=1C1CAFC_enKR901KR901&hl=ko&hl=ko"

# Upper bound on the number of result elements processed in one run.
MAX_IMAGES = 1000

# Seconds to wait after each scroll step before measuring the page again.
SCROLL_PAUSE = 2


def _scroll_to_bottom(driver, pause=SCROLL_PAUSE):
    """Keep scrolling ``driver``'s page down until its height stops growing."""
    last_height = driver.execute_script("return document.body.scrollHeight")
    while True:
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        sleep(pause)
        # Second scroll to 50px above the bottom; presumably this re-triggers
        # lazy loading of further results -- TODO confirm.
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight-50);")
        sleep(pause)
        new_height = driver.execute_script("return document.body.scrollHeight")
        if new_height == last_height:
            # The page did not grow during this pass: nothing more to load.
            break
        last_height = new_height


def _image_source(element):
    """Return the element's ``data-src`` attribute, or ``src`` if it has none."""
    data_src = element.get_attribute("data-src")
    if data_src is None:
        return element.get_attribute("src")
    return data_src


browser = webdriver.Chrome(r"C:\Users\USER\Desktop\ksa_ai\chromedriver.exe")

# Collected image addresses, keyed by the element's position in the results.
img_url = {}

try:
    browser.get(url)
    _scroll_to_bottom(browser)

    thumbnails = browser.find_elements_by_class_name("rg_i.Q4LuWd")
    # Slicing enforces the image limit; the previous manual counter let one
    # extra image through before breaking.
    for idx, value in enumerate(thumbnails[:MAX_IMAGES]):
        print(idx, "번째")
        img_url[idx] = _image_source(value)
        print(img_url[idx])
        if img_url[idx] is None:
            # Neither attribute is present, so there is nothing to download.
            continue
        try:
            downImage(img_url[idx], 'sad08' + str(idx))
        except (OSError, ValueError) as err:
            # A single bad address or failed transfer should not end the crawl.
            print("download failed:", idx, err)
finally:
    # Always shut the browser down so no driver process is left running.
    browser.quit()
# In[ ]: