
+++ customer_list.csv
... | ... | @@ -0,0 +1,115 @@ |
1 | +0 | |
2 | +(주)한효라이프 태원산업지점 | |
3 | +대동산업(주) | |
4 | +미래자원(주) | |
5 | +자연산업(주) | |
6 | +포항산림조합 | |
7 | +피그넷엔에스(주) | |
8 | +(주)정금산업 | |
9 | +(주)제이케이코리아 | |
10 | +(주)남양견직 | |
11 | +창범텍스 | |
12 | +태해영 | |
13 | +(주)에스오에프앤씨 | |
14 | +우원산업 | |
15 | +삼강엠앤티 밀양 1공장 | |
16 | +금오섬유 | |
17 | +삼원팩(주)_0227117278 | |
18 | +삼원팩(주)_0227117287 | |
19 | +삼우연사 | |
20 | +대진금속열처리 | |
21 | +현대PIPE | |
22 | +부창텍스타일 | |
23 | +(주)미래생명자원_0231237783(1공장) | |
24 | +(주)미래생명자원_0245145329(2공장) | |
25 | +합천축산업협동조합사료공장 | |
26 | +휴윈스테크놀로_1116144894 | |
27 | +휴윈스테크놀로지_1125459698 | |
28 | +케이에이치 | |
29 | +보성 G&P | |
30 | +주식회사 광일기공_0333397319 | |
31 | +주식회사 광일기공_0331358276 | |
32 | +주식회사 세찬 | |
33 | +우진냉장 | |
34 | +㈜재인써키트_1129409511 | |
35 | +㈜재인써키트_1129409496 | |
36 | +영천축산업협동조합 | |
37 | +주식회사 나람 | |
38 | +(주)성윤 | |
39 | +(주)윤성알엠씨 | |
40 | +부성스틸(주) | |
41 | +주식회사 부국사료 | |
42 | +휴윈스테크놀로_모-자 | |
43 | +뉴텍컴파운드 | |
44 | +서울바이오 음성공장 | |
45 | +주식회사 누리 | |
46 | +세우산업 | |
47 | +(주)베스텍 | |
48 | +주식회사 청농 | |
49 | +주은폴리머 | |
50 | +창원열처리 | |
51 | +다인MHT | |
52 | +명광 | |
53 | +부농 경주 1공장_0526086822 | |
54 | +부농 경주 2공장_0526086948 | |
55 | +(주)엠제이씨엔씨 | |
56 | +동양산업 1공장_0526191977 | |
57 | +동양산업 2공장_0533523197 | |
58 | +한일씨앤에스_0526223684 | |
59 | +한일씨앤에스_0526223719 | |
60 | +현대케미칼산업 | |
61 | +화신플라텍 | |
62 | +태광에이텍주식회사 | |
63 | +태승화학 | |
64 | +수성산업 | |
65 | +영남산업 | |
66 | +백두물산 | |
67 | +신흥섬유 | |
68 | +대경텍스 | |
69 | +가나산업 | |
70 | +가나인터피아 | |
71 | +홍방섬유 | |
72 | +상인수지 | |
73 | +원동 | |
74 | +영빈산업 | |
75 | +삼천리산업 | |
76 | +명진자원환경 | |
77 | +(주)우진산업 | |
78 | +선프라텍 | |
79 | +대선산업 | |
80 | +(주)영림인쇄 | |
81 | +해드림 | |
82 | +(주)원창금속 | |
83 | +인천레미콘 | |
84 | +주식회사 대덕 | |
85 | +대성폴리머 | |
86 | +월드이엔씨 | |
87 | +대영수지 | |
88 | +제이엘플라텍 | |
89 | +(주)디에스아이 | |
90 | +(주)보광리사이클_0535594141 | |
91 | +(주)보광리사이클_0532976093 | |
92 | +영진철강(주) | |
93 | +인성수지 | |
94 | +일신수지 | |
95 | +진강자원 | |
96 | +국제수지 | |
97 | +고향수지 | |
98 | +(주)민진 | |
99 | +(주)민진(모-자) | |
100 | +형제자원 | |
101 | +대원수지 | |
102 | +아시아종합열처리 | |
103 | +인천란수피(주) | |
104 | +성산연사 | |
105 | +화인피엔지 | |
106 | +경북톱밥 | |
107 | +다엘 | |
108 | +(주)유원포리머 | |
109 | +반석산업 | |
110 | +진동종합수지 | |
111 | +복산 | |
112 | +(주)대상포징 | |
113 | +(주)남부 | |
114 | +달구벌산업(주) | |
115 | +(주)에스엠켐텍 |
+++ date_string.csv
... | ... | @@ -0,0 +1,47 @@ |
1 | +시작일,종료일 | |
2 | +2020-01-01,2020-01-31 | |
3 | +2020-02-01,2020-02-29 | |
4 | +2020-03-01,2020-03-31 | |
5 | +2020-04-01,2020-04-30 | |
6 | +2020-05-01,2020-05-31 | |
7 | +2020-06-01,2020-06-30 | |
8 | +2020-07-01,2020-07-31 | |
9 | +2020-08-01,2020-08-31 | |
10 | +2020-09-01,2020-09-30 | |
11 | +2020-10-01,2020-10-31 | |
12 | +2020-11-01,2020-11-30 | |
13 | +2020-12-01,2020-12-31 | |
14 | +2021-01-01,2021-01-31 | |
15 | +2021-02-01,2021-02-28 | |
16 | +2021-03-01,2021-03-31 | |
17 | +2021-04-01,2021-04-30 | |
18 | +2021-05-01,2021-05-31 | |
19 | +2021-06-01,2021-06-30 | |
20 | +2021-07-01,2021-07-31 | |
21 | +2021-08-01,2021-08-31 | |
22 | +2021-09-01,2021-09-30 | |
23 | +2021-10-01,2021-10-31 | |
24 | +2021-11-01,2021-11-30 | |
25 | +2021-12-01,2021-12-31 | |
26 | +2022-01-01,2022-01-31 | |
27 | +2022-02-01,2022-02-28 | |
28 | +2022-03-01,2022-03-31 | |
29 | +2022-04-01,2022-04-30 | |
30 | +2022-05-01,2022-05-31 | |
31 | +2022-06-01,2022-06-30 | |
32 | +2022-07-01,2022-07-31 | |
33 | +2022-08-01,2022-08-31 | |
34 | +2022-09-01,2022-09-30 | |
35 | +2022-10-01,2022-10-31 | |
36 | +2022-11-01,2022-11-30 | |
37 | +2022-12-01,2022-12-31 | |
38 | +2023-01-01,2023-01-31 | |
39 | +2023-02-01,2023-02-28 | |
40 | +2023-03-01,2023-03-31 | |
41 | +2023-04-01,2023-04-30 | |
42 | +2023-05-01,2023-05-31 | |
43 | +2023-06-01,2023-06-30 | |
44 | +2023-07-01,2023-07-31 | |
45 | +2023-08-01,2023-08-31 | |
46 | +2023-09-01,2023-09-30 | |
47 | +2023-10-01,2023-10-31 |
+++ file_download_massive.py
... | ... | @@ -0,0 +1,112 @@ |
1 | +from selenium import webdriver | |
2 | +from selenium.webdriver.common.by import By | |
3 | +from selenium.webdriver.chrome.options import Options | |
4 | +from selenium.webdriver.chrome.service import Service | |
5 | +from webdriver_manager.chrome import ChromeDriverManager | |
6 | +from selenium.webdriver.support.ui import Select | |
7 | +from selenium.webdriver.support.ui import WebDriverWait | |
8 | +from selenium.webdriver.support import expected_conditions as EC | |
9 | +from selenium.webdriver.chrome.service import service | |
10 | + | |
11 | +import pandas as pd | |
12 | +import time | |
13 | + | |
14 | + | |
15 | + | |
def _wait_for_partial_downloads(download_dir, timeout=120.0, poll_interval=0.5):
    """Block until no Chrome partial download remains in *download_dir*.

    Chrome writes in-progress downloads as ``*.crdownload`` files. This polls
    the directory until none is left or *timeout* seconds have elapsed, so the
    browser is not shut down in the middle of a transfer.

    Args:
        download_dir: Directory Chrome was configured to download into.
        timeout: Maximum number of seconds to keep polling.
        poll_interval: Seconds to sleep between directory listings.
    """
    import os  # local import: the file's import block does not provide ``os``

    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        try:
            entries = os.listdir(download_dir)
        except OSError:
            # Directory missing or unreadable: nothing we can observe, stop waiting.
            return
        if not any(entry.endswith(".crdownload") for entry in entries):
            return
        time.sleep(poll_interval)


def download_files(company_name, time_interval, start_date, end_date, base_dir):
    """Download one report file for a customer, meter type and date range.

    Starts a fresh Chrome session whose download directory is
    ``f"{base_dir}{time_interval}/{company_name}"``, logs in, opens the
    WattageState report page, fills in the date range, picks the customer and
    meter type from their drop-downs, runs the query and clicks the download
    link. The browser is always closed before returning.

    Args:
        company_name: Visible text of the option to pick in the customer
            (``참여고객명``) drop-down; also the last download path component.
        time_interval: Visible text of the option to pick in the meter-type
            (``검침기종류``) drop-down; also a download path component.
        start_date: Text typed into the ``from`` input.
        end_date: Text typed into the ``to`` input.
        base_dir: Download path prefix. It is concatenated as-is, so it must
            already end with a path separator.

    Raises:
        selenium.common.exceptions.NoSuchElementException: If a page element
            or a drop-down option cannot be found.
        selenium.common.exceptions.TimeoutException: If the download link does
            not expose an ``href`` within 10 seconds.
    """
    report_url = "https://admin.wls.eims.co.kr/WattageState/Report"
    download_dir = f"{base_dir}{time_interval}/{company_name}"

    options = Options()
    options.add_experimental_option(
        "prefs",
        {
            "download.default_directory": download_dir,
            "download.prompt_for_download": False,
            "download.directory_upgrade": True,
        },
    )
    options.add_argument('--disable-dev-shm-usage')

    driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)
    try:
        # ---------- log in (the report URL shows the login form first)
        driver.get(report_url)

        input_id = driver.find_element(By.CLASS_NAME, 'login-userid')
        input_password = driver.find_element(By.CLASS_NAME, 'login-password')
        login_button = driver.find_element(By.CSS_SELECTOR, "div.col-sm-7.text-right button")

        input_id.clear()
        input_password.clear()

        # NOTE(review): credentials are hard-coded in source; consider moving
        # them to configuration or environment variables.
        input_id.send_keys("wl00002")
        input_password.send_keys("2666##")

        login_button.click()

        # Fixed pauses give the login and the report page time to finish loading.
        time.sleep(2)
        driver.get(report_url)
        time.sleep(2)

        # ---------- gather the data from the website, by company, time_interval, and date
        starting_date_input_box = driver.find_element(By.NAME, "from")
        end_date_input_box = driver.find_element(By.NAME, "to")

        starting_date_input_box.clear()
        end_date_input_box.clear()

        starting_date_input_box.send_keys(start_date)
        end_date_input_box.send_keys(end_date)

        drop_down_customer = driver.find_element(By.XPATH, '//select[@data-placeholder="참여고객명"]')
        drop_down_time_interval = driver.find_element(By.XPATH, '//select[@data-placeholder="검침기종류"]')

        Select(drop_down_customer).select_by_visible_text(company_name)
        Select(drop_down_time_interval).select_by_visible_text(time_interval)

        # Run the query.
        query_button = driver.find_element(
            By.XPATH, '//button[@ng-click="ctrl.getContractWattageStatistics(ctrl.req)"]'
        )
        query_button.click()

        # Re-locate the link on every poll so a re-rendered element is picked
        # up; WebDriverWait ignores NoSuchElementException while polling.
        download_xpath = '//a[@class="btn btn-default btn-outline pull-right btn-sm"]'
        WebDriverWait(driver, 10).until(
            lambda d: d.find_element(By.XPATH, download_xpath).get_attribute('href')
        )
        driver.find_element(By.XPATH, download_xpath).click()

        # Keep the original fixed pause, then make sure no partial download is
        # left before the browser is shut down.
        time.sleep(10)
        _wait_for_partial_downloads(download_dir)
    finally:
        # Always release the browser and chromedriver; this function is called
        # once per (customer, month) pair and would otherwise leak a process
        # on every call.
        driver.quit()
92 | + | |
93 | + | |
if __name__ == "__main__":
    # Batch driver: download every (customer, month) report for one meter type.
    import argparse

    parser = argparse.ArgumentParser(
        description="Download reports for every customer and date range for one meter type."
    )
    parser.add_argument(
        "-t",
        "--time_interval",
        type=int,
        required=True,  # without it the index would be None and iloc would fail
        help="row index into time_interval_list.csv selecting the meter type",
    )
    args = parser.parse_args()

    time_interval_index = args.time_interval

    df_c = pd.read_csv("customer_list.csv")
    df_t = pd.read_csv("time_interval_list.csv")
    df_dates = pd.read_csv("date_string.csv")

    # Fail with a clear message instead of a pandas IndexError traceback.
    # Negative indices remain accepted, as with plain iloc.
    if not -len(df_t) <= time_interval_index < len(df_t):
        parser.error(
            f"--time_interval must be between {-len(df_t)} and {len(df_t) - 1}"
        )

    # The stored option text carries spaces; strip them to get the value used
    # for the drop-down selection and the download directory name.
    t = df_t.iloc[time_interval_index].values
    t = t[0].replace(' ', '')

    base_dir = "/media/juni/T7 Shield/웰라인데이터/"

    for c in df_c.values:
        for dates in df_dates.values:
            # dates[0] / dates[1] are the start and end date columns.
            download_files(c[0], t, dates[0], dates[1], base_dir)
112 | + |
+++ folder_creator.py
... | ... | @@ -0,0 +1,10 @@ |
1 | +import os | |
2 | +import pandas as pd | |
3 | + | |
# Create one download directory per (meter type, customer) pair under base_dir.
base_dir = "/media/juni/T7 Shield/웰라인데이터/"
df_c = pd.read_csv("customer_list.csv")
# NOTE: this reads the extension-less "time_interval_list" file, which has an
# index column, so the meter-type text is in column 1 of each row.
df_t = pd.read_csv("time_interval_list")

for t in df_t.values:
    for c in df_c.values:
        # exist_ok=True makes the script safe to re-run when some or all of
        # the directories already exist.
        os.makedirs(f"{base_dir}{t[1].replace(' ', '')}/{c[0]}", exist_ok=True)
+++ main.py
... | ... | @@ -0,0 +1,113 @@ |
1 | +from selenium import webdriver | |
2 | +from selenium.webdriver.common.by import By | |
3 | +from selenium.webdriver.chrome.options import Options | |
4 | +from selenium.webdriver.chrome.service import Service | |
5 | +from webdriver_manager.chrome import ChromeDriverManager | |
6 | +from selenium.webdriver.support.ui import Select | |
7 | +from selenium.webdriver.support.ui import WebDriverWait | |
8 | +from selenium.webdriver.support import expected_conditions as EC | |
9 | + | |
10 | +import pandas as pd | |
11 | +import time | |
12 | +import urllib.request | |
13 | +import requests | |
14 | +from fake_useragent import UserAgent | |
15 | + | |
# Prototype script: log in, export the customer and meter-type drop-down
# options to CSV, then download one sample report through the browser.
options = Options()
# options.add_argument('--headless')
# options.add_argument('--no-sandbox')
options.add_argument('--disable-dev-shm-usage')
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)

try:
    # ---------- log in (the report URL shows the login form first)
    driver.get("https://admin.wls.eims.co.kr/WattageState/Report")

    input_id = driver.find_element(By.CLASS_NAME, 'login-userid')
    input_password = driver.find_element(By.CLASS_NAME, 'login-password')
    login_button = driver.find_element(By.CSS_SELECTOR, "div.col-sm-7.text-right button")

    input_id.clear()
    input_password.clear()

    # NOTE(review): credentials are hard-coded in source; consider moving them
    # to configuration or environment variables.
    input_id.send_keys("wl00002")
    input_password.send_keys("2666##")

    login_button.click()

    # Fixed pauses give the login and the report page time to finish loading.
    time.sleep(2)
    driver.get("https://admin.wls.eims.co.kr/WattageState/Report")
    time.sleep(2)

    # ---------- gather the data from the website, by company, time_interval, and date
    starting_date_input_box = driver.find_element(By.NAME, "from")
    end_date_input_box = driver.find_element(By.NAME, "to")

    starting_date_input_box.clear()
    end_date_input_box.clear()

    starting_date_input_box.send_keys("2023-01-01")
    end_date_input_box.send_keys("2023-01-31")

    # The first line of each drop-down's text is skipped; for the meter type
    # only the next two entries are kept.
    drop_down_customer = driver.find_element(By.XPATH, '//select[@data-placeholder="참여고객명"]')
    customer_list = drop_down_customer.text.split("\n")[1:]

    drop_down_time_interval = driver.find_element(By.XPATH, '//select[@data-placeholder="검침기종류"]')
    time_interval_list = drop_down_time_interval.text.split("\n")[1:3]

    select = Select(drop_down_customer)
    select.select_by_visible_text(customer_list[5])

    # Persist the scraped option lists as CSV files.
    df = pd.DataFrame(customer_list)
    df.to_csv("customer_list.csv", index=False)
    df = pd.DataFrame(time_interval_list)
    df.to_csv("time_interval_list.csv", index=False)

    select = Select(drop_down_time_interval)
    # The scraped text carries spaces that the option's visible text does not.
    select.select_by_visible_text(time_interval_list[0].replace(' ', ''))

    the_red_button = driver.find_element(
        By.XPATH, '//button[@ng-click="ctrl.getContractWattageStatistics(ctrl.req)"]'
    )
    the_red_button.click()

    # Re-locate the link on every poll so a re-rendered element is picked up;
    # WebDriverWait ignores NoSuchElementException while polling.
    download_xpath = '//a[@class="btn btn-default btn-outline pull-right btn-sm"]'
    wait = WebDriverWait(driver, 10)
    href = wait.until(
        lambda d: d.find_element(By.XPATH, download_xpath).get_attribute('href')
    )
    the_download_button = driver.find_element(By.XPATH, download_xpath)
    the_download_button.click()

    # Fixed pause after triggering the download.
    time.sleep(10)

    # NOTE: fetching `href` directly with requests / urllib was attempted and
    # did not work, so the download is triggered by clicking in the browser.

    print(driver.title)
finally:
    # Release the browser and chromedriver even when a step above fails.
    driver.quit()
+++ time_interval_list
... | ... | @@ -0,0 +1,3 @@ |
1 | +,0 | |
2 | +0,15분검침기 | |
3 | +1,5분검침기 |
+++ time_interval_list.csv
... | ... | @@ -0,0 +1,3 @@ |
1 | +0 | |
2 | + 15분검침기 | |
3 | + 5분검침기 |
Add a comment
Delete comment
Once you delete this comment, you won't be able to recover it. Are you sure you want to delete this comment?