윤영준 윤영준 2023-11-10
Hello Yona
@97aed7ce9ef7f8c7be9463740249acc96e02dfdc
 
customer_list.csv (added)
+++ customer_list.csv
@@ -0,0 +1,115 @@
+0
+(주)한효라이프 태원산업지점
+대동산업(주)
+미래자원(주)
+자연산업(주)
+포항산림조합
+피그넷엔에스(주)
+(주)정금산업
+(주)제이케이코리아
+(주)남양견직
+창범텍스
+태해영
+(주)에스오에프앤씨
+우원산업
+삼강엠앤티 밀양 1공장
+금오섬유
+삼원팩(주)_0227117278
+삼원팩(주)_0227117287
+삼우연사
+대진금속열처리
+현대PIPE
+부창텍스타일
+(주)미래생명자원_0231237783(1공장)
+(주)미래생명자원_0245145329(2공장)
+합천축산업협동조합사료공장
+휴윈스테크놀로_1116144894
+휴윈스테크놀로지_1125459698
+케이에이치
+보성 G&P
+주식회사 광일기공_0333397319
+주식회사 광일기공_0331358276
+주식회사 세찬
+우진냉장
+㈜재인써키트_1129409511
+㈜재인써키트_1129409496
+영천축산업협동조합
+주식회사 나람
+(주)성윤
+(주)윤성알엠씨
+부성스틸(주)
+주식회사 부국사료
+휴윈스테크놀로_모-자
+뉴텍컴파운드
+서울바이오 음성공장
+주식회사 누리
+세우산업
+(주)베스텍
+주식회사 청농
+주은폴리머
+창원열처리
+다인MHT
+명광
+부농 경주 1공장_0526086822
+부농 경주 2공장_0526086948
+(주)엠제이씨엔씨
+동양산업 1공장_0526191977
+동양산업 2공장_0533523197
+한일씨앤에스_0526223684
+한일씨앤에스_0526223719
+현대케미칼산업
+화신플라텍
+태광에이텍주식회사
+태승화학
+수성산업
+영남산업
+백두물산
+신흥섬유
+대경텍스
+가나산업
+가나인터피아
+홍방섬유
+상인수지
+원동
+영빈산업
+삼천리산업
+명진자원환경
+(주)우진산업
+선프라텍
+대선산업
+(주)영림인쇄
+해드림
+(주)원창금속
+인천레미콘
+주식회사 대덕
+대성폴리머
+월드이엔씨
+대영수지
+제이엘플라텍
+(주)디에스아이
+(주)보광리사이클_0535594141
+(주)보광리사이클_0532976093
+영진철강(주)
+인성수지
+일신수지
+진강자원
+국제수지
+고향수지
+(주)민진
+(주)민진(모-자)
+형제자원
+대원수지
+아시아종합열처리
+인천란수피(주)
+성산연사
+화인피엔지
+경북톱밥
+다엘
+(주)유원포리머
+반석산업
+진동종합수지
+복산
+(주)대상포징
+(주)남부
+달구벌산업(주)
+(주)에스엠켐텍
 
date_string.csv (added)
+++ date_string.csv
@@ -0,0 +1,47 @@
+시작일,종료일
+2020-01-01,2020-01-31
+2020-02-01,2020-02-29
+2020-03-01,2020-03-31
+2020-04-01,2020-04-30
+2020-05-01,2020-05-31
+2020-06-01,2020-06-30
+2020-07-01,2020-07-31
+2020-08-01,2020-08-31
+2020-09-01,2020-09-30
+2020-10-01,2020-10-31
+2020-11-01,2020-11-30
+2020-12-01,2020-12-31
+2021-01-01,2021-01-31
+2021-02-01,2021-02-28
+2021-03-01,2021-03-31
+2021-04-01,2021-04-30
+2021-05-01,2021-05-31
+2021-06-01,2021-06-30
+2021-07-01,2021-07-31
+2021-08-01,2021-08-31
+2021-09-01,2021-09-30
+2021-10-01,2021-10-31
+2021-11-01,2021-11-30
+2021-12-01,2021-12-31
+2022-01-01,2022-01-31
+2022-02-01,2022-02-28
+2022-03-01,2022-03-31
+2022-04-01,2022-04-30
+2022-05-01,2022-05-31
+2022-06-01,2022-06-30
+2022-07-01,2022-07-31
+2022-08-01,2022-08-31
+2022-09-01,2022-09-30
+2022-10-01,2022-10-31
+2022-11-01,2022-11-30
+2022-12-01,2022-12-31
+2023-01-01,2023-01-31
+2023-02-01,2023-02-28
+2023-03-01,2023-03-31
+2023-04-01,2023-04-30
+2023-05-01,2023-05-31
+2023-06-01,2023-06-30
+2023-07-01,2023-07-31
+2023-08-01,2023-08-31
+2023-09-01,2023-09-30
+2023-10-01,2023-10-31
 
file_download_massive.py (added)
+++ file_download_massive.py
@@ -0,0 +1,112 @@
+from selenium import webdriver
+from selenium.webdriver.common.by import By
+from selenium.webdriver.chrome.options import Options
+from selenium.webdriver.chrome.service import Service
+from webdriver_manager.chrome import ChromeDriverManager
+from selenium.webdriver.support.ui import Select
+from selenium.webdriver.support.ui import WebDriverWait
+from selenium.webdriver.support import expected_conditions as EC
+from selenium.webdriver.chrome.service import service
+
+import pandas as pd
+import time
+
+
+
+def download_files(company_name, time_interval, start_date, end_date, base_dir):
+    options = Options()
+
+    download_setting = {
+        "download.default_directory" : f"{base_dir}{time_interval}/{company_name}",
+        "download.prompt_for_download" : False,
+        "download.directory_upgrade": True,
+    }
+    options.add_experimental_option("prefs", download_setting)
+
+    options.add_argument('--disable-dev-shm-usage')
+
+    driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)
+
+    driver.get("https://admin.wls.eims.co.kr/WattageState/Report")
+
+    input_id = driver.find_element(By.CLASS_NAME, 'login-userid')
+    input_password = driver.find_element(By.CLASS_NAME, 'login-password')
+    login_button = driver.find_element(By.CSS_SELECTOR, "div.col-sm-7.text-right button")
+
+
+    input_id.clear()
+    input_password.clear()
+
+    input_id.send_keys("wl00002")
+    input_password.send_keys("2666##")
+
+    login_button.click()
+
+    did_it_logged_in = EC.presence_of_element_located((By.ID, 'data-select2'))
+
+    time.sleep(2)
+    driver.get("https://admin.wls.eims.co.kr/WattageState/Report")
+    time.sleep(2)
+
+    # ---------- gather the data from the website, by company, time_interval, and date
+
+    starting_date_input_box = driver.find_element(By.NAME, "from")
+    end_date_input_box = driver.find_element(By.NAME, "to")
+
+    starting_date_input_box.clear()
+    end_date_input_box.clear()
+
+    starting_date_input_box.send_keys(start_date)
+    end_date_input_box.send_keys(end_date)
+
+
+
+    drop_down_customer = driver.find_element(By.XPATH, '//select[@data-placeholder="참여고객명"]')
+    # customer_list = drop_down_customer.text.split("\n")[1:]
+
+
+    drop_down_time_interval = driver.find_element(By.XPATH, '//select[@data-placeholder="검침기종류"]')
+    # time_interval_list = drop_down_time_interval.text.split("\n")[1:3]
+
+
+    select = Select(drop_down_customer)
+
+    select.select_by_visible_text(company_name)
+
+
+    select = Select(drop_down_time_interval)
+
+    select.select_by_visible_text(time_interval)
+
+    the_red_button = driver.find_element(By.XPATH, '//button[@ng-click="ctrl.getContractWattageStatistics(ctrl.req)"]')
+    the_red_button.click()
+    the_download_button = driver.find_element(By.XPATH, '//a[@class="btn btn-default btn-outline pull-right btn-sm"]')
+
+    wait = WebDriverWait(driver, 10)
+    href = wait.until(
+        lambda driver: the_download_button.get_attribute('href')
+    )
+    the_download_button.click()
+
+    time.sleep(10)
+
+
+if __name__ == "__main__":
+    import argparse
+    parser = argparse.ArgumentParser()
+    parser.add_argument("-t", "--time_interval", type=int)
+    args = parser.parse_args()
+
+    time_interval_index = args.time_interval
+
+    df_c = pd.read_csv("customer_list.csv")
+    df_t = pd.read_csv("time_interval_list.csv")
+    t = df_t.iloc[time_interval_index].values
+    t = t[0].replace(' ', '')
+    df_dates = pd.read_csv("date_string.csv")
+    base_dir = "/media/juni/T7 Shield/웰라인데이터/"
+    # for t in df_t.values:
+    for c in df_c.values:
+        for dates in df_dates.values:
+            download_files(c[0], t, dates[0], dates[1], base_dir)
+
 
folder_creator.py (added)
+++ folder_creator.py
@@ -0,0 +1,10 @@
+import os
+import pandas as pd
+
+base_dir = "/media/juni/T7 Shield/웰라인데이터/"
+df_c = pd.read_csv("customer_list.csv")
+df_t = pd.read_csv("time_interval_list")
+
+for t in df_t.values:
+    for c in df_c.values:
+        os.makedirs(f"{base_dir}{t[1].replace(' ', '')}/{c[0]}")(No newline at end of file)
 
main.py (added)
+++ main.py
@@ -0,0 +1,113 @@
+from selenium import webdriver
+from selenium.webdriver.common.by import By
+from selenium.webdriver.chrome.options import Options
+from selenium.webdriver.chrome.service import Service
+from webdriver_manager.chrome import ChromeDriverManager
+from selenium.webdriver.support.ui import Select
+from selenium.webdriver.support.ui import WebDriverWait
+from selenium.webdriver.support import expected_conditions as EC
+
+import pandas as pd
+import time
+import urllib.request
+import requests
+from fake_useragent import UserAgent
+
+# Exploratory script: logs in to the EIMS admin site, saves the customer and
+# meter-type dropdown contents to CSV, and downloads one sample report.
+options = Options()
+# options.add_argument('--headless')
+# options.add_argument('--no-sandbox')
+options.add_argument('--disable-dev-shm-usage')
+driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)
+
+driver.get("https://admin.wls.eims.co.kr/WattageState/Report")
+
+# ---------- log in
+input_id = driver.find_element(By.CLASS_NAME, 'login-userid')
+input_password = driver.find_element(By.CLASS_NAME, 'login-password')
+login_button = driver.find_element(By.CSS_SELECTOR, "div.col-sm-7.text-right button")
+
+
+input_id.clear()
+input_password.clear()
+
+# NOTE(review): credentials are hard-coded in source.
+input_id.send_keys("wl00002")
+input_password.send_keys("2666##")
+
+login_button.click()
+
+# NOTE(review): this only builds a condition object; it is never passed to a
+# WebDriverWait, so nothing is actually checked here.
+did_it_logged_in = EC.presence_of_element_located((By.ID, 'data-select2'))
+
+# Fixed pauses around re-opening the report page after the login click.
+time.sleep(2)
+driver.get("https://admin.wls.eims.co.kr/WattageState/Report")
+time.sleep(2)
+
+# ---------- gather the data from the website, by company, time_interval, and date
+
+starting_date_input_box = driver.find_element(By.NAME, "from")
+end_date_input_box = driver.find_element(By.NAME, "to")
+
+starting_date_input_box.clear()
+end_date_input_box.clear()
+
+# Sample date range used for this trial run.
+starting_date_input_box.send_keys("2023-01-01")
+end_date_input_box.send_keys("2023-01-31")
+
+
+
+# The dropdown's text is split into lines; the first line is skipped.
+drop_down_customer = driver.find_element(By.XPATH, '//select[@data-placeholder="참여고객명"]')
+customer_list = drop_down_customer.text.split("\n")[1:]
+
+
+# Same for the meter-type dropdown, keeping only the second and third lines.
+drop_down_time_interval = driver.find_element(By.XPATH, '//select[@data-placeholder="검침기종류"]')
+time_interval_list = drop_down_time_interval.text.split("\n")[1:3]
+
+
+select = Select(drop_down_customer)
+
+# Arbitrary sample customer (sixth entry of the list) for the trial download.
+select.select_by_visible_text(customer_list[5])
+
+# Persist both lists; the values are written exactly as scraped, so any
+# padding spaces in the dropdown text end up in the CSV.
+df = pd.DataFrame(customer_list)
+df.to_csv("customer_list.csv", index=False)
+df = pd.DataFrame(time_interval_list)
+df.to_csv("time_interval_list.csv", index=False)
+
+select = Select(drop_down_time_interval)
+
+# Spaces are removed from the scraped text before matching the option.
+select.select_by_visible_text(time_interval_list[0].replace(' ',''))
+
+# Run the query, then locate the download link.
+the_red_button = driver.find_element(By.XPATH, '//button[@ng-click="ctrl.getContractWattageStatistics(ctrl.req)"]')
+the_red_button.click()
+the_download_button = driver.find_element(By.XPATH, '//a[@class="btn btn-default btn-outline pull-right btn-sm"]')
+
+# time.sleep(3)
+# Wait up to 10 seconds for the link to have a non-empty href before clicking.
+wait = WebDriverWait(driver, 10)
+href = wait.until(
+    lambda driver: the_download_button.get_attribute('href')
+)
+the_download_button.click()
+
+# Fixed pause after the click, presumably to let the download finish.
+time.sleep(10)
+
+# This was an attempt to download the file with requests; it did not work.
+# print(href)
+# # time.sleep(10)
+#
+# # urllib.request.urlretrieve(href, "test/test.xlsx")
+#
+# USER_AGENT = UserAgent()
+# # fake_browser = USER_AGENT.firefox
+# header = {
+#     'User-Agent': USER_AGENT.firefox
+# }
+#
+# with open("test/test.xlsx", "wb") as file:
+#     r = requests.get(href, headers=header)
+#     file.write(r.content)
+#
+# time.sleep(10000)
+
+# WebDriverWait(driver, 10).until(lambda d: d.find_element(By.CSS_SELECTOR, "div.span.span.select2-selection select2-selection--single"))
+
+
+
+print(driver.title)
+# driver.close()(No newline at end of file)
 
time_interval_list (added)
+++ time_interval_list
@@ -0,0 +1,3 @@
+,0
+0,15분검침기
+1,5분검침기
 
time_interval_list.csv (added)
+++ time_interval_list.csv
@@ -0,0 +1,3 @@
+0
+                                15분검침기
+                                5분검침기
Add a comment
List