python html 활용하기

python으로 html 타입을 활용하는 방법을 공유하려 합니다.

해당 코드를 실습할 수 있는 데이터는

캐글 데이터 페이지를 통해서 다운로드 부탁드리겠습니다.

import warnings
warnings.filterwarnings(action='ignore')

import numpy as np

import pandas as pd
from pandas import DataFrame

import os

# Load the CSV file from the current working directory into a DataFrame.
data = pd.read_csv("2019_kbo_for_kaggle_v2.csv")
# Print the (rows, columns) shape of the table.
print(data.shape)
# Preview the first rows (shown as the cell output when run in a notebook).
data.head()

(1913, 37)

	batter_name	age	G	PA	AB	R	H	2B	HR	...	tp	1B	FBP	avg	OBP	SLG	OPS	p_year	YAB	YOPS
0	백용환	24.0	26.0	58.0	52.0	4.0	9.0	4.0	0.0	...	포수	5.0	6.0	0.173	0.259	0.250	0.509	2014	79.0	0.580
1	백용환	25.0	47.0	86.0	79.0	8.0	14.0	2.0	4.0	...	포수	8.0	5.0	0.177	0.226	0.354	0.580	2015	154.0	0.784
2	백용환	26.0	65.0	177.0	154.0	22.0	36.0	6.0	10.0	...	포수	20.0	20.0	0.234	0.316	0.468	0.784	2016	174.0	0.581
3	백용환	27.0	80.0	199.0	174.0	12.0	34.0	7.0	4.0	...	포수	23.0	20.0	0.195	0.276	0.305	0.581	2017	17.0	0.476
4	백용환	28.0	15.0	20.0	17.0	2.0	3.0	0.0	0.0	...	포수	3.0	3.0	0.176	0.300	0.176	0.476	2018	47.0	0.691

5 rows × 37 columns

table을 엑셀로 저장하는 경우가 일반적이지만,

필요에 따라 html로 바꿔서 저장해야되는 경우가 있습니다.

아래는 테이블을 html로 변환해주는 함수입니다.

from IPython.display import HTML

def getTableHTML(df):

    styles = [
        # table properties
        dict(selector=" ",
             props=[("margin", "0"),
                    ("font-family", '"Helvetica", "Arial", sans-serif'),
                    ("border-collapse", "collapse"),
                    ("border", "none"),
                    ]),

        # background shading
        dict(selector="tbody tr:nth-child(even)",
             props=[("background-color", "#fff")]),
        dict(selector="tbody tr:nth-child(odd)",
             props=[("background-color", "#eee")]),

        # cell spacing
        dict(selector="td",
             props=[("padding", ".5em")]),

        # header cell properties
        dict(selector="th",
             props=[("font-size", "100%"),
                    ("text-align", "center")]),
    ]
    return (df.style.set_table_styles(styles)).render()

html 생성을 위해서 테스트용 폴더를 생성하고 이를 경로로 지정하였습니다.

from pathlib import Path

# Build the path of a "test" folder under the current working directory.
test_path = f"{os.getcwd()}/test"

# Create the folder; an already existing folder is left untouched.
Path(test_path).mkdir(exist_ok=True, parents=True)

# Make the new test folder the working directory.
os.chdir(test_path)

테이블을 html로 변환하는 것을 테스트하기 위해

head, tail을 통해서 앞부분과 뒷 부분만 추출 하였습니다.

# Save the first two and the last two rows as separate HTML files.
# The `with` blocks close each file even if rendering or writing fails.
with open('html_file_front.html', 'w') as html_file:
    html_file.write(getTableHTML(data.head(2)))

with open('html_file_tail.html', 'w') as html_file:
    html_file.write(getTableHTML(data.tail(2)))

absolute

다음으로는 이러한 html 파일들을 하나로 모으는 함수 입니다.

모아야 하는 html 파일들이 있는 경로와 새로운 파일명을 넣으면,

빈 html을 만들어 그 안에 각 html 파일의 내용을 차례로 넣고,

merge라는 하위 폴더를 만들어 해당 파일명으로 저장합니다.

def multiple_html_to_one(contain_path, file_name):
    """Merge every ``.html`` file in a folder into one HTML document.

    Each file directly inside ``contain_path`` whose name ends with ``.html``
    is read in sorted name order, its name is printed, and its contents are
    placed in the body of an otherwise empty HTML page, each followed by
    ``<br></br><br></br>`` as a visual gap. The result is written to
    ``<contain_path>/merge/<file_name>``; the ``merge`` folder is created
    when it does not exist yet.

    Files are read and written with the platform default text encoding.

    Args:
        contain_path: Folder holding the HTML files to merge.
        file_name: Name of the merged file created inside ``merge``.
    """
    html_open = '<html><head></head><body>'
    html_close = '</body></html>'
    separator = '<br></br><br></br>'  # two <br></br> pairs leave a gap between files

    # os.listdir() returns names in arbitrary order; sort them so the merged
    # document is reproducible.
    parts = []
    for file in sorted(os.listdir(contain_path)):
        source = os.path.join(contain_path, file)
        # Skip non-HTML entries and directories that merely end in ".html".
        if not (file.endswith(".html") and os.path.isfile(source)):
            continue
        print(file)
        with open(source, 'r') as f:
            parts.append(f.read() + separator)

    # Assemble the page once. Repeatedly replacing '</body></html>' would
    # also hit closing tags inside already merged files and insert later
    # files more than once.
    merged_html = html_open + ''.join(parts) + html_close

    # Create the "merge" subfolder that receives the merged file.
    merge_path = os.path.join(contain_path, 'merge')
    Path(merge_path).mkdir(parents=True, exist_ok=True)
    merge_file = os.path.join(merge_path, file_name)

    # Write the merged HTML document.
    with open(merge_file, 'w') as f:
        f.write(merged_html)

해당 함수를 실행시키면,

현재 경로에 있는 html 파일들 목록을 print 해주게 됩니다.

# Merge the .html files of the current working directory into merge/merged.html.
multiple_html_to_one(os.getcwd(), 'merged.html')

html_file_front.html
html_file_tail.html

merge 파일이 생성된 것을 확인하실 수 있습니다.

absolute

설정한 파일 이름대로 나온 것을 볼 수 있습니다.

absolute

마지막으로 두 html 파일이 합쳐진 예시입니다.

중간의 공백은 `<br></br><br></br>`을 통해 만든 것입니다.

absolute

python으로 html 타입을 활용하는 방법을 공유하려 합니다.

Share this post