- BeautifulSoup - 파이썬 웹 크롤링 라이브러리
- BeautifulSoup 기본 사용
- Requests 기본 사용
- 네이버 뉴스 제목 가져오기
- 삼성전자 주식 일별시세 가져오기
- BBC 뉴스 검색 결과 가져오기
- Python Tutorial
- NumPy Tutorial
- Matplotlib Tutorial
- PyQt5 Tutorial
- BeautifulSoup Tutorial
- xlrd/xlwt Tutorial
- Pillow Tutorial
- Googletrans Tutorial
- PyWin32 Tutorial
- PyAutoGUI Tutorial
- Pyperclip Tutorial
- TensorFlow Tutorial
- Tips and Examples
Requests 기본 사용
html 소스 가져오기
Requests를 사용하면 아래와 같이 간단한 코드만으로 웹페이지의 html 소스를 가져올 수 있습니다.
import requests
r = requests.get('https://google.com')
html = r.text
print(html)
출력 결과는 아래와 같습니다.
<!doctype html><html itemscope="" itemtype="http://schema.org/WebPage" lang="ko"><head><meta content="text/html; charset=UTF-8" http-equiv="Content-Type"><meta content="/images/branding/googleg/1x/googleg_standard_color_128dp.png" itemprop="image"><title>Google</title><script nonce="yGvdma2v6wEPcfXJwWJzZQ==">(function(){window.google={kEI:'1WiRXcrlMq2mmAWrwoEw',kEXPI:'0,1353747,5662,730,224,510,1065,1216,1936,377,207,1017,54,1883,124,2,124,10,713,320,18,49,490,383,287,86,42,102,76,1131094,1197714,309,329244,1294,12383,4855,32691,15248,867,12163,5281,11240,364,3319,5505,2442,5942,1119,2,205,373,728,2431,1362,4323,4968,773,2256,4738,6,3117,6191,1719,1808,1478,7,491,2044,8909,5071,226,2016,38,622,298,873,1217,1710,1,4000,48,3016,2,628,3240,414,7652,803,2080,19,320,234,884,904,2125,1,368,2778,519,400,992,509,776,10,107,2687,967,48,553,11,14,1279,390,1824,200,37,286,5,1252,840,324,195,1472,48,158,662,2620,818,108,152,52,1137,2,2063,606,1839,184,546,49,1704,1945,748,210,8,97,113,44,1009,93,328,1284,16,84,417,2426,1639,105,503,473,133,1206,508,221,19,745,294,3094,133,773,1217,331,524,7,728,591,524,925,126,1227,374,316,418,2404,4,340,426,957,2994,167,465,114,293,12,26,50,613,44,8,1111,1655,2,55,1,480,165,5,197,2,470,588,497,312,47,167,506,112,497,61,284,1078,145,48,497,21,225,497,111,666,306,92,144,122,11,4,130,85,2,344,417,42,446,684,371,891,330,20,1183,5886574,1873,1155,5997624,2799688,4,249,1323,549,333,444,1,2,80,1,900,576,13,307,1,8,1,2,2132,1,1,1,1,1,414,1,748,200,726,3,7,4,559,1,4002,17,31,22,37,8,23965887',authuser:0,kscs:'c9c918f0_1WiRXcrlMq2mmAWrwoEw',kGL:'KR',kBL:'tf62'};google.sn='webhp';google.kHL='ko';google.jsfs='Ffpdje';})();(function(){google.lc=[];google.li=0;google.getEI=function(a){for(var b;a&&(!a.getAttribute||!(b=a.getAttribute("eid")));)a=a.parentNode;return b||google.kEI};google.getLEI=function(a){for(var b=null;a&&(!a.getAttribute||!(b=a.getAttribute("leid")));)a=a.parentNode;return 
b};google.https=function(){return"https:"==window.location.protocol};google.ml=function(){return null};google.time=function(){return(new Date).getTime()};google.log=function(a,b,e,c,g){if(a=google.logUrl(a,b,e,c,g)){b=new Image;var d=google.lc,f=google.li;d[f]=b;b.onerror=b.onload=b.onabort=function(){delete d[f]};google.vel&&google.vel.lu&&google.vel.lu(a);b.src=a;google.li=f+1}};google.logUrl=function(a,b,e,c,g){var d="",f=google.ls||"";e||-1!=b.search("&ei=")||(d="&ei="+google.getEI(c),-1==b.search("&lei=")&&(c=google.getLEI(c))&&(d+="&lei="+c));c="";!e&&google.cshid&&-1==b.search("&cshid=")&&"slh"!=a&&(c="&cshid="+google.cshid);a=e||"/"+(g||"gen_204")+"?atyp=i&ct="+a+"&cad="+b+d+f+"&zx="+google.time()+c;/^http:/i.test(a)&&google.https()&&(google.ml(Error("a"),!1,{src:a,glmm:1}),a="");return a};}).call(this);(function(){google.y={};google.x=function(a,b){if(a)var c=a.id;else{do c=Math.random();while(google.y[c])}google.y[c]=[a,b];return!1};google.lm=[];google.plm=function(a){google.lm.push.apply(google.lm,a)};google.lq=[];google.load=function(a,b,c){google.lq.push([[a],b,c])};google.loadAll=function(a,b){google.lq.push([a,b])};}).call(this);google.f={};var a=window.location,b=a.href.indexOf("#");if(0<=b){var c=a.href.substring(b+1);/(^|&)q=/.test(c)&&-1==c.indexOf("#")&&a.replace("/search?"+c.replace(/(^|&)fp=[^&]*/g,"")+"&cad=h")};</script><style>#gbar,#guser{font-size:13px;padding-top:1px !important;}#gbar{height:22px}#guser{padding-bottom:7px !important;text-align:right}.gbh,.gbd{border-top:1px solid #c9d7f1;font-size:1px}.gbh{height:0;position:absolute;top:24px;width:100%}@media all{.gb1{height:22px;margin-right:.5em;vertical-align:top}#gbar{float:left}}a.gb1,a.gb4{text-decoration:underline !important}a.gb1,a.gb4{color:#00c !important}.gbi .gb4{color:#dd8e27 !important}.gbf .gb4{color:#900 !important}
</style><style>body,td,a,p,.h{font-family:굴림,돋움,arial,sans-serif}.ko{font-size:9pt}body{margin:0;overflow-y:scroll}#gog{padding:3px 8px 0}td{line-height:.8em}.gac_m td{line-height:17px}form{margin-bottom:20px}.h{color:#36c}.q{color:#00c}.ts td{padding:0}.ts{border-collapse:collapse}em{font-weight:bold;font-style:normal}.lst{height:25px;width:496px}.gsfi,.lst{font:18px arial,sans-serif}.gsfs{font:17px arial,sans-serif}.ds{display:inline-box;display:inline-block;margin:3px 0 4px;margin-left:4px}input{font-family:inherit}a.gb1,a.gb2,a.gb3,a.gb4{color:#11c !important}body{background:#fff;color:black}a{color:#11c;text-decoration:none}a:hover,a:active{text-decoration:underline}.fl a{color:#36c}a:visited{color:#551a8b}a.gb1,a.gb4{text-decoration:underline}a.gb3:hover{text-decoration:none}#ghead a.gb2:hover{color:#fff !important}.sblc{padding-top:5px}.sblc a{display:block;margin:2px 0;margin-left:13px;font-size:11px}.lsbb{background:#eee;border:solid 1px;border-color:#ccc #999 #999 #ccc;height:30px}.lsbb{display:block}.ftl,#fll a{display:inline-block;margin:0 12px}.lsb{background:url(/images/nav_logo229.png) 0 -261px repeat-x;border:none;color:#000;cursor:pointer;height:30px;margin:0;outline:0;font:15px arial,sans-serif;vertical-align:top}.lsb:active{background:#ccc}.lst:focus{outline:none}.tiah{width:458px}</style><script nonce="yGvdma2v6wEPcfXJwWJzZQ=="></script></head><body bgcolor="#fff"><script nonce="yGvdma2v6wEPcfXJwWJzZQ==">(function(){var src='/images/nav_logo229.png';var iesg=false;document.body.onload = function(){window.n && window.n();if (document.images){new Image().src=src;}
if (!iesg){document.f&&document.f.q.focus();document.gbqf&&document.gbqf.q.focus();}
}
})();</script><div id="mngb"> <div id=gbar><nobr><b class=gb1>검색</b> <a class=gb1 href="https://www.google.co.kr/imghp?hl=ko&tab=wi">이미지</a> <a class=gb1 href="https://maps.google.co.kr/maps?hl=ko&tab=wl">지도</a> <a class=gb1 href="https://play.google.com/?hl=ko&tab=w8">Play</a> <a class=gb1 href="https://www.youtube.com/?gl=KR&tab=w1">YouTube</a> <a class=gb1 href="https://news.google.co.kr/nwshp?hl=ko&tab=wn">뉴스</a> <a class=gb1 href="https://mail.google.com/mail/?tab=wm">Gmail</a> <a class=gb1 href="https://drive.google.com/?tab=wo">드라이브</a> <a class=gb1 style="text-decoration:none" href="https://www.google.co.kr/intl/ko/about/products?tab=wh"><u>더보기</u> »</a></nobr></div><div id=guser width=100%><nobr><span id=gbn class=gbi></span><span id=gbf class=gbf></span><span id=gbe></span><a href="http://www.google.co.kr/history/optout?hl=ko" class=gb4>웹 기록</a> | <a href="/preferences?hl=ko" class=gb4>설정</a> | <a target=_top id=gb_70 href="https://accounts.google.com/ServiceLogin?hl=ko&passive=true&continue=https://www.google.com/" class=gb4>로그인</a></nobr></div><div class=gbh style=left:0></div><div class=gbh style=right:0></div> </div><center><br clear="all" id="lgpd"><div id="lga"><img alt="Google" height="92" src="/images/branding/googlelogo/1x/googlelogo_white_background_color_272x92dp.png" style="padding:28px 0 14px" width="272" id="hplogo"><br><br></div><form action="/search" name="f"><table cellpadding="0" cellspacing="0"><tr valign="top"><td width="25%"> </td><td align="center" nowrap=""><input name="ie" value="ISO-8859-1" type="hidden"><input value="ko" name="hl" type="hidden"><input name="source" type="hidden" value="hp"><input name="biw" type="hidden"><input name="bih" type="hidden"><div class="ds" style="height:32px;margin:4px 0"><div style="position:relative;zoom:1"><input style="color:#000;margin:0;padding:5px 8px 0 6px;vertical-align:top;padding-right:38px" autocomplete="off" class="lst tiah" value="" title="Google 검색" maxlength="2048" name="q" 
size="57"><img src="/textinputassistant/tia.png" style="position:absolute;cursor:pointer;right:5px;top:4px;z-index:300" data-script-url="/textinputassistant/11/ko_tia.js" id="tsuid1" alt="" height="23" width="27"><script nonce="yGvdma2v6wEPcfXJwWJzZQ==">(function(){var id='tsuid1';document.getElementById(id).onclick = function(){var s = document.createElement('script');s.src = this.getAttribute('data-script-url');(document.getElementById('xjsc')||document.body).appendChild(s);};})();</script></div></div><br style="line-height:0"><span class="ds"><span class="lsbb"><input class="lsb" value="Google 검색" name="btnG" type="submit"></span></span><span class="ds"><span class="lsbb"><input class="lsb" id="tsuid2" value="I’m Feeling Lucky" name="btnI" type="submit"><script nonce="yGvdma2v6wEPcfXJwWJzZQ==">(function(){var id='tsuid2';document.getElementById(id).onclick = function(){if (this.form.q.value){this.checked = 1;if (this.form.iflsig)this.form.iflsig.disabled = false;}
else top.location='/doodles/';};})();</script></span></span></td><td class="fl sblc" align="left" nowrap="" width="25%"><a href="/advanced_search?hl=ko&authuser=0">고급검색</a><a href="/language_tools?hl=ko&authuser=0">언어도구</a></td></tr></table><input id="gbv" name="gbv" type="hidden" value="1"><script nonce="yGvdma2v6wEPcfXJwWJzZQ==">(function(){var a,b="1";if(document&&document.getElementById)if("undefined"!=typeof XMLHttpRequest)b="2";else if("undefined"!=typeof ActiveXObject){var c,d,e=["MSXML2.XMLHTTP.6.0","MSXML2.XMLHTTP.3.0","MSXML2.XMLHTTP","Microsoft.XMLHTTP"];for(c=0;d=e[c++];)try{new ActiveXObject(d),b="2"}catch(h){}}a=b;if("2"==a&&-1==location.search.indexOf("&gbv=2")){var f=google.gbvu,g=document.getElementById("gbv");g&&(g.value=a);f&&window.setTimeout(function(){location.href=f},0)};}).call(this);</script></form><div id="gac_scont"></div><div style="font-size:83%;min-height:3.5em"><br></div><span id="footer"><div style="font-size:10pt"><div style="margin:19px auto;text-align:center" id="fll"><a href="/intl/ko/ads/">광고 프로그램</a><a href="http://www.google.co.kr/intl/ko/services/">비즈니스 솔루션</a><a href="/intl/ko/about.html">Google 정보</a><a href="https://www.google.com/setprefdomain?prefdom=KR&prev=https://www.google.co.kr/&sig=K_9FOnT3HjopOIVUsa_w4d4PtWYTU%3D">Google.co.kr</a></div></div><p style="color:#767676;font-size:8pt">© 2019 - <a href="/intl/ko/policies/privacy/">개인정보처리방침</a> - <a href="/intl/ko/policies/terms/">약관</a></p></span></center><script nonce="yGvdma2v6wEPcfXJwWJzZQ==">(function(){window.google.cdo={height:0,width:0};(function(){var a=window.innerWidth,b=window.innerHeight;if(!a||!b){var c=window.document,d="CSS1Compat"==c.compatMode?c.documentElement:c.body;a=d.clientWidth;b=d.clientHeight}a&&b&&(a!=google.cdo.width||b!=google.cdo.height)&&google.log("","","/client_204?&atyp=i&biw="+a+"&bih="+b+"&ei="+google.kEI);}).call(this);})();(function(){var 
u='/xjs/_/js/k\x3dxjs.hp.en.Rbvu6sL5jkw.O/m\x3dsb_he,d/am\x3dhgk2AQ/d\x3d1/rs\x3dACT90oHY26IKjRg5OVueCrM5IHCZPNfqwg';setTimeout(function(){var a=document.createElement("script");a.src=u;google.timers&&google.timers.load&&google.tick&&google.tick("load","xjsls");document.body.appendChild(a)},0);})();(function(){window.google.xjsu='/xjs/_/js/k\x3dxjs.hp.en.Rbvu6sL5jkw.O/m\x3dsb_he,d/am\x3dhgk2AQ/d\x3d1/rs\x3dACT90oHY26IKjRg5OVueCrM5IHCZPNfqwg';})();function _DumpException(e){throw e;}
function _F_installCss(c){}
(function(){google.spjs=false;google.snet=true;google.em=[];google.emw=false;})();(function(){var pmc='{\x22Qnk92g\x22:{},\x22RWGcrA\x22:{},\x22U5B21g\x22:{},\x22YFCs/g\x22:{},\x22YQeDTA\x22:{},\x22ZI/YVQ\x22:{},\x22d\x22:{},\x22mVopag\x22:{},\x22sb_he\x22:{\x22agen\x22:false,\x22cgen\x22:false,\x22client\x22:\x22heirloom-hp\x22,\x22dh\x22:true,\x22dhqt\x22:true,\x22ds\x22:\x22\x22,\x22ffql\x22:\x22ko\x22,\x22fl\x22:true,\x22host\x22:\x22google.com\x22,\x22isbh\x22:28,\x22jsonp\x22:true,\x22msgs\x22:{\x22cibl\x22:\x22검색어 지우기\x22,\x22dym\x22:\x22이것을 찾으셨나요?\x22,\x22lcky\x22:\x22I’m Feeling Lucky\x22,\x22lml\x22:\x22자세히 알아보기\x22,\x22oskt\x22:\x22입력 도구\x22,\x22psrc\x22:\x22검색어가 \\u003Ca href\x3d\\\x22/history\\\x22\\u003E웹 기록\\u003C/a\\u003E에서 삭제되었습니다.\x22,\x22psrl\x22:\x22삭제\x22,\x22sbit\x22:\x22이미지로 검색\x22,\x22srch\x22:\x22Google 검색\x22},\x22ovr\x22:{},\x22pq\x22:\x22\x22,\x22refpd\x22:true,\x22refspre\x22:true,\x22rfs\x22:[],\x22sbpl\x22:24,\x22sbpr\x22:24,\x22scd\x22:10,\x22sce\x22:5,\x22stok\x22:\x22j1NGbmD2awnqzsK22iDQWzYxsdI\x22,\x22uhde\x22:false}}';google.pmc=JSON.parse(pmc);})();</script> </body></html>
requests.get()에 의한 response에는 다양한 정보가 포함되어 있습니다.
웹페이지의 content를 유니코드 문자열(str)이 아니라 bytes 형태로 얻으려면 r.text 대신 r.content를 사용합니다.
import requests
r = requests.get('https://google.com')
html = r.content
print(html)
출력 결과는 아래와 같습니다.
b'<!doctype html><html itemscope="" itemtype="http://schema.org/WebPage" lang="ko"><head><meta content="text/html; charset=UTF-8" http-equiv="Content-Type"><meta content="/images/branding/googleg/1x/googleg_standard_color_128dp.png" itemprop="image"><title>Google</title><script nonce="bK5MYRkw4MttuRWfIYyExA==">(function(){window.google={kEI:\'A2yRXaXmJdWIr7wP5pO38AU\',kEXPI:\'0,1353747,3033,2629,731,223,510,1065,1216,1936,377,207,1017,54,250,1757,2,124,10,713,319,19,49,490,670,126,94,1131180,1197757,266,329194,1344,12383,4855,32692,15247,867,12163,16521,363,3320,5505,2442,5942,1119,2,579,727,2432,1361,283,4042,4966,773,2253,4741,7,3111,4882,1314,1719,1808,1478,7,491,2044,8909,4212,2,857,226,2017,37,920,873,1217,1710,1,1264,2736,48,2607,315,724,3240,4192,3874,2883,21,317,1118,902,2128,1,370,2776,519,400,992,509,776,8,109,2687,887,80,48,553,11,16,1277,2212,202,323,5,1252,840,324,193,1474,49,156,663,2620,818,108,152,52,1137,2,2063,606,1839,184,595,1182,520,1947,448,299,316,113,44,1009,95,326,1284,16,84,417,2426,1639,608,473,1177,163,747,440,305,294,3094,133,773,1548,524,7,728,592,523,1051,1227,652,35,422,1149,1525,73,426,957,2994,167,465,407,12,26,50,613,44,10,935,163,11,260,338,535,30,486,2,62,319,161,165,5,197,3,262,91,116,495,73,24,493,312,47,167,468,30,8,112,497,61,282,1247,26,497,246,42,455,57,47,417,562,92,144,137,130,289,142,417,42,1501,548,344,836,208,5887062,3028,5997625,2799687,4,1572,549,333,444,1,2,80,1,900,583,6,307,1,8,1,2,2132,1,1,1,1,1,414,1,748,200,726,3,7,4,559,1,4002,19,29,10,39,18,23965887\',authuser:0,kscs:\'c9c918f0_A2yRXaXmJdWIr7wP5pO38AU\',kGL:\'KR\',kBL:\'tf62\'};google.sn=\'webhp\';google.kHL=\'ko\';google.jsfs=\'Ffpdje\';})();(function(){google.lc=[];google.li=0;google.getEI=function(a){for(var b;a&&(!a.getAttribute||!(b=a.getAttribute("eid")));)a=a.parentNode;return b||google.kEI};google.getLEI=function(a){for(var b=null;a&&(!a.getAttribute||!(b=a.getAttribute("leid")));)a=a.parentNode;return 
b};google.https=function(){return"https:"==window.location.protocol};google.ml=function(){return null};google.time=function(){return(new Date).getTime()};google.log=function(a,b,e,c,g){if(a=google.logUrl(a,b,e,c,g)){b=new Image;var d=google.lc,f=google.li;d[f]=b;b.onerror=b.onload=b.onabort=function(){delete d[f]};google.vel&&google.vel.lu&&google.vel.lu(a);b.src=a;google.li=f+1}};google.logUrl=function(a,b,e,c,g){var d="",f=google.ls||"";e||-1!=b.search("&ei=")||(d="&ei="+google.getEI(c),-1==b.search("&lei=")&&(c=google.getLEI(c))&&(d+="&lei="+c));c="";!e&&google.cshid&&-1==b.search("&cshid=")&&"slh"!=a&&(c="&cshid="+google.cshid);a=e||"/"+(g||"gen_204")+"?atyp=i&ct="+a+"&cad="+b+d+f+"&zx="+google.time()+c;/^http:/i.test(a)&&google.https()&&(google.ml(Error("a"),!1,{src:a,glmm:1}),a="");return a};}).call(this);(function(){google.y={};google.x=function(a,b){if(a)var c=a.id;else{do c=Math.random();while(google.y[c])}google.y[c]=[a,b];return!1};google.lm=[];google.plm=function(a){google.lm.push.apply(google.lm,a)};google.lq=[];google.load=function(a,b,c){google.lq.push([[a],b,c])};google.loadAll=function(a,b){google.lq.push([a,b])};}).call(this);google.f={};var a=window.location,b=a.href.indexOf("#");if(0<=b){var c=a.href.substring(b+1);/(^|&)q=/.test(c)&&-1==c.indexOf("#")&&a.replace("/search?"+c.replace(/(^|&)fp=[^&]*/g,"")+"&cad=h")};</script><style>#gbar,#guser{font-size:13px;padding-top:1px !important;}#gbar{height:22px}#guser{padding-bottom:7px !important;text-align:right}.gbh,.gbd{border-top:1px solid #c9d7f1;font-size:1px}.gbh{height:0;position:absolute;top:24px;width:100%}@media all{.gb1{height:22px;margin-right:.5em;vertical-align:top}#gbar{float:left}}a.gb1,a.gb4{text-decoration:underline !important}a.gb1,a.gb4{color:#00c !important}.gbi .gb4{color:#dd8e27 !important}.gbf .gb4{color:#900 !important}\n</style><style>body,td,a,p,.h{font-family:굴림,돋움,arial,sans-serif}.ko{font-size:9pt}body{margin:0;overflow-y:scroll}#gog{padding:3px 8px 
0}td{line-height:.8em}.gac_m td{line-height:17px}form{margin-bottom:20px}.h{color:#36c}.q{color:#00c}.ts td{padding:0}.ts{border-collapse:collapse}em{font-weight:bold;font-style:normal}.lst{height:25px;width:496px}.gsfi,.lst{font:18px arial,sans-serif}.gsfs{font:17px arial,sans-serif}.ds{display:inline-box;display:inline-block;margin:3px 0 4px;margin-left:4px}input{font-family:inherit}a.gb1,a.gb2,a.gb3,a.gb4{color:#11c !important}body{background:#fff;color:black}a{color:#11c;text-decoration:none}a:hover,a:active{text-decoration:underline}.fl a{color:#36c}a:visited{color:#551a8b}a.gb1,a.gb4{text-decoration:underline}a.gb3:hover{text-decoration:none}#ghead a.gb2:hover{color:#fff !important}.sblc{padding-top:5px}.sblc a{display:block;margin:2px 0;margin-left:13px;font-size:11px}.lsbb{background:#eee;border:solid 1px;border-color:#ccc #999 #999 #ccc;height:30px}.lsbb{display:block}.ftl,#fll a{display:inline-block;margin:0 12px}.lsb{background:url(/images/nav_logo229.png) 0 -261px repeat-x;border:none;color:#000;cursor:pointer;height:30px;margin:0;outline:0;font:15px arial,sans-serif;vertical-align:top}.lsb:active{background:#ccc}.lst:focus{outline:none}.tiah{width:458px}</style><script nonce="bK5MYRkw4MttuRWfIYyExA=="></script></head><body bgcolor="#fff"><script nonce="bK5MYRkw4MttuRWfIYyExA==">(function(){var src=\'/images/nav_logo229.png\';var iesg=false;document.body.onload = function(){window.n && window.n();if (document.images){new Image().src=src;}\nif (!iesg){document.f&&document.f.q.focus();document.gbqf&&document.gbqf.q.focus();}\n}\n})();</script><div id="mngb"> <div id=gbar><nobr><b class=gb1>검색</b> <a class=gb1 href="https://www.google.co.kr/imghp?hl=ko&tab=wi">이미지</a> <a class=gb1 href="https://maps.google.co.kr/maps?hl=ko&tab=wl">지도</a> <a class=gb1 href="https://play.google.com/?hl=ko&tab=w8">Play</a> <a class=gb1 href="https://www.youtube.com/?gl=KR&tab=w1">YouTube</a> <a class=gb1 href="https://news.google.co.kr/nwshp?hl=ko&tab=wn">뉴스</a> <a class=gb1 
href="https://mail.google.com/mail/?tab=wm">Gmail</a> <a class=gb1 href="https://drive.google.com/?tab=wo">드라이브</a> <a class=gb1 style="text-decoration:none" href="https://www.google.co.kr/intl/ko/about/products?tab=wh"><u>더보기</u> »</a></nobr></div><div id=guser width=100%><nobr><span id=gbn class=gbi></span><span id=gbf class=gbf></span><span id=gbe></span><a href="http://www.google.co.kr/history/optout?hl=ko" class=gb4>웹 기록</a> | <a href="/preferences?hl=ko" class=gb4>설정</a> | <a target=_top id=gb_70 href="https://accounts.google.com/ServiceLogin?hl=ko&passive=true&continue=https://www.google.com/" class=gb4>로그인</a></nobr></div><div class=gbh style=left:0></div><div class=gbh style=right:0></div> </div><center><br clear="all" id="lgpd"><div id="lga"><img alt="Google" height="92" src="/images/branding/googlelogo/1x/googlelogo_white_background_color_272x92dp.png" style="padding:28px 0 14px" width="272" id="hplogo"><br><br></div><form action="/search" name="f"><table cellpadding="0" cellspacing="0"><tr valign="top"><td width="25%"> </td><td align="center" nowrap=""><input name="ie" value="ISO-8859-1" type="hidden"><input value="ko" name="hl" type="hidden"><input name="source" type="hidden" value="hp"><input name="biw" type="hidden"><input name="bih" type="hidden"><div class="ds" style="height:32px;margin:4px 0"><div style="position:relative;zoom:1"><input style="color:#000;margin:0;padding:5px 8px 0 6px;vertical-align:top;padding-right:38px" autocomplete="off" class="lst tiah" value="" title="Google 검색" maxlength="2048" name="q" size="57"><img src="/textinputassistant/tia.png" style="position:absolute;cursor:pointer;right:5px;top:4px;z-index:300" data-script-url="/textinputassistant/11/ko_tia.js" id="tsuid1" alt="" height="23" width="27"><script nonce="bK5MYRkw4MttuRWfIYyExA==">(function(){var id=\'tsuid1\';document.getElementById(id).onclick = function(){var s = document.createElement(\'script\');s.src = 
this.getAttribute(\'data-script-url\');(document.getElementById(\'xjsc\')||document.body).appendChild(s);};})();</script></div></div><br style="line-height:0"><span class="ds"><span class="lsbb"><input class="lsb" value="Google 검색" name="btnG" type="submit"></span></span><span class="ds"><span class="lsbb"><input class="lsb" id="tsuid2" value="I’m Feeling Lucky" name="btnI" type="submit"><script nonce="bK5MYRkw4MttuRWfIYyExA==">(function(){var id=\'tsuid2\';document.getElementById(id).onclick = function(){if (this.form.q.value){this.checked = 1;if (this.form.iflsig)this.form.iflsig.disabled = false;}\nelse top.location=\'/doodles/\';};})();</script></span></span></td><td class="fl sblc" align="left" nowrap="" width="25%"><a href="/advanced_search?hl=ko&authuser=0">고급검색</a><a href="/language_tools?hl=ko&authuser=0">언어도구</a></td></tr></table><input id="gbv" name="gbv" type="hidden" value="1"><script nonce="bK5MYRkw4MttuRWfIYyExA==">(function(){var a,b="1";if(document&&document.getElementById)if("undefined"!=typeof XMLHttpRequest)b="2";else if("undefined"!=typeof ActiveXObject){var c,d,e=["MSXML2.XMLHTTP.6.0","MSXML2.XMLHTTP.3.0","MSXML2.XMLHTTP","Microsoft.XMLHTTP"];for(c=0;d=e[c++];)try{new ActiveXObject(d),b="2"}catch(h){}}a=b;if("2"==a&&-1==location.search.indexOf("&gbv=2")){var f=google.gbvu,g=document.getElementById("gbv");g&&(g.value=a);f&&window.setTimeout(function(){location.href=f},0)};}).call(this);</script></form><div id="gac_scont"></div><div style="font-size:83%;min-height:3.5em"><br></div><span id="footer"><div style="font-size:10pt"><div style="margin:19px auto;text-align:center" id="fll"><a href="/intl/ko/ads/">광고 프로그램</a><a href="http://www.google.co.kr/intl/ko/services/">비즈니스 솔루션</a><a href="/intl/ko/about.html">Google 정보</a><a href="https://www.google.com/setprefdomain?prefdom=KR&prev=https://www.google.co.kr/&sig=K_7JyPX5AoJhXBhRv8MdFaTIOflC8%3D">Google.co.kr</a></div></div><p style="color:#767676;font-size:8pt">© 2019 - <a 
href="/intl/ko/policies/privacy/">개인정보처리방침</a> - <a href="/intl/ko/policies/terms/">약관</a></p></span></center><script nonce="bK5MYRkw4MttuRWfIYyExA==">(function(){window.google.cdo={height:0,width:0};(function(){var a=window.innerWidth,b=window.innerHeight;if(!a||!b){var c=window.document,d="CSS1Compat"==c.compatMode?c.documentElement:c.body;a=d.clientWidth;b=d.clientHeight}a&&b&&(a!=google.cdo.width||b!=google.cdo.height)&&google.log("","","/client_204?&atyp=i&biw="+a+"&bih="+b+"&ei="+google.kEI);}).call(this);})();(function(){var u=\'/xjs/_/js/k\\x3dxjs.hp.en.Rbvu6sL5jkw.O/m\\x3dsb_he,d/am\\x3dhgk2AQ/d\\x3d1/rs\\x3dACT90oHY26IKjRg5OVueCrM5IHCZPNfqwg\';setTimeout(function(){var a=document.createElement("script");a.src=u;google.timers&&google.timers.load&&google.tick&&google.tick("load","xjsls");document.body.appendChild(a)},0);})();(function(){window.google.xjsu=\'/xjs/_/js/k\\x3dxjs.hp.en.Rbvu6sL5jkw.O/m\\x3dsb_he,d/am\\x3dhgk2AQ/d\\x3d1/rs\\x3dACT90oHY26IKjRg5OVueCrM5IHCZPNfqwg\';})();function _DumpException(e){throw e;}\nfunction _F_installCss(c){}\n(function(){google.spjs=false;google.snet=true;google.em=[];google.emw=false;})();(function(){var pmc=\'{\\x22Qnk92g\\x22:{},\\x22RWGcrA\\x22:{},\\x22U5B21g\\x22:{},\\x22YFCs/g\\x22:{},\\x22YQeDTA\\x22:{},\\x22ZI/YVQ\\x22:{},\\x22d\\x22:{},\\x22mVopag\\x22:{},\\x22sb_he\\x22:{\\x22agen\\x22:false,\\x22cgen\\x22:false,\\x22client\\x22:\\x22heirloom-hp\\x22,\\x22dh\\x22:true,\\x22dhqt\\x22:true,\\x22ds\\x22:\\x22\\x22,\\x22ffql\\x22:\\x22ko\\x22,\\x22fl\\x22:true,\\x22host\\x22:\\x22google.com\\x22,\\x22isbh\\x22:28,\\x22jsonp\\x22:true,\\x22msgs\\x22:{\\x22cibl\\x22:\\x22검색어 지우기\\x22,\\x22dym\\x22:\\x22이것을 찾으셨나요?\\x22,\\x22lcky\\x22:\\x22I’m Feeling Lucky\\x22,\\x22lml\\x22:\\x22자세히 알아보기\\x22,\\x22oskt\\x22:\\x22입력 도구\\x22,\\x22psrc\\x22:\\x22검색어가 \\\\u003Ca href\\x3d\\\\\\x22/history\\\\\\x22\\\\u003E웹 기록\\\\u003C/a\\\\u003E에서 삭제되었습니다.\\x22,\\x22psrl\\x22:\\x22삭제\\x22,\\x22sbit\\x22:\\x22이미지로 
검색\\x22,\\x22srch\\x22:\\x22Google 검색\\x22},\\x22ovr\\x22:{},\\x22pq\\x22:\\x22\\x22,\\x22refpd\\x22:true,\\x22refspre\\x22:true,\\x22rfs\\x22:[],\\x22sbpl\\x22:24,\\x22sbpr\\x22:24,\\x22scd\\x22:10,\\x22sce\\x22:5,\\x22stok\\x22:\\x22kTHNj3aQwBAGOLiWN-6Sxu-J8Us\\x22,\\x22uhde\\x22:false}}\';google.pmc=JSON.parse(pmc);})();</script> </body></html>'
response 객체
import requests
r = requests.get('https://google.com')
html = r.content
print(r.status_code)
print(r.headers['Content-Type'])
print(r.encoding)
print(r.ok)
response 객체는 HTTP request에 의한 서버의 응답 정보를 갖고 있습니다.
status_code, headers, encoding, ok 등의 속성을 이용하면 다양한 정보를 얻을 수 있습니다.
결과는 아래와 같습니다.
200
text/html; charset=ISO-8859-1
ISO-8859-1
True
status_code는 정상일 경우 200, 페이지가 발견되지 않을 경우 404입니다.
encoding 방식은 ISO-8859-1이고, r.ok가 True이므로 요청에 대한 응답이 정상적으로 이루어졌음을 알 수 있습니다. (r.ok는 status_code가 400보다 작은 경우 True, 그렇지 않은 경우 False입니다.)
만약 인코딩 방식이 달라서 한글이 제대로 표시되지 않으면 아래와 같이 인코딩 방식을 변경해 줍니다.
r.encoding = 'utf-8'
Requests를 이용해서 html 소스를 가져왔지만, 단순한 문자열 형태이기 때문에 파싱(Parsing)에 적합하지 않습니다.
이제 BeautifulSoup을 이용해서 파이썬이 html 소스를 분석하고 데이터를 추출하기 편리하도록 객체로 변환합니다.
이전글/다음글
이전글 : BeautifulSoup 기본 사용
다음글 : 네이버 뉴스 제목 가져오기