1#!/usr/bin/env python 2# Copyright 2013 Google Inc. All Rights Reserved. 3# 4# Licensed under the Apache License, Version 2.0 (the "License"); 5# you may not use this file except in compliance with the License. 6# You may obtain a copy of the License at 7# 8# http://www.apache.org/licenses/LICENSE-2.0 9# 10# Unless required by applicable law or agreed to in writing, software 11# distributed under the License is distributed on an "AS IS" BASIS, 12# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13# See the License for the specific language governing permissions and 14# limitations under the License. 15 16"""Inject javascript into html page source code.""" 17 18import datetime 19import logging 20import os 21import re 22import util 23import third_party.jsmin as jsmin 24 25DOCTYPE_RE = re.compile(r'^.{,256}?(<!--.*-->)?.{,256}?<!doctype html>', 26 re.IGNORECASE | re.DOTALL) 27HTML_RE = re.compile(r'^.{,256}?(<!--.*-->)?.{,256}?<html.*?>', 28 re.IGNORECASE | re.DOTALL) 29HEAD_RE = re.compile(r'^.{,256}?(<!--.*-->)?.{,256}?<head.*?>', 30 re.IGNORECASE | re.DOTALL) 31 32# Occurences of this marker in injected scripts will be replaced with 33# recording time in javascripts' Date().toValue() format. This allows 34# to properly set deterministic date in JS code. See 35# https://github.com/chromium/web-page-replay/issues/71 for details. 
TIME_SEED_MARKER = '{{WPR_TIME_SEED_TIMESTAMP}}'


def _ToJsTimestamp(record_time):
  """Returns |record_time| as integer milliseconds since 1970-01-01.

  Only integer arithmetic is used so the result is an exact int on both
  Python 2 and Python 3 (true division would yield a float on Python 3 and
  leak a fractional part into the injected script).

  Args:
    record_time: a naive datetime.datetime.

  Returns:
    An int, with sub-millisecond precision truncated.
  """
  delta = record_time - datetime.datetime(1970, 1, 1)
  whole_seconds = delta.days * 86400 + delta.seconds
  return whole_seconds * 1000 + delta.microseconds // 1000


def GetScriptInjector(scripts):
  """Loads |scripts| from disk and returns an injector of their content.

  Args:
    scripts: a list/tuple of script paths, a comma-separated string of
        script paths, or a falsy value for "no scripts".

  Returns:
    A function taking the recording time (a naive datetime.datetime) and
    returning the concatenated scripts, passed through jsmin, with every
    TIME_SEED_MARKER replaced by that time in integer milliseconds since
    1970-01-01.

  Raises:
    Exception: if a script is found neither on disk nor via
        util.resource_exists().
  """
  sources = []
  if scripts:
    if not isinstance(scripts, (list, tuple)):
      scripts = scripts.split(',')
    for script in scripts:
      if os.path.exists(script):
        with open(script) as f:
          sources.append(f.read())
      elif util.resource_exists(script):
        # Fall back to the project's resource loader when the path is not a
        # plain file on disk.
        sources.append(util.resource_string(script))
      else:
        raise Exception('Script does not exist: %s' % script)

  # Process the scripts once up front; only the timestamp substitution is
  # repeated on each injector call.
  script_template = jsmin.jsmin(''.join(sources), quote_chars="'\"`")

  def injector(record_time):
    return script_template.replace(TIME_SEED_MARKER,
                                   str(_ToJsTimestamp(record_time)))
  return injector


def _IsHtmlContent(content):
  """Returns True if |content|, ignoring outer whitespace, is '<...>'."""
  content = content.strip()
  return content.startswith('<') and content.endswith('>')


def InjectScript(text_chunks, content_type, script_to_inject):
  """Inject |script_to_inject| into |text_chunks| if the content is HTML.

  The script is wrapped in a <script> element and inserted immediately after
  the first match of <head>, <html> or <!doctype html> (tried in that order).
  If none of them is found, it is inserted at the very beginning and a
  warning is logged.

  Nothing is injected when |content_type| is not exactly 'text/html', when
  the joined content is empty or does not look like HTML, or when
  |script_to_inject| already occurs in the content.

  Args:
    text_chunks: a sequence of strings that together form the content.
    content_type: the content type of the response.
    script_to_inject: the script source to insert (without <script> tags).

  Returns:
    text_chunks, just_injected
      |text_chunks| is a new list holding the content if the script is
        injected, otherwise the original object. If the script was injected,
        exactly one chunk in |text_chunks| will have changed.
      |just_injected| indicates if |script_to_inject| was just injected in
        the content.
  """
  if content_type != 'text/html':
    return text_chunks, False
  content = ''.join(text_chunks)
  if (not content or not _IsHtmlContent(content) or
      script_to_inject in content):
    return text_chunks, False

  # Offset into the joined content at which the script is inserted.
  pos = None
  for regexp in (HEAD_RE, HTML_RE, DOCTYPE_RE):
    matchobj = regexp.search(content)
    if matchobj:
      pos = matchobj.end(0)
      break
  if pos is None:
    logging.warning('Inject at the very beginning, because no tag of '
                    '<head>, <html> or <!doctype html> is found.')
    pos = 0

  # Translate the global offset into (chunk index, offset within chunk) and
  # splice the script into that single chunk.
  result = list(text_chunks)
  for i, chunk in enumerate(result):
    if pos <= len(chunk):
      result[i] = '%s<script>%s</script>%s' % (chunk[:pos],
                                               script_to_inject,
                                               chunk[pos:])
      break
    pos -= len(chunk)
  return result, True