78 lines
2.8 KiB
Python
Executable File
78 lines
2.8 KiB
Python
Executable File
#!/usr/bin/env python
|
|
# -*- coding: utf-8 -*-
|
|
|
|
from __future__ import print_function
|
|
|
|
from bs4 import BeautifulSoup
|
|
import argparse
|
|
import os
|
|
import sys
|
|
|
|
###################################################################################################
|
|
# Process-wide settings and script location details, captured once at import.

# Working directory at the moment the script was loaded.
origPath = os.getcwd()

# Directory containing this script (symlinks resolved) and its file name
# exactly as it was invoked.
scriptPath = os.path.dirname(os.path.realpath(__file__))
scriptName = os.path.basename(__file__)

# True when running under Python 3 or newer.
PY3 = sys.version_info[0] >= 3

# Verbose-output flag; main() replaces it with the parsed --verbose value.
debug = False
|
|
|
|
###################################################################################################
|
|
# Python 2 compatibility shims; both are no-ops on Python 3.

# Alias input() to raw_input() wherever raw_input exists. The name is probed
# directly instead of via hasattr(__builtins__, ...): __builtins__ is a module
# inside __main__ but a plain dict inside imported modules, where hasattr()
# would never find the attribute and the alias would silently be skipped.
try:
    input = raw_input  # noqa: F821 - raw_input only exists on Python 2
except NameError:
    pass

# FileNotFoundError is missing on Python 2; fall back to IOError there.
try:
    FileNotFoundError
except NameError:
    FileNotFoundError = IOError
|
|
|
|
###################################################################################################
|
|
# print to stderr
|
|
def eprint(*args, **kwargs):
    """Print to standard error.

    Takes the same positional and keyword arguments as the built-in print().
    The destination is fixed to sys.stderr, so passing ``file=`` raises
    TypeError (duplicate keyword argument).
    """
    print(*args, file=sys.stderr, **kwargs)
|
|
|
|
###################################################################################################
|
|
# convenient boolean argument parsing
|
|
def str2bool(v):
    """Interpret a command-line value as a boolean (argparse ``type=`` helper).

    Args:
        v: Value to interpret. A bool is returned unchanged; a string is
            matched case-insensitively with surrounding whitespace ignored.

    Returns:
        True for 'yes', 'true', 't', 'y' or '1';
        False for 'no', 'false', 'f', 'n' or '0'.

    Raises:
        argparse.ArgumentTypeError: if the string is not a recognized spelling.
    """
    # Already a boolean (e.g. a default or const handed in programmatically):
    # nothing to parse, and bool has no .lower() to call.
    if isinstance(v, bool):
        return v

    # Normalize once instead of lower-casing for every comparison.
    text = v.strip().lower()
    if text in ('yes', 'true', 't', 'y', '1'):
        return True
    if text in ('no', 'false', 'f', 'n', '0'):
        return False
    raise argparse.ArgumentTypeError('Boolean value expected.')
|
|
|
|
###################################################################################################
|
|
# main
|
|
def main():
    """Remove every <div> of a given class from an HTML file.

    Command-line arguments:
        -v/--verbose    optional boolean; enables diagnostic output on stderr
        -i/--input      path of the HTML file to read (required)
        -o/--output     path of the file to write (required)
        -c/--div-class  class of the <div> elements to remove (required)
        -p/--parser     BeautifulSoup parser name (default 'html.parser')
        -e/--encoding   encoding of the output file (default 'utf-8')

    Any argument error (including -h, since add_help is disabled) prints the
    help text and exits with status 2.
    """
    global debug

    parser = argparse.ArgumentParser(description=scriptName, add_help=False, usage='{} <arguments>'.format(scriptName))
    parser.add_argument('-v', '--verbose', dest='debug', type=str2bool, nargs='?', const=True, default=False, help="Verbose output")
    parser.add_argument('-i', '--input', required=True, metavar='<STR>', type=str, help='Input file')
    parser.add_argument('-o', '--output', required=True, metavar='<STR>', type=str, help='Output file')
    parser.add_argument('-c', '--div-class', required=True, dest='divClass', metavar='<STR>', type=str, default='', help='div class to remove')
    parser.add_argument('-p', '--parser', required=False, dest='parser', metavar='<STR>', type=str, default='html.parser', help='BeautifulSoup parser')
    parser.add_argument('-e', '--encoding', required=False, dest='encoding', metavar='<STR>', type=str, default='utf-8', help='Encoding for output file')
    try:
        # Route argparse errors through parser.exit so they surface as a
        # SystemExit, which is caught below to show the full help text.
        parser.error = parser.exit
        args = parser.parse_args()
    except SystemExit:
        parser.print_help()
        # sys.exit rather than the exit() builtin: the latter is injected by
        # the site module and is missing under `python -S` or in frozen builds.
        sys.exit(2)

    debug = args.debug
    if debug:
        eprint(os.path.join(scriptPath, scriptName))
        eprint("Arguments: {}".format(sys.argv[1:]))
        eprint("Arguments: {}".format(args))
    else:
        # Keep error output terse when not in verbose mode.
        sys.tracebacklimit = 0

    # BeautifulSoup consumes the handle inside its constructor, so the file
    # can be closed as soon as the tree is built (the original leaked it).
    with open(args.input) as f:
        soup = BeautifulSoup(f, args.parser)

    for div in soup.find_all("div", {'class': args.divClass}):
        div.decompose()

    # prettify(encoding) returns bytes, hence the binary write mode.
    with open(args.output, 'wb') as f:
        f.write(soup.prettify(args.encoding))
|
|
|
|
# Script entry point: run main() only on direct execution, not on import.
if __name__ == '__main__':
    main()
|