-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathpyl
More file actions
executable file
·166 lines (127 loc) · 4.74 KB
/
pyl
File metadata and controls
executable file
·166 lines (127 loc) · 4.74 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
#!/usr/bin/python -O
'''
pyl: filename [code1] [code2] [code3]
If no code blocks are provided, system reads from stdin.
If one code block is provided, that block is processed for every line.
If two are provided, the first is processed for every line, and the second after everything executes.
If three are provided, the first is processed before anything happens, the second for every line, and the third after everything.
Interface:
_n: current line number
_line (for csv files): current line
_row (for html files): current row in table
_count: number of lines that have passed the filter
column(n,sep): splits line using sep, and returns n-th column.
columns(sep): splits line using sep, and returns all columns
_filter: a variable that can be set to indicate if line passes a test. Optional.
At the end, if no post-block is provided, number of matched lines (determined by _filter) is printed.
When entering code from the command-line, semicolons can be used to separate lines
Users could use any variable that does not begin with an underscore. Variables preceded
by one underscore are system variables exposed to the user.
The only one users should modify is _filter.
'''
import sys
import argparse
import re
# Patterns used by table_parse() to pull rows and cells out of an HTML table.
# re.DOTALL lets "." cross line breaks, so a table, row or cell that spans
# several lines is still matched (without it only markup written on a single
# line was found).  re.IGNORECASE accepts upper-case tags such as <TABLE>.
# The table pattern is greedy: it captures everything between the first
# opening <table ...> tag and the last closing </table> tag.
__tablefinder = re.compile(r"<table[^>]*?>(.+)</table>", re.DOTALL | re.IGNORECASE)
# One match per <tr ...>...</tr> element.
__rowsfinder = re.compile(r"<tr[^>]*?>.*?</tr>", re.DOTALL | re.IGNORECASE)
# One match per <td ...>...</td> element (whole element, tags included).
__tdfinder = re.compile(r"<td[^>]*?>.*?</td>", re.DOTALL | re.IGNORECASE)
# Same as above, but group 1 holds only the text between the <td> tags.
__tdcontentfinder = re.compile(r"<td[^>]*?>(.*?)</td>", re.DOTALL | re.IGNORECASE)
# Command-line interface: one required input file, zero or more code blocks,
# and a few optional switches.  Parsing happens immediately at import time.
parser = argparse.ArgumentParser(description='Python Line Processor')
parser.add_argument(
    "--use",
    metavar="use",
    default="",
    help="pre-read csv of listed files",
)
parser.add_argument(
    "--silent", "-s",
    action="store_true",
    help="Don't print number of lines that passed.",
)
parser.add_argument(
    "--verbose", "-v",
    action="store_true",
    help="Automatically print all lines that pass.",
)
parser.add_argument(
    "file",
    metavar="file",
    help="File to be processed.",
)
parser.add_argument(
    "blocks",
    metavar='code1',
    nargs="*",
    help="Code to run for each line. Takes 0-3 blocks. If no blocks are provided, code is read from stdin. If 2 blocks are provided, the second is run after all lines are processed. If 3 blocks are provided, the first block is run before the file is processed and the last after the file is processed.",
)
args = parser.parse_args()
def table_parse(f):
    """Extract the cell text of the HTML table found in an open file.

    Reads all of ``f``, takes the text captured by the module-level table
    pattern, and splits it into rows (``<tr>``) and cells (``<td>``).

    Args:
        f: readable file-like object whose ``read()`` returns HTML text.

    Returns:
        A list of rows; each row is a list holding the whitespace-stripped
        inner text of its ``<td>`` cells.  An empty list is returned when the
        table pattern does not match anything.
    """
    found = __tablefinder.search(f.read())
    if found is None:
        # No table in the input: report zero rows instead of failing with
        # AttributeError on None.group().
        return []
    inner_content = found.group(1).strip()
    table = []
    for row_html in __rowsfinder.findall(inner_content):
        # The content pattern has a single capture group, so findall() yields
        # the inner text of every <td> in the row directly.
        cells = [cell.strip() for cell in __tdcontentfinder.findall(row_html)]
        table.append(cells)
    return table
def code_from_argv(entry):
    """Turn a code block given on the command line into multi-line source.

    Every ";;" in ``entry`` becomes a line break, so several lines of code
    can be passed as one argument.  Text without ";;" is returned unchanged.
    """
    # TODO Replace { and } with newlines, and do auto indent
    return entry.replace(";;", "\n")
# Decide which code runs before the file (__pre_processor), for every record
# (__processor) and after the file (__post_processor).
__pre_processor = ""
if args.silent:
    __post_processor = ""
else:
    # Default summary, executed once the whole file has been handled.
    __post_processor = 'print "Finished processing "+args.file+". "+str(_count)+" lines matched."'

__n_blocks = len(args.blocks)
if __n_blocks == 0:
    # No code on the command line: the per-record code comes from stdin.
    __processor = sys.stdin.read()
elif __n_blocks == 1:
    __processor = code_from_argv(args.blocks[0])
elif __n_blocks == 2:
    # per-record block, then a post block that runs ahead of the default summary
    __processor = code_from_argv(args.blocks[0])
    __post_processor = code_from_argv(args.blocks[1]) + "\n" + __post_processor
elif __n_blocks == 3:
    # pre block, per-record block, post block (replaces the default summary)
    __pre_processor = code_from_argv(args.blocks[0])
    __processor = code_from_argv(args.blocks[1])
    __post_processor = code_from_argv(args.blocks[2])
else:
    # Previously no branch matched and __processor stayed undefined, which
    # surfaced later as a NameError; reject the command line up front instead.
    parser.error("expected at most 3 code blocks, got %d" % __n_blocks)
# The input is treated as an HTML table when the file name ends in "html";
# every other file is handled as line-oriented ("csv") text.
__input_type = "html" if args.file.endswith("html") else "csv"
# Pre-load the files named by --use (comma separated) into _files, keyed by
# file name, so _read() can return their text without opening them again.
_files = {}
for f_name in args.use.split(","):
    # Strip first so that "--use 'a.csv, b.csv'" opens "b.csv" rather than
    # " b.csv"; empty entries (e.g. the default "") are skipped.
    f_name = f_name.strip()
    if not f_name:
        continue
    # "with" closes the handle even if read() raises.
    with open(f_name) as f:
        _files[f_name] = f.read()
### Utility Functions ###
def _in(filename, target, mode="l"):
    """Report whether ``target`` equals some line of ``filename``.

    The text is obtained through _read() and split on newlines; each piece is
    compared with ``target`` after stripping surrounding whitespace.
    ``mode`` is accepted but not used.

    Returns:
        True if any stripped line equals ``target``, otherwise False.
    """
    return any(
        candidate.strip() == target
        for candidate in _read(filename).split("\n")
    )
def _read(filename):
    """Return the full text of ``filename``.

    Text already stored in ``_files`` under this name is returned directly;
    any other file is opened and read on every call (it is not added to
    ``_files``).

    Raises:
        Whatever ``open()`` / ``read()`` raise when the file cannot be read.
    """
    if filename in _files:
        return _files[filename]
    # "with" guarantees the handle is closed even if read() raises.
    with open(filename) as handle:
        return handle.read()
# Helpers exposed to user code; they are only defined for line-oriented input
# and operate on the current line held in the global _line.
if __input_type == "csv":
    def column(n, sep=None):
        """Return field ``n`` of the current line, split on ``sep``, stripped.

        With ``sep=None`` the line is split on runs of whitespace.
        """
        fields = _line.split(sep)
        return fields[n].strip()

    def columns(sep=None):
        """Return every field of the current line, split on ``sep``.

        The fields are returned as produced by ``str.split`` (not stripped).
        """
        return _line.split(sep)
### Actually Processing Files ###
# Run the user's code over the input file.
#
# The code blocks come from whoever invokes the tool and are run with exec()
# in this module's namespace; that is how they see _line/_row, _n, _count and
# _filter.  NOTE: never feed this script code from an untrusted source.
_n = 0       # number of records read so far
_count = 0   # number of records that left _filter truthy
# "with" closes the input even when user code raises.
with open(args.file) as __f:
    if __pre_processor != "":
        exec(__pre_processor)
    # Compile once so the per-record code is not re-parsed for every record.
    __processor_code = compile(__processor, "<string>", "exec")
    if __input_type == "csv":
        for _line in __f:
            _line = _line.strip()
            _n += 1
            _filter = True  # user code may set this to a false value to reject the line
            exec(__processor_code)
            if _filter:
                _count += 1
                if args.verbose:
                    print(_line)
    elif __input_type == "html":
        for _row in table_parse(__f):
            _n += 1
            _filter = True  # user code may set this to a false value to reject the row
            exec(__processor_code)
            if _filter:
                _count += 1
                if args.verbose:
                    # HTML mode has no _line (printing it raised NameError);
                    # the current record is _row.
                    print(_row)
    if __post_processor != "":
        exec(__post_processor)