Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
W
wanjia
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
kino
wanjia
Commits
267ec90c
Commit
267ec90c
authored
Dec 18, 2020
by
zhangminghui
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
update
parent
1e55b544
Changes
4
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
55 additions
and
13 deletions
+55
-13
api_assemble.py
wanjia_tuomin/dmp_scripts/api_assemble/api_assemble.py
+2
-0
file_handle.py
wanjia_tuomin/dmp_scripts/file_handle/file_handle.py
+15
-9
upload2Ftp.sh
wanjia_tuomin/dmp_scripts/file_handle/upload2Ftp.sh
+34
-0
modifyStatus.py
wanjia_tuomin/dmp_scripts/modifyStatus.py
+4
-4
No files found.
wanjia_tuomin/dmp_scripts/api_assemble/api_assemble.py
0 → 100644
View file @
267ec90c
def main():
    """Print this script's file name to standard output."""
    print("api_assemble.py")


# Run only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()
wanjia_tuomin/dmp_scripts/file_handle/file_handle.py
View file @
267ec90c
...
...
@@ -51,30 +51,33 @@ def getColumns(column,hdfs_csv_path):
col_file
+=
list_cols
[
i
][
0
]
+
', '
col_list
.
append
(
list_cols
[
i
][
0
])
col_create
+=
list_cols
[
i
][
0
]
+
" string, "
create_tmp_sql
=
'create table
impala_tmp as select
%
s from
%
s'
%
(
col
[
0
:
-
2
],
'
a'
)
md5_sql
=
'select
%
s from
%
s'
%
(
col_file
[
0
:
-
2
],
'impala_tmp'
)
create_tmp_sql
=
'create table
dmp_demo.impala_tmp as select
%
s from
%
s'
%
(
col
[
0
:
-
2
],
'dmp_demo.
a'
)
md5_sql
=
'select
%
s from
%
s'
%
(
col_file
[
0
:
-
2
],
'
dmp_demo.
impala_tmp'
)
impala_sql
=
""
for
i
in
range
(
0
,
len
(
col_list
)):
if
i
<
10
:
impala_sql
+=
col_list
[
i
]
+
" as c0"
+
str
(
i
+
1
)
+
", "
else
:
impala_sql
+=
col_list
[
i
]
+
" as c"
+
str
(
i
+
1
)
+
", "
impala_sql
=
'insert into
%
s as select
%
s from
%
s'
%
(
"impala_table"
,
impala_sql
[
0
:
-
2
],
'impala_tmp'
)
create_sql
=
'create external table a (
%
s) stored as textfile location
\\
"
%
s
\\
" '
%
(
col_create
[
0
:
-
2
],
hdfs_csv_path
)
logger
.
info
(
"load csv sql:"
,
create_sql
)
logger
.
info
(
"create impala external table sql:"
,
create_tmp_sql
)
logger
.
info
(
"tuomin sql:"
,
md5_sql
)
logger
.
info
(
"output impala sql:"
,
impala_sql
)
impala_sql
=
'insert into
%
s as select
%
s from
%
s'
%
(
"dmp_demo.impala_table"
,
impala_sql
[
0
:
-
2
],
'dmp_demo.impala_tmp'
)
create_sql
=
'create external table dmp_demo.a (
%
s) row format delimited fields terminated by
\'
,
\'
'
\
'location
\\
"
%
s
\\
" tblproperties(
\\
"skip.header.line.count
\\
"=
\\
"1
\\
")'
%
(
col_create
[
0
:
-
2
],
hdfs_csv_path
)
print
(
"=======> load csv sql:"
,
create_sql
)
print
(
"=======> create impala external table sql:"
,
create_tmp_sql
)
print
(
"=======> tuomin sql:"
,
md5_sql
)
print
(
"=======> output impala sql:"
,
impala_sql
)
return
[
create_sql
,
create_tmp_sql
,
md5_sql
,
impala_sql
]
except
Exception
as
e
:
logger
.
error
(
"json parse exception:{0}"
.
format
(
e
))
# 更新文件处理状态:正在处理中...,列名解析异常
modifyStatus
.
updateStatusById
(
constants
.
FILE_PROCESSING
,
constants
.
PARSE_COLS
,
e
,
localTime
,
constants
.
SP_UPDATE_PERSON
,
file_id
,
constants
.
FILE_TABLE_NAME
)
sys
.
exit
(
1
)
if
__name__
==
'__main__'
:
if
len
(
sys
.
argv
)
<
3
:
logger
.
info
(
"
python arguments is 3 but arguments pass
%
s"
%
len
(
sys
.
argv
))
print
(
"=======>
python arguments is 3 but arguments pass
%
s"
%
len
(
sys
.
argv
))
else
:
file_id
=
sys
.
argv
[
1
]
hdfs_path
=
sys
.
argv
[
2
]
...
...
@@ -95,6 +98,7 @@ if __name__ == '__main__':
# 更新文件处理状态:正在处理中...,连接数据库获取文件字段、类型异常
modifyStatus
.
updateStatusById
(
constants
.
FILE_PROCESSING
,
constants
.
GET_COL_FAILE
,
e
,
localTime
,
constants
.
SP_UPDATE_PERSON
,
file_id
,
constants
.
FILE_TABLE_NAME
)
sys
.
exit
(
1
)
list_sqls
=
getColumns
(
cols
,
hdfs_path
)
...
...
@@ -117,6 +121,7 @@ if __name__ == '__main__':
# 更新文件处理状态:正在处理中...,csv落表异常
modifyStatus
.
updateStatusById
(
constants
.
FILE_PROCESSING
,
constants
.
CSV_TABLE_FILE
,
e
,
localTime
,
constants
.
SP_UPDATE_PERSON
,
file_id
,
constants
.
FILE_TABLE_NAME
)
sys
.
exit
(
1
)
try
:
# upload file to ftp
...
...
@@ -133,6 +138,7 @@ if __name__ == '__main__':
# 更新文件处理状态:正在处理中...,csv落ftp异常
modifyStatus
.
updateStatusById
(
constants
.
FILE_PROCESSING
,
constants
.
CSV_TO_FTP_FILE
,
e
,
localTime
,
constants
.
SP_UPDATE_PERSON
,
file_id
,
constants
.
FILE_TABLE_NAME
)
sys
.
exit
(
1
)
# 更新文件处理状态:文件处理完成
modifyStatus
.
updateStatusById
(
constants
.
FILE_PROCESSING_FINISH
,
constants
.
FINAL_RESULT_SUCCESS
,
"成功"
,
localTime
,
constants
.
ZMH_UPDATE_PERSON
,
file_id
,
constants
.
FILE_TABLE_NAME
)
wanjia_tuomin/dmp_scripts/file_handle/upload2Ftp.sh
0 → 100644
View file @
267ec90c
#!/bin/bash
# Upload the locally desensitized files to the remote server with lftp.
#
# Every setting below may be overridden from the environment (SFTP_USER,
# SFTP_PASSWORD, SFTP_SRCDIR, SFTP_DESDIR, SFTP_IP, SFTP_PORT); the defaults
# are the values this script has always used.
#
# Exit status: 0 when every upload succeeded (or nothing matched),
# 1 when the source directory is unreachable or any upload failed.

# SFTP connection settings.
# NOTE(review): the credentials are hard-coded defaults and are passed on the
# lftp command line (visible in `ps`); move them to a protected config file
# or the environment before using this outside a test setup.
# User name
USER="${SFTP_USER:-sp}"
# Password
PASSWORD="${SFTP_PASSWORD:-sp940219sp}"
# Local root directory holding the files to upload
SRCDIR="${SFTP_SRCDIR:-/root}"
# Destination directory on the server
DESDIR="${SFTP_DESDIR:-/home/songpeng/testfile}"
# Server IP
IP="${SFTP_IP:-192.168.153.100}"
# Server port
# NOTE(review): 2121 is commonly a plain-FTP port while the URL below uses
# sftp:// -- confirm the protocol the server actually speaks.
PORT="${SFTP_PORT:-2121}"

# Stop early if the source directory cannot be entered.
cd "${SRCDIR}" || { echo "cannot enter ${SRCDIR}" >&2; exit 1; }

# Remember whether any single upload failed so the caller can detect it.
STATUS=0

# Upload every *.txt file found under SRCDIR (searched recursively).
# NUL-delimited names keep paths containing whitespace in one piece.
while IFS= read -r -d '' FILE
do
    echo "${FILE}"
    # Send the file (the key step). lftp reads its commands from the
    # here-document, so it does not consume the file list feeding the loop.
    # cmd:fail-exit makes lftp stop with a non-zero status as soon as a
    # command fails, so a failed remote cd never uploads to the wrong place.
    lftp -u "${USER},${PASSWORD}" "sftp://${IP}:${PORT}" <<EOF || STATUS=1
set cmd:fail-exit yes
cd "${DESDIR}/"
lcd "${SRCDIR}"
put "${FILE}"
bye
EOF
done < <(find "${SRCDIR}" -name '*.txt' -print0)

exit "${STATUS}"
\ No newline at end of file
wanjia_tuomin/dmp_scripts/modifyStatus.py
View file @
267ec90c
import
pymysql
import
sys
import
constants
,
DbUtils
"""
...
...
@@ -24,11 +23,11 @@ def updateStatusById(treatment_status, treatment_status_result, error_info, upt_
# serch_data = """select t1 from test where id = {table_id}""".format(table_id=id)
# update_data = 'update test set status=%s where id =%s' %(status, id)
if
tableName
==
constants
.
PRE_FILE_TABLE_NAME
:
update_data
=
'update
%
s set pretreatment_status=
%
s,pretreatment_status_result=
%
s,error_info=
\
'
%
s
\'
, '
\
update_data
=
'update
%
s set pretreatment_status=
%
s,pretreatment_status_result=
%
s,error_info=
\
"
%
s
\"
, '
\
'upt_time=
\'
%
s
\'
,upt_person=
\'
%
s
\'
where file_deposit=
%
s'
%
(
tableName
,
treatment_status
,
treatment_status_result
,
error_info
,
upt_time
,
upt_person
,
file_deposit
)
elif
tableName
==
constants
.
FILE_TABLE_NAME
:
update_data
=
'update
%
s set deposit_status=
%
s,deposit_status_result=
%
s,error_info=
\
'
%
s
\'
,'
\
update_data
=
'update
%
s set deposit_status=
%
s,deposit_status_result=
%
s,error_info=
\
"
%
s
\"
,'
\
'upt_time=
\'
%
s
\'
,upt_person=
\'
%
s
\'
where file_deposit=
%
s'
%
(
tableName
,
treatment_status
,
treatment_status_result
,
error_info
,
upt_time
,
upt_person
,
file_deposit
)
...
...
@@ -37,3 +36,4 @@ def updateStatusById(treatment_status, treatment_status_result, error_info, upt_
DbUtils
.
close_conn
(
db
,
cursor
)
except
Exception
as
e
:
print
(
" error_info:
%
s ,update status executor failed:
%
s"
%
(
error_info
,
e
))
sys
.
exit
(
1
)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment