Commit 651c9f26 authored by kinomin

Initialize the project

parent 1cae46b3
 import pymysql
 import exception.MyErrorException as ex
 from pyspark import SparkContext
+from sources import Sinks
 def getMySqlConnect():
     try:
         # connect to the database
         conn = pymysql.connect(
             host="hadoop1",  # host name; an IP address such as 127.0.0.1 also works
             user="root",     # user name
@@ -15,6 +17,7 @@ def getMySqlConnect():
         )
     except ex.MyError as e:
         print("connection error", e.errorinfo)
+        Sinks.updateMysqlStatus(i_status=1, i_readcount=0, i_sinkcount=0, i_id=1)
     return conn
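Every failure path in this commit now calls Sinks.updateMysqlStatus(i_status=1, i_readcount=0, i_sinkcount=0, i_id=1), but the sources.Sinks module itself is not part of the diff. A minimal sketch of what such a helper might look like, assuming a MySQL table job_status(id, status, readcount, sinkcount) in a database named wanjia; only the keyword arguments come from the call sites, everything else here is an assumption:

# Hypothetical sketch of sources.Sinks.updateMysqlStatus, not the committed code.
# Assumes a table job_status(id, status, readcount, sinkcount) in an assumed database "wanjia".
import pymysql

def updateMysqlStatus(i_status, i_readcount, i_sinkcount, i_id):
    conn = pymysql.connect(host="hadoop1", user="root", password="xxx", database="wanjia")
    try:
        with conn.cursor() as cur:
            cur.execute(
                "UPDATE job_status SET status=%s, readcount=%s, sinkcount=%s WHERE id=%s",
                (i_status, i_readcount, i_sinkcount, i_id),
            )
        conn.commit()
    finally:
        conn.close()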
@@ -28,6 +31,7 @@ def getRecordByIdResultT1(id):
         # fetch the next row of the result set
         record = cur.fetchone()
     except ex.MyError as e:
+        Sinks.updateMysqlStatus(i_status=1, i_readcount=0, i_sinkcount=0, i_id=1)
         print("executor failed", e.errorinfo)
         conn.rollback()
     finally:
@@ -37,13 +41,20 @@ def getRecordByIdResultT1(id):
 def readCsv(spark, path, col):
-    df = spark.read.format('csv').option('inferSchema', 'true').load(path).toDF(*col)
+    try:
+        df = spark.read.format('csv').option('inferSchema', 'true').load(path).toDF(*col)
+    except ex.MyError as e:
+        Sinks.updateMysqlStatus(i_status=1, i_readcount=0, i_sinkcount=0, i_id=1)
+        print("executor failed", e.errorinfo)
     return df
-def readTxt(spark, path, col, delimiter):
-    rddFile = SparkContext.textFile("1.txt")
-    rddMap = rddFile.map(lambda x: x.split(delimiter))
-    df = spark.createDataFrame(rddMap, col)
-    return df
+def readTxt(spark, path, delimiter, *col):
+    try:
+        rddFile = SparkContext.textFile(path)
+        rddMap = rddFile.map(lambda x: x.split(delimiter))
+        df = spark.createDataFrame(rddMap, *col)
+    except ex.MyError as e:
+        Sinks.updateMysqlStatus(i_status=1, i_readcount=0, i_sinkcount=0, i_id=1)
+        print("executor failed", e.errorinfo)
+    return df
\ No newline at end of file
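As committed, readTxt still calls textFile on the SparkContext class rather than on an instance, and unpacks col into createDataFrame; Spark failures also surface as Py4J or analysis exceptions rather than the project's ex.MyError, so these except clauses may never fire. A minimal working variant, assuming spark is a SparkSession and col is a plain list of column names (a sketch, not the project's implementation):

# Sketch of a working readTxt under the assumptions above, not the committed code.
def readTxt(spark, path, delimiter, col):
    rddFile = spark.sparkContext.textFile(path)          # textFile is an instance method of SparkContext
    rddMap = rddFile.map(lambda x: x.split(delimiter))   # split each line into a list of fields
    return spark.createDataFrame(rddMap, col)            # col supplies the column names

# Usage sketch (hypothetical path and column names):
# df = readTxt(spark, '/data/1.txt', ',', ['id', 'name', 'age'])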
@@ -2,13 +2,14 @@
 import sys
 import os
 import shutil
+import exception.MyErrorException as ex
 import zipfile
-# pyName = sys.argv[0]  # sys.argv[0] is like $0 in shell, but it is the script's path rather than its name
-# zipPath = sys.argv[1]  # sys.argv[1] is the first command-line argument
-# inputPath = sys.argv[2]  # sys.argv[2] is the second command-line argument
-# outputPath = sys.argv[3]
+pyName = sys.argv[0]  # sys.argv[0] is like $0 in shell, but it is the script's path rather than its name
+zipPath = sys.argv[1]  # sys.argv[1] is the first command-line argument
+inputPath = sys.argv[2]  # sys.argv[2] is the second command-line argument
+outputPath = sys.argv[3]
 # Question: does unzipping need to handle passwords???
 # os.system("hdfs dfs -put /srcPath /dstPath")
@@ -36,7 +37,7 @@ def unzip_single(srcPath, dstPath):
     zf = zipfile.ZipFile(srcPath)
     try:
         zf.extractall(path=dstPath)
-    except RuntimeError as e:
+    except ex.MyError as e:
         print(e)
     zf.close()
@@ -65,7 +66,7 @@ if __name__ == '__main__':
     # if len(sys.argv) == 3:
     #     source_dir, dest_dir = os.path.abspath(sys.argv[0].strip('"')), sys.argv[1]
     print('Unzipping and copying...')
-    # unzip_file(zipPath, inputPath)
-    # get_file(inputPath, outputPath)
-    unzip_all("D:\\src", "D:\\dst")
-    get_file("D:\\src", "D:\\dst")
+    unzip_all(zipPath, inputPath)
+    get_file(inputPath, outputPath)
+    # unzip_all("D:\\src", "D:\\dst")
+    # get_file("D:\\src", "D:\\dst")
 #! /bin/bash
+set -e
 echo -e "\033[34m==========File unzip/copy started\033[0m=========="
-python3 unzip.py /root/wanjia/files/zip_file/20201102/20201102.zip /root/wanjia/files/input_file/20201102 /root/wanjia/files/output_file/20201102
+python3 unzip.py /root/wanjia/files/zip_file/20201102/202011021111.zip /root/wanjia/files/input_file/20201102 /root/wanjia/files/output_file/20201102
 echo -e "\033[34m==========File unzip/copy finished\033[0m=========="
 echo ""
 echo -e "\033[34m==========Writing id to environment variables\033[0m============"
......
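One caveat on the new set -e: it only aborts the wrapper when a command exits with a non-zero status, and unzip.py as committed catches exceptions and merely prints them, so a failed extraction still returns 0. A sketch of a main block that propagates failure; the broad except and the sys.exit(1) call are assumptions, not part of this commit:

# Sketch: exit non-zero on failure so the bash wrapper's `set -e` stops the pipeline.
# Reuses zipPath/inputPath/outputPath and unzip_all/get_file from unzip.py above.
import sys

if __name__ == '__main__':
    try:
        print('Unzipping and copying...')
        unzip_all(zipPath, inputPath)
        get_file(inputPath, outputPath)
    except Exception as e:
        print('unzip/copy failed:', e)
        sys.exit(1)  # non-zero exit lets `set -e` abort the calling shell script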