使用java调用Python中部分代码后卡住?

使用java调用Python中部分代码后卡住?

使用Runtime直接执行Python程序,在Python中用 argv[1] 进行接收。

下面附上Python的程序:

spark = SparkSession.builder.appName("Read from Java Backend").master("local[*]").getOrCreate();

# 获取传递的参数

comment = sys.argv[1]

# 将JSON字符串转换为Python对象

comment = json.loads(comment)

# 将 comment 列表转换为 RDD

comment_rdd = spark.sparkContext.parallelize(comment)

# 将 RDD 转换为 Dataframe

df = spark.createDataFrame(comment_rdd.map(lambda x: Row(**x)))

# 加载停用词库

stop_words = spark.sparkContext.textFile("C:/Users/10421/Downloads/baidu_stopwords.txt").collect()

# 定义清理文本函数

def clean_text(text):

# 删除非中文字符

text = re.sub('[^\u4e00-\u9fa5]+', '', text)

return text

# 定义过滤停用词函数

def remove_stopwords(words):

return [w for w in words if w not in stop_words]

# 定义中文分词函数

def seg(text):

return jieba.lcut(text)

# 创建UDF

clean_text_udf = udf(clean_text, StringType())

remove_stopwords_udf = udf(remove_stopwords, StringType())

seg_udf = udf(seg,ArrayType(StringType()))

# 清理文本和过滤停用词

df = df.withColumn("comment", clean_text_udf("comment"))

df = df.withColumn("comment", remove_stopwords_udf("comment"))

df = df.select(concat_ws(" ","comment").alias("comment"))

df = df.withColumn("comment", clean_text_udf("comment"))

# 对comment进行分词

df = df.withColumn("words", seg_udf(col("comment")))

df = df.drop("comment")

df = df.select(explode(col("words")).alias("word"))

# 计算每个单词的出现次数

word_count = df.rdd.map(lambda x: (x.word, 1)).reduceByKey(lambda x, y: x + y)

sorted_word_count = word_count.sortBy(lambda x: x[1], ascending=False)

top_20_words = sorted_word_count.take(20)

column = 0

for row in top_20_words:

print(row[column])

这里尝试一步步注释掉找问题,发现从 sorted_word_count 往后的程序就会导致 Java 那里一直卡住。这部分注释掉可正常运行。

以下为Java代码:

@RestController

@RequestMapping("py")

public class TransController {

@PostMapping(value = "/run_py_program")

public static String runPyProgram(@RequestBody List<Map<String, Object>> comment) throws Exception {

// 将comment转换为JSON字符串

ObjectMapper mapper = new ObjectMapper();

String jsonComment = mapper.writeValueAsString(comment);

String json2 = jsonComment.replace("\"", "\\\"");

// 执行Python程序,并传递参数

String[] args1 = new String[]{"C:\\Users\\10421\\AppData\\Local\\Microsoft\\WindowsApps\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\python.exe", "C:/Users/10421/Desktop/data/ss.py", json2};

Process process = Runtime.getRuntime().exec(args1);

// 获取程序执行结果

InputStream inputStream = process.getInputStream();

BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream,"gb2312"));

StringBuilder resultBuilder = new StringBuilder();

String line;

while ((line = reader.readLine()) != null) {

resultBuilder.append(line);

}

InputStream errorStream = process.getErrorStream();

BufferedReader reader2 = new BufferedReader(new InputStreamReader(errorStream,"gb2312"));

StringBuilder resultBuilder2 = new StringBuilder();

String line2;

while ((line2 = reader2.readLine()) != null) {

resultBuilder2.append(line2);

}

in.close();

int exitValue = process.waitFor();

if (0 != exitValue) {

System.out.println("执行Shell脚本失败. error code is :" + exitValue);

}

System.out.println(resultBuilder);

// 返回程序执行结果

return resultBuilder.toString();

}

}

破案了,是本地跑不动这个,怎么办?


回答:

运行了一下你的代码,发现从sorted_word_count 这个地方开始往后的部分会导致卡住的问题。
可能的原因是编码的问题,改utf-8试试。

@RestController

@RequestMapping("py")

public class TransController {

@PostMapping(value = "/run_py_program")

public static String runPyProgram(@RequestBody List<Map<String, Object>> comment) throws Exception {

// 将 comment 转换为 JSON 字符串

ObjectMapper mapper = new ObjectMapper();

String jsonComment = mapper.writeValueAsString(comment);

String json2 = jsonComment.replace("\"", "\\\"");

// 执行 Python 程序,并传递参数

String[] args1 = new String[]{"python", "C:/Users/10421/Desktop/data/ss.py", json2};

Process process = Runtime.getRuntime().exec(args1);

// 获取程序执行结果

InputStream inputStream = process.getInputStream();

BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream, "UTF-8"));

StringBuilder resultBuilder = new StringBuilder();

String line;

while ((line = reader.readLine()) != null) {

resultBuilder.append(line);

}

InputStream errorStream = process.getErrorStream();

BufferedReader reader2 = new BufferedReader(new InputStreamReader(errorStream, "UTF-8"));

StringBuilder resultBuilder2 = new StringBuilder();

String line2;

while ((line2 = reader2.readLine()) != null) {

resultBuilder2.append(line2);

}

int exitValue = process.waitFor();

if (0 != exitValue) {

System.out.println("执行Shell脚本失败. error code is :" + exitValue);

}

System.out.println(resultBuilder);

// 返回程序执行结果

return resultBuilder.toString();

}

}

以上是 使用java调用Python中部分代码后卡住? 的全部内容, 来源链接: utcz.com/p/938937.html

回到顶部