Skip to content

Commit fbbbf3f

Browse files
authored
[NPU] Fix AssignKernel (#831)
1 parent cc3b536 commit fbbbf3f

File tree

3 files changed: 17 additions and 2 deletions.

backends/npu/kernels/assign_kernel.cc

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -22,7 +22,7 @@ void AssignKernel(const Context& dev_ctx,
2222
const phi::DenseTensor& x,
2323
phi::DenseTensor* out) {
2424
dev_ctx.template Alloc<T>(out);
25-
TensorCopy(dev_ctx, x, true, out);
25+
TensorCopy(dev_ctx, x, false, out);
2626
}
2727

2828
template <typename T, typename Context>

backends/npu/kernels/funcs/npu_op_runner.cc

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -587,13 +587,20 @@ bool NpuOpRunner::GetFloatStatus(aclrtStream stream) {
587587
}
588588

589589
void NpuOpRunner::Run(aclrtStream stream, bool sync) const {
590+
static bool isAclEnableJit = false;
591+
590592
PADDLE_ENFORCE_NOT_NULL(
591593
stream,
592594
phi::errors::External("Stream should not be null, please check."));
593595
InitFloatStatus(stream);
594596
VLOG(1) << "NpuOpRunner: " << op_type_ << "\n"
595597
<< GetOpDescString(input_descs_, "Input")
596598
<< GetOpDescString(output_descs_, "Output");
599+
600+
if (!isAclEnableJit) {
601+
aclSetCompileopt(ACL_OP_JIT_COMPILE, "enable");
602+
isAclEnableJit = true;
603+
}
597604
aclError ret;
598605
// Ensure that the Gil has been released before running
599606
// aclopCompileAndExecute.

backends/npu/runtime/runtime.cc

Lines changed: 9 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -80,8 +80,8 @@ class EventResourcePool {
8080
++iter;
8181
} else {
8282
idle_event_list_[dev_id].push_back(*iter);
83-
iter = wait_event_list_[dev_id].erase(iter);
8483
reset_event(recorded_event_stream_map_[*iter], *iter);
84+
iter = wait_event_list_[dev_id].erase(iter);
8585
}
8686
} else {
8787
idle_event_list_[dev_id].push_back(*iter);
@@ -115,6 +115,12 @@ class EventResourcePool {
115115

116116
void RecordEvent(int dev_id, aclrtStream stream, aclrtEvent event) {
117117
std::lock_guard<std::mutex> lock(mutex_);
118+
if (event_has_been_recorded_[dev_id][event]) {
119+
LOG_IF(WARNING, FLAGS_npu_runtime_debug)
120+
<< "[RUNTIME] RecordEvent: event has already been recorded. event="
121+
<< event;
122+
reset_event(recorded_event_stream_map_[event], event);
123+
}
118124
event_has_been_recorded_[dev_id][event] = true;
119125
recorded_event_stream_map_[event] = stream;
120126
record_evnt(stream, event);
@@ -749,6 +755,7 @@ HcclDataType PDDataTypeToHcclDataType(C_DataType dtype) {
749755
return HCCL_DATA_TYPE_UINT8;
750756
} else {
751757
LOG(ERROR) << "Datatype " << dtype << " in hccl is not supported.";
758+
exit(-1);
752759
}
753760
}
754761

@@ -763,6 +770,7 @@ HcclReduceOp PDReduceOpToHcclReduceOp(C_CCLReduceOp op) {
763770
return HCCL_REDUCE_PROD;
764771
} else {
765772
LOG(ERROR) << "Reduceop " << op << " in hccl is not supported.";
773+
exit(-1);
766774
}
767775
}
768776

0 commit comments

Comments
 (0)