Home
last modified time | relevance | path

Searched refs:step_id (Results 1 – 25 of 102) sorted by relevance

12345

/external/tensorflow/tensorflow/core/distributed_runtime/
Dpartial_run_mgr_test.cc27 int step_id = 1; in TEST() local
29 partial_run_mgr.FindOrCreate(step_id, &cancellation_manager); in TEST()
36 int step_id = 1; in TEST() local
38 partial_run_mgr.FindOrCreate(step_id, &cancellation_manager); in TEST()
41 partial_run_mgr.FindOrCreate(step_id, &found_cancellation_manager); in TEST()
48 int step_id = 1; in TEST() local
50 partial_run_mgr.FindOrCreate(step_id, &cancellation_manager); in TEST()
62 int step_id = 1; in TEST() local
64 partial_run_mgr.FindOrCreate(step_id, &cancellation_manager); in TEST()
68 step_id, [&called](Status status) { called++; }, Status::OK()); in TEST()
[all …]
Dworker.cc101 void Worker::AbortStep(int64 step_id) { in AbortStep() argument
102 Rendezvous* rendez = env_->rendezvous_mgr->Find(step_id); in AbortStep()
103 SchedNonBlockingClosureAfter(1000000, [rendez, step_id]() { in AbortStep()
107 rendez->StartAbort(errors::Aborted("Step ", step_id, in AbortStep()
157 const int64 step_id = request->step_id(); in DoRunGraph() local
158 TRACEPRINTF("RunGraph: %lld", step_id); in DoRunGraph()
208 opts->SetCancelCallback([this, cm, step_id]() { in DoRunGraph()
210 AbortStep(step_id); in DoRunGraph()
226 request->graph_handle(), step_id, session.get(), request->exec_opts(), in DoRunGraph()
228 [this, step_id, response, session, cm, out, token, collector, tracer, in DoRunGraph()
[all …]
Dpartial_run_mgr.cc29 bool PartialRunMgr::FindOrCreate(int step_id, in FindOrCreate() argument
32 auto it = step_id_to_partial_run_.find(step_id); in FindOrCreate()
41 step_id_to_partial_run_[step_id] = std::move(partial_run); in FindOrCreate()
45 void PartialRunMgr::ExecutorDone(int step_id, const Status& executor_status) { in ExecutorDone() argument
50 auto run_it = step_id_to_partial_run_.find(step_id); in ExecutorDone()
68 step_id_to_partial_run_.erase(step_id); in ExecutorDone()
72 void PartialRunMgr::PartialRunDone(int step_id, StatusCallback done, in PartialRunDone() argument
77 auto run_it = step_id_to_partial_run_.find(step_id); in PartialRunDone()
93 step_id_to_partial_run_.erase(step_id); in PartialRunDone()
Dworker_cache_logger.cc59 bool WorkerCacheLogger::RetrieveLogs(int64 step_id, StepStats* ss) { in RetrieveLogs() argument
61 LogMap::iterator iter = log_map_.find(step_id); in RetrieveLogs()
71 void WorkerCacheLogger::Save(const string& device, int64 step_id, in Save() argument
74 StepLog* sl = &log_map_[step_id]; in Save()
85 void WorkerCacheLogger::RecordRecvTensor(int64 step_id, int64 start_usecs, in RecordRecvTensor() argument
91 RecordDataTransfer(step_id, start_usecs, end_usecs, tensor_name, src_device, in RecordRecvTensor()
95 void WorkerCacheLogger::RecordDataTransfer(int64 step_id, int64 start_usecs, in RecordDataTransfer() argument
130 Save(dst_device, step_id, ns); in RecordDataTransfer()
Dbase_rendezvous_mgr.cc53 RemoteRendezvous* BaseRendezvousMgr::Find(int64 step_id) { in Find() argument
54 return FindOrCreate(step_id); in Find()
57 BaseRemoteRendezvous* BaseRendezvousMgr::FindOrCreate(int64 step_id) { in FindOrCreate() argument
59 auto iter = table_.find(step_id); in FindOrCreate()
61 auto rr = Create(step_id, worker_env_); in FindOrCreate()
62 iter = table_.insert({step_id, rr}).first; in FindOrCreate()
68 void BaseRendezvousMgr::RecvLocalAsync(int64 step_id, in RecvLocalAsync() argument
71 auto rendez = FindOrCreate(step_id); in RecvLocalAsync()
85 Status BaseRendezvousMgr::RecvLocal(int64 step_id, in RecvLocal() argument
90 RecvLocalAsync(step_id, parsed, in RecvLocal()
[all …]
Drpc_collective_executor_mgr.cc48 CollectiveExecutor* RpcCollectiveExecutorMgr::Create(int64 step_id) { in Create() argument
51 worker_cache_, step_id); in Create()
52 return new BaseCollectiveExecutor(this, rma, step_id, dev_mgr_, in Create()
61 int64 step_id = random::New64(); in NewRandomStepId() local
63 step_id &= kStepIdMask; in NewRandomStepId()
64 return step_id; in NewRandomStepId()
155 void RpcCollectiveExecutorMgr::RetireStepId(int64 graph_key, int64 step_id) { in RetireStepId() argument
159 if (step_id == it->second->next_step_id_) { in RetireStepId()
Dbase_rendezvous_mgr.h72 RemoteRendezvous* Find(int64 step_id) override;
78 void RecvLocalAsync(int64 step_id, const Rendezvous::ParsedKey& parsed,
82 Status RecvLocal(int64 step_id, const Rendezvous::ParsedKey& parsed,
89 void Cleanup(int64 step_id) override;
95 virtual BaseRemoteRendezvous* Create(int64 step_id,
108 BaseRemoteRendezvous* FindOrCreate(int64 step_id);
121 BaseRemoteRendezvous(const WorkerEnv* env, int64 step_id);
Dgraph_mgr.cc339 Status GraphMgr::SendInputs(const int64 step_id, const NamedTensors& in) { in SendInputs() argument
340 Rendezvous* rendezvous = worker_env_->rendezvous_mgr->Find(step_id); in SendInputs()
355 Status GraphMgr::RecvOutputs(const int64 step_id, NamedTensors* out) { in RecvOutputs() argument
356 Rendezvous* rendezvous = worker_env_->rendezvous_mgr->Find(step_id); in RecvOutputs()
362 s = errors::Internal("Failed to fetch outputs for step ", step_id, in RecvOutputs()
368 void GraphMgr::RecvOutputsAsync(const int64 step_id, NamedTensors* out, in RecvOutputsAsync() argument
370 Rendezvous* rendezvous = worker_env_->rendezvous_mgr->Find(step_id); in RecvOutputsAsync()
391 void GraphMgr::ExecuteAsync(const string& handle, const int64 step_id, in ExecuteAsync() argument
426 RemoteRendezvous* rendezvous = worker_env_->rendezvous_mgr->Find(step_id); in ExecuteAsync()
431 worker_env_->collective_executor_mgr->FindOrCreate(step_id), in ExecuteAsync()
[all …]
Drendezvous_mgr_interface.h74 virtual RemoteRendezvous* Find(int64 step_id) = 0;
80 virtual void RecvLocalAsync(int64 step_id,
85 virtual Status RecvLocal(int64 step_id, const Rendezvous::ParsedKey& parsed,
92 virtual void Cleanup(int64 step_id) = 0;
Dmaster_session.cc155 void RetrieveLogs(int64 step_id, StepStats* ss) { in RetrieveLogs() argument
157 worker_cache_->RetrieveLogs(step_id, ss); in RetrieveLogs()
161 req.add_fetch_step_id(step_id); in RetrieveLogs()
170 [step_id, ss, resp, &scoped_mu, &all_done](const Status& s) { in RetrieveLogs()
175 if (step_id != lss.step_id()) { in RetrieveLogs()
200 Status RunPartitions(const MasterEnv* env, int64 step_id,
205 Status RunPartitions(const MasterEnv* env, int64 step_id,
212 void CleanupPartitionsAsync(int64 step_id, StatusCallback done);
215 void ProcessStats(int64 step_id, PerStepState* pss, ProfileHandler* ph,
319 const FetchListType& fetches, const MasterEnv* env, int64 step_id,
[all …]
Dworker_cache_logger.h48 bool RetrieveLogs(int64 step_id, StepStats* ss);
59 void RecordRecvTensor(int64 step_id, int64 start_usecs, int64 end_usecs,
65 void RecordDataTransfer(int64 step_id, int64 start_usecs, int64 end_usecs,
84 void Save(const string& device, int64 step_id, NodeExecStats* ns);
Dgraph_mgr.h89 void ExecuteAsync(const string& handle, const int64 step_id,
96 Status SendInputs(const int64 step_id, const NamedTensors& in);
97 Status RecvOutputs(const int64 step_id, NamedTensors* out);
98 void RecvOutputsAsync(const int64 step_id, NamedTensors* out,
165 void StartParallelExecutors(const string& handle, int64 step_id, Item* item,
/external/tensorflow/tensorflow/core/distributed_runtime/rpc/
Drpc_rendezvous_mgr_test.cc87 const int64 step_id = 123; in TEST_F() local
92 RemoteRendezvous* rendez = rmgr_.Find(step_id); in TEST_F()
101 TF_ASSERT_OK(rmgr_.RecvLocal(step_id, key, &val, &val_dead)); in TEST_F()
104 rmgr_.Cleanup(step_id); in TEST_F()
112 const int64 step_id = 123; in TEST_F() local
113 RemoteRendezvous* rendez = rmgr_.Find(step_id); in TEST_F()
126 const int64 step_id = 321; in TEST_F() local
127 RemoteRendezvous* rendez = rmgr_.Find(step_id); in TEST_F()
129 SchedClosure([this, step_id]() { in TEST_F()
131 rmgr_.Cleanup(step_id); in TEST_F()
[all …]
/external/tensorflow/tensorflow/core/framework/
Dlog_memory.cc41 void LogMemory::RecordStep(const int64 step_id, const string& handle) { in RecordStep() argument
43 step.set_step_id(step_id); in RecordStep()
49 const int64 step_id, in RecordTensorAllocation() argument
52 allocation.set_step_id(step_id); in RecordTensorAllocation()
67 const int64 step_id, const int index, in RecordTensorOutput() argument
70 output.set_step_id(step_id); in RecordTensorOutput()
78 const int64 step_id, size_t num_bytes, in RecordRawAllocation() argument
81 allocation.set_step_id(step_id); in RecordRawAllocation()
91 const int64 step_id, void* ptr, in RecordRawDeallocation() argument
94 deallocation.set_step_id(step_id); in RecordRawDeallocation()
Dlog_memory.h62 static void RecordStep(int64 step_id, const string& handle);
70 static void RecordTensorAllocation(const string& kernel_name, int64 step_id,
83 static void RecordTensorOutput(const string& kernel_name, int64 step_id,
95 static void RecordRawAllocation(const string& operation, int64 step_id,
104 static void RecordRawDeallocation(const string& operation, int64 step_id,
/external/tensorflow/tensorflow/core/common_runtime/
Dcollective_executor_mgr.cc42 CollectiveExecutor* CollectiveExecutorMgr::FindOrCreate(int64 step_id) { in FindOrCreate() argument
46 auto it = executor_table_.find(step_id); in FindOrCreate()
50 ce = Create(step_id); in FindOrCreate()
51 executor_table_[step_id] = ce; in FindOrCreate()
58 CollectiveExecutor* CollectiveExecutorMgr::Create(int64 step_id) { in Create() argument
60 new CollectiveRemoteAccessLocal(dev_mgr_, dev_resolver_.get(), step_id); in Create()
61 return new BaseCollectiveExecutor(this, rma, step_id, dev_mgr_, in Create()
65 void CollectiveExecutorMgr::Cleanup(int64 step_id) { in Cleanup() argument
69 auto it = executor_table_.find(step_id); in Cleanup()
Dscoped_allocator_mgr.cc130 void ScopedAllocatorMgr::Cleanup(int64 step_id) { in Cleanup() argument
132 auto it = per_step_map_.find(step_id); in Cleanup()
139 ScopedAllocatorContainer* ScopedAllocatorMgr::GetContainer(int64 step_id) { in GetContainer() argument
140 VLOG(2) << "GetContainer " << step_id << " on " << device_name(); in GetContainer()
143 auto it = per_step_map_.find(step_id); in GetContainer()
145 sac = new ScopedAllocatorContainer(this, step_id); in GetContainer()
146 per_step_map_[step_id] = sac; in GetContainer()
154 const Tensor& backing_tensor, int64 step_id, int32 scope_id, in AddScopedAllocator() argument
158 ScopedAllocatorContainer* sac = GetContainer(step_id); in AddScopedAllocator()
Dtest_collective_executor_mgr.h62 CollectiveExecutor* FindOrCreate(int64 step_id) override { in FindOrCreate() argument
65 auto iter = table_.find(step_id); in FindOrCreate()
70 table_[step_id] = ce; in FindOrCreate()
76 void Cleanup(int64 step_id) override { in Cleanup() argument
78 auto iter = table_.find(step_id); in Cleanup()
110 void RetireStepId(int64 graph_key, int64 step_id) override {} in RetireStepId() argument
Dscoped_allocator_mgr.h47 ScopedAllocatorContainer(const ScopedAllocatorMgr* mgr, int64 step_id) in ScopedAllocatorContainer() argument
48 : mgr_(mgr), step_id_(step_id) {} in ScopedAllocatorContainer()
79 ScopedAllocatorContainer* GetContainer(int64 step_id);
83 const Tensor& backing_tensor, int64 step_id, int32 scope_id,
88 void Cleanup(int64 step_id);
Dcollective_executor_mgr.h33 CollectiveExecutor* FindOrCreate(int64 step_id) override;
35 void Cleanup(int64 step_id) override;
56 void RetireStepId(int64 graph_key, int64 step_id) override {} in RetireStepId() argument
60 virtual CollectiveExecutor* Create(int64 step_id);
/external/tensorflow/tensorflow/contrib/mpi/
Dmpi_rendezvous_mgr.h72 void Init(const Rendezvous::ParsedKey& parsed, const int64 step_id, in Init() argument
75 mRes_.set_step_id(step_id); in Init()
106 void Init(const Rendezvous::ParsedKey& parsed, const int64 step_id) { in Init() argument
107 req_.set_step_id(step_id); in Init()
118 MPIRemoteRendezvous(const WorkerEnv* env, int64 step_id, const MPIUtils* util, in MPIRemoteRendezvous() argument
120 : BaseRemoteRendezvous(env, step_id), in MPIRemoteRendezvous()
148 void QueueRequest(std::string key, int64 step_id, in QueueRequest() argument
153 const std::string key_id = strings::StrCat(key, "_", step_id); in QueueRequest()
158 BaseRemoteRendezvous* Create(int64 step_id,
194 void GetRecvCall(const int64 step_id, const std::string& key, in GetRecvCall() argument
[all …]
Dmpi_rendezvous_mgr.cc57 BaseRemoteRendezvous* MPIRendezvousMgr::Create(int64 step_id, in Create() argument
59 return new MPIRemoteRendezvous(worker_env, step_id, mpiutils_, this); in Create()
155 const int64 step_id = request.step_id(); in AddRequest() local
199 [this, parsed, step_id, send_cb]( in AddRequest()
205 << " step: " << step_id in AddRequest()
211 << " @ step: " << step_id << std::endl; in AddRequest()
214 mpi_send_call->Init(parsed, step_id, is_dead); in AddRequest()
271 worker_env_2->compute_pool->Schedule([this, step_id, parsed, done_cb]() { in AddRequest()
272 this->RecvLocalAsync(step_id, parsed, done_cb); in AddRequest()
291 const int64 step_id = mRes.step_id(); in MPIBackgroundThread() local
[all …]
/external/tensorflow/tensorflow/contrib/gdr/
Dgdr_collective_executor_mgr.cc36 RecvBufCall(int64 step_id, const string& peer_device, const string& peer_task, in RecvBufCall() argument
44 req_.set_step_id(step_id); in RecvBufCall()
70 int64 step_id, in CollectiveRemoteAccessDistributed() argument
72 : CollectiveRemoteAccessLocal(dev_mgr, dev_resolver, step_id), in CollectiveRemoteAccessDistributed()
150 CollectiveExecutor* GdrCollectiveExecutorMgr::Create(int64 step_id) { in Create() argument
153 worker_cache_, step_id, in Create()
155 return new BaseCollectiveExecutor(this, rma, step_id, dev_mgr_, in Create()
Dgdr_worker.cc58 const int64 step_id = request->step_id(); in GrpcRecvTensorAsync() local
60 TRACEPRINTF("RecvTensor: %lld %s", step_id, key.c_str()); in GrpcRecvTensorAsync()
76 opts->SetCancelCallback([this, step_id]() { AbortStep(step_id); }); in GrpcRecvTensorAsync()
79 step_id, parsed, in GrpcRecvTensorAsync()
160 env_->collective_executor_mgr->FindOrCreate(request->step_id()), true); in RecvBufAsync()
/external/tensorflow/tensorflow/contrib/verbs/
Dverbs_util.cc28 string VerbsUtil::AppendStepidToKey(const string& key, int64 step_id) { in AppendStepidToKey() argument
29 return strings::StrCat(key, ";", step_id); in AppendStepidToKey()
34 int64& step_id) { in GetKeyAndStepId() argument
45 strings::safe_strto64(parts[5], &step_id); in GetKeyAndStepId()

12345